Coverage Report

Created: 2026-06-10 06:32

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/common/ihevc_itrans_recon_8x8.c
Line
Count
Source
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevc_itrans_recon_8x8.c
22
 *
23
 * @brief
24
 *  Contains function definitions for inverse transform  and reconstruction 8x8
25
 *
26
 *
27
 * @author
28
 *  100470
29
 *
30
 * @par List of Functions:
31
 *  - ihevc_itrans_recon_8x8()
32
 *
33
 * @remarks
34
 *  None
35
 *
36
 *******************************************************************************
37
 */
38
#include <stdio.h>
39
#include <string.h>
40
#include "ihevc_typedefs.h"
41
#include "ihevc_macros.h"
42
#include "ihevc_platform_macros.h"
43
#include "ihevc_defs.h"
44
#include "ihevc_trans_tables.h"
45
#include "ihevc_itrans_recon.h"
46
#include "ihevc_func_selector.h"
47
#include "ihevc_trans_macros.h"
48
49
/**
50
 *******************************************************************************
51
 *
52
 * @brief
53
 *  This function performs Inverse transform  and reconstruction for 8x8
54
 * input block
55
 *
56
 * @par Description:
57
 *  Performs inverse transform and adds the prediction  data and clips output
58
 * to 8 bit
59
 *
60
 * @param[in] pi2_src
61
 *  Input 8x8 coefficients
62
 *
63
 * @param[in] pi2_tmp
64
 *  Temporary 8x8 buffer for storing inverse
65
 *
66
 *  transform
67
 *  1st stage output
68
 *
69
 * @param[in] pu1_pred
70
 *  Prediction 8x8 block
71
 *
72
 * @param[out] pu1_dst
73
 *  Output 8x8 block
74
 *
75
 * @param[in] src_strd
76
 *  Input stride
77
 *
78
 * @param[in] pred_strd
79
 *  Prediction stride
80
 *
81
 * @param[in] dst_strd
82
 *  Output Stride
83
 *
84
 * @param[in] zero_rows
85
 *  Zero rows in pi2_src
86
 *
87
 * @param[in] zero_cols
88
 *  Zero columns in pi2_src
89
 *
90
 * @returns  Void
91
 *
92
 * @remarks
93
 *  None
94
 *
95
 *******************************************************************************
96
 */
97
98
void ihevc_itrans_recon_8x8(WORD16 *pi2_src,
99
                            WORD16 *pi2_tmp,
100
                            UWORD8 *pu1_pred,
101
                            UWORD8 *pu1_dst,
102
                            WORD32 src_strd,
103
                            WORD32 pred_strd,
104
                            WORD32 dst_strd,
105
                            WORD32 zero_cols,
106
                            WORD32 zero_rows)
107
186k
{
108
186k
    WORD32 j, k;
109
186k
    WORD32 e[4], o[4];
110
186k
    WORD32 ee[2], eo[2];
111
186k
    WORD32 add;
112
186k
    WORD32 shift;
113
186k
    WORD16 *pi2_tmp_orig;
114
186k
    WORD32 trans_size;
115
186k
    WORD32 zero_rows_2nd_stage = zero_cols;
116
186k
    WORD32 row_limit_2nd_stage;
117
118
186k
    trans_size = TRANS_SIZE_8;
119
120
186k
    pi2_tmp_orig = pi2_tmp;
121
122
186k
    if((zero_cols & 0xF0) == 0xF0)
123
22.9k
        row_limit_2nd_stage = 4;
124
163k
    else
125
163k
        row_limit_2nd_stage = TRANS_SIZE_8;
126
127
128
186k
    if((zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */
129
15.2k
    {
130
        /************************************************************************************************/
131
        /**********************************START - IT_RECON_8x8******************************************/
132
        /************************************************************************************************/
133
134
        /* Inverse Transform 1st stage */
135
15.2k
        shift = IT_SHIFT_STAGE_1;
136
15.2k
        add = 1 << (shift - 1);
137
138
113k
        for(j = 0; j < row_limit_2nd_stage; j++)
139
97.8k
        {
140
            /* Checking for Zero Cols */
141
97.8k
            if((zero_cols & 1) == 1)
142
780
            {
143
780
                memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
144
780
            }
145
97.1k
            else
146
97.1k
            {
147
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
148
485k
                for(k = 0; k < 4; k++)
149
388k
                {
150
388k
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
151
388k
                                    + g_ai2_ihevc_trans_8[3][k]
152
388k
                                                    * pi2_src[3 * src_strd];
153
388k
                }
154
97.1k
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd];
155
97.1k
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd];
156
97.1k
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0];
157
97.1k
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0];
158
159
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
160
97.1k
                e[0] = ee[0] + eo[0];
161
97.1k
                e[3] = ee[0] - eo[0];
162
97.1k
                e[1] = ee[1] + eo[1];
163
97.1k
                e[2] = ee[1] - eo[1];
164
485k
                for(k = 0; k < 4; k++)
165
388k
                {
166
388k
                    pi2_tmp[k] =
167
388k
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
168
388k
                    pi2_tmp[k + 4] =
169
388k
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
170
388k
                }
171
97.1k
            }
172
97.8k
            pi2_src++;
173
97.8k
            pi2_tmp += trans_size;
174
97.8k
            zero_cols = zero_cols >> 1;
175
97.8k
        }
176
177
15.2k
        pi2_tmp = pi2_tmp_orig;
178
179
        /* Inverse Transform 2nd stage */
180
15.2k
        shift = IT_SHIFT_STAGE_2;
181
15.2k
        add = 1 << (shift - 1);
182
15.2k
        if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
183
6.05k
        {
184
54.4k
            for(j = 0; j < trans_size; j++)
185
48.4k
            {
186
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
187
242k
                for(k = 0; k < 4; k++)
188
193k
                {
189
193k
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
190
193k
                                    + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size];
191
193k
                }
192
48.4k
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
193
48.4k
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
194
48.4k
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
195
48.4k
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
196
197
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
198
48.4k
                e[0] = ee[0] + eo[0];
199
48.4k
                e[3] = ee[0] - eo[0];
200
48.4k
                e[1] = ee[1] + eo[1];
201
48.4k
                e[2] = ee[1] - eo[1];
202
242k
                for(k = 0; k < 4; k++)
203
193k
                {
204
193k
                    WORD32 itrans_out;
205
193k
                    itrans_out =
206
193k
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
207
193k
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
208
193k
                    itrans_out =
209
193k
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
210
193k
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
211
193k
                }
212
48.4k
                pi2_tmp++;
213
48.4k
                pu1_pred += pred_strd;
214
48.4k
                pu1_dst += dst_strd;
215
48.4k
            }
216
6.05k
        }
217
9.21k
        else /* All rows of output of 1st stage are non-zero */
218
9.21k
        {
219
82.8k
            for(j = 0; j < trans_size; j++)
220
73.6k
            {
221
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
222
368k
                for(k = 0; k < 4; k++)
223
294k
                {
224
294k
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
225
294k
                                    + g_ai2_ihevc_trans_8[3][k]
226
294k
                                                    * pi2_tmp[3 * trans_size]
227
294k
                                    + g_ai2_ihevc_trans_8[5][k]
228
294k
                                                    * pi2_tmp[5 * trans_size]
229
294k
                                    + g_ai2_ihevc_trans_8[7][k]
230
294k
                                                    * pi2_tmp[7 * trans_size];
231
294k
                }
232
233
73.6k
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
234
73.6k
                                + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
235
73.6k
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
236
73.6k
                                + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
237
73.6k
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
238
73.6k
                                + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
239
73.6k
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
240
73.6k
                                + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
241
242
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
243
73.6k
                e[0] = ee[0] + eo[0];
244
73.6k
                e[3] = ee[0] - eo[0];
245
73.6k
                e[1] = ee[1] + eo[1];
246
73.6k
                e[2] = ee[1] - eo[1];
247
368k
                for(k = 0; k < 4; k++)
248
294k
                {
249
294k
                    WORD32 itrans_out;
250
294k
                    itrans_out =
251
294k
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
252
294k
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
253
294k
                    itrans_out =
254
294k
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
255
294k
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
256
294k
                }
257
73.6k
                pi2_tmp++;
258
73.6k
                pu1_pred += pred_strd;
259
73.6k
                pu1_dst += dst_strd;
260
73.6k
            }
261
9.21k
        }
262
        /************************************************************************************************/
263
        /************************************END - IT_RECON_8x8******************************************/
264
        /************************************************************************************************/
265
15.2k
    }
266
171k
    else /* All rows of input are non-zero */
267
171k
    {
268
        /************************************************************************************************/
269
        /**********************************START - IT_RECON_8x8******************************************/
270
        /************************************************************************************************/
271
272
        /* Inverse Transform 1st stage */
273
171k
        shift = IT_SHIFT_STAGE_1;
274
171k
        add = 1 << (shift - 1);
275
276
1.47M
        for(j = 0; j < row_limit_2nd_stage; j++)
277
1.30M
        {
278
            /* Checking for Zero Cols */
279
1.30M
            if((zero_cols & 1) == 1)
280
708
            {
281
708
                memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
282
708
            }
283
1.30M
            else
284
1.30M
            {
285
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
286
6.50M
                for(k = 0; k < 4; k++)
287
5.20M
                {
288
5.20M
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
289
5.20M
                                    + g_ai2_ihevc_trans_8[3][k]
290
5.20M
                                                    * pi2_src[3 * src_strd]
291
5.20M
                                    + g_ai2_ihevc_trans_8[5][k]
292
5.20M
                                                    * pi2_src[5 * src_strd]
293
5.20M
                                    + g_ai2_ihevc_trans_8[7][k]
294
5.20M
                                                    * pi2_src[7 * src_strd];
295
5.20M
                }
296
297
1.30M
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd]
298
1.30M
                                + g_ai2_ihevc_trans_8[6][0] * pi2_src[6 * src_strd];
299
1.30M
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd]
300
1.30M
                                + g_ai2_ihevc_trans_8[6][1] * pi2_src[6 * src_strd];
301
1.30M
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0]
302
1.30M
                                + g_ai2_ihevc_trans_8[4][0] * pi2_src[4 * src_strd];
303
1.30M
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0]
304
1.30M
                                + g_ai2_ihevc_trans_8[4][1] * pi2_src[4 * src_strd];
305
306
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
307
1.30M
                e[0] = ee[0] + eo[0];
308
1.30M
                e[3] = ee[0] - eo[0];
309
1.30M
                e[1] = ee[1] + eo[1];
310
1.30M
                e[2] = ee[1] - eo[1];
311
6.50M
                for(k = 0; k < 4; k++)
312
5.20M
                {
313
5.20M
                    pi2_tmp[k] =
314
5.20M
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
315
5.20M
                    pi2_tmp[k + 4] =
316
5.20M
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
317
5.20M
                }
318
1.30M
            }
319
1.30M
            pi2_src++;
320
1.30M
            pi2_tmp += trans_size;
321
1.30M
            zero_cols = zero_cols >> 1;
322
1.30M
        }
323
324
171k
        pi2_tmp = pi2_tmp_orig;
325
326
        /* Inverse Transform 2nd stage */
327
171k
        shift = IT_SHIFT_STAGE_2;
328
171k
        add = 1 << (shift - 1);
329
171k
        if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
330
16.9k
        {
331
152k
            for(j = 0; j < trans_size; j++)
332
135k
            {
333
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
334
677k
                for(k = 0; k < 4; k++)
335
541k
                {
336
541k
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
337
541k
                                    + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size];
338
541k
                }
339
135k
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
340
135k
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
341
135k
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
342
135k
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
343
344
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
345
135k
                e[0] = ee[0] + eo[0];
346
135k
                e[3] = ee[0] - eo[0];
347
135k
                e[1] = ee[1] + eo[1];
348
135k
                e[2] = ee[1] - eo[1];
349
677k
                for(k = 0; k < 4; k++)
350
541k
                {
351
541k
                    WORD32 itrans_out;
352
541k
                    itrans_out =
353
541k
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
354
541k
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
355
541k
                    itrans_out =
356
541k
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
357
541k
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
358
541k
                }
359
135k
                pi2_tmp++;
360
135k
                pu1_pred += pred_strd;
361
135k
                pu1_dst += dst_strd;
362
135k
            }
363
16.9k
        }
364
154k
        else /* All rows of output of 1st stage are non-zero */
365
154k
        {
366
1.38M
            for(j = 0; j < trans_size; j++)
367
1.23M
            {
368
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
369
6.17M
                for(k = 0; k < 4; k++)
370
4.93M
                {
371
4.93M
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
372
4.93M
                                    + g_ai2_ihevc_trans_8[3][k]
373
4.93M
                                                    * pi2_tmp[3 * trans_size]
374
4.93M
                                    + g_ai2_ihevc_trans_8[5][k]
375
4.93M
                                                    * pi2_tmp[5 * trans_size]
376
4.93M
                                    + g_ai2_ihevc_trans_8[7][k]
377
4.93M
                                                    * pi2_tmp[7 * trans_size];
378
4.93M
                }
379
380
1.23M
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
381
1.23M
                                + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
382
1.23M
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
383
1.23M
                                + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
384
1.23M
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
385
1.23M
                                + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
386
1.23M
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
387
1.23M
                                + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
388
389
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
390
1.23M
                e[0] = ee[0] + eo[0];
391
1.23M
                e[3] = ee[0] - eo[0];
392
1.23M
                e[1] = ee[1] + eo[1];
393
1.23M
                e[2] = ee[1] - eo[1];
394
6.17M
                for(k = 0; k < 4; k++)
395
4.93M
                {
396
4.93M
                    WORD32 itrans_out;
397
4.93M
                    itrans_out =
398
4.93M
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
399
4.93M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
400
4.93M
                    itrans_out =
401
4.93M
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
402
4.93M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
403
4.93M
                }
404
1.23M
                pi2_tmp++;
405
1.23M
                pu1_pred += pred_strd;
406
1.23M
                pu1_dst += dst_strd;
407
1.23M
            }
408
154k
        }
409
        /************************************************************************************************/
410
        /************************************END - IT_RECON_8x8******************************************/
411
        /************************************************************************************************/
412
171k
    }
413
186k
}
414