Coverage Report

Created: 2025-08-29 06:39

/src/libavc/common/svc/isvc_iquant_itrans_recon.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2022 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
 */
20
/**
21
 *******************************************************************************
22
 * @file
23
 *  isvc_iquant_itrans_recon.c
24
 *
25
 * @brief
26
 *  Contains definition of functions for h264 inverse quantization inverse
27
 *transformation and recon
28
 *
29
 * @author
30
 *  Ittiam
31
 *
32
 *  @par List of Functions:
33
 *  - isvc_iquant_itrans_recon_4x4()
34
 *  - isvc_iquant_itrans_recon_8x8()
35
 *  - isvc_iquant_itrans_recon_4x4_dc()
36
 *  - ih264_iquant_itrans_recon_8x8_dc()
37
 *  - isvc_iquant_itrans_recon_chroma_4x4()
38
 *  - ih264_iquant_itrans_recon_chroma_4x4_dc()
39
 *
40
 * @remarks
41
 *
42
 *******************************************************************************
43
 */
44
45
/*****************************************************************************/
46
/* File Includes                                                             */
47
/*****************************************************************************/
48
#include <stdint.h>
49
50
#include "ih264_typedefs.h"
51
#include "ih264_debug.h"
52
#include "ih264_defs.h"
53
#include "ih264_trans_macros.h"
54
#include "ih264_macros.h"
55
#include "ih264_platform_macros.h"
56
#include "ih264_trans_data.h"
57
#include "ih264_size_defs.h"
58
#include "ih264_structs.h"
59
#include "isvc_trans_quant_itrans_iquant.h"
60
61
/*
62
 ********************************************************************************
63
 *
64
 * @brief This function reconstructs a 4x4 sub block from quantized resiude and
65
 * prediction buffer
66
 *
67
 * @par Description:
68
 *  The quantized residue is first inverse quantized, then inverse transformed.
69
 *  This inverse transformed content is added to the prediction buffer to recon-
70
 *  struct the end output
71
 *
72
 * @param[in] pi2_src
73
 *  quantized 4x4 block
74
 *
75
 * @param[in] pu1_pred
76
 *  prediction 4x4 block
77
 *
78
 * @param[in] pi2_res
79
 *  residue 4x4 block
80
 *
81
 * @param[out] pu1_out
82
 *  reconstructed 4x4 block
83
 *
84
 * @param[in] src_strd
85
 *  quantization buffer stride
86
 *
87
 * @param[in] i4_pred_stride,
88
 *  Prediction buffer stride
89
 *
90
 * @param[in] i4_out_stride
91
 *  recon buffer Stride
92
 *
93
 * @param[in] i4_res_stride
94
 *  residue buffer Stride
95
 *
96
 * @param[in] pu2_scaling_list
97
 *  pointer to scaling list
98
 *
99
 * @param[in] pu2_norm_adjust
100
 *  pointer to inverse scale matrix
101
 *
102
 * @param[in] u4_qp_div_6
103
 *  Floor (qp/6)
104
 *
105
 * @param[in] pi2_tmp
106
 * temporary buffer of size 1*16
107
 *
108
 * @returns none
109
 *
110
 * @remarks none
111
 *
112
 *******************************************************************************
113
 */
114
void isvc_iquant_itrans_recon_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred,
115
                                  buffer_container_t *ps_res_pred, buffer_container_t *ps_res,
116
                                  buffer_container_t *ps_rec,
117
                                  iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
118
                                  WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx,
119
                                  UWORD8 u1_res_accumulate)
120
7.94M
{
121
7.94M
    WORD16 x0, x1, x2, x3, i;
122
7.94M
    WORD32 q0, q1, q2, q3;
123
7.94M
    WORD16 i_macro;
124
125
7.94M
    WORD16 *pi2_src = ps_src->pv_data;
126
7.94M
    WORD16 *pi2_res = ps_res->pv_data;
127
7.94M
    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
128
7.94M
    UWORD8 *pu1_pred = ps_pred->pv_data;
129
7.94M
    UWORD8 *pu1_out = ps_rec->pv_data;
130
7.94M
    WORD32 i4_src_stride = ps_src->i4_data_stride;
131
7.94M
    WORD32 i4_res_stride = ps_res->i4_data_stride;
132
7.94M
    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
133
7.94M
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
134
7.94M
    WORD32 i4_out_stride = ps_rec->i4_data_stride;
135
7.94M
    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
136
7.94M
    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
137
7.94M
    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
138
7.94M
    WORD16 *pi2_src_ptr = pi2_src;
139
7.94M
    WORD16 *pi2_tmp_ptr = pi2_tmp;
140
7.94M
    UWORD8 *pu1_pred_ptr = pu1_pred;
141
7.94M
    WORD16 *pi2_res_ptr = pi2_res;
142
7.94M
    WORD16 *pi2_res_pred_ptr = pi2_res_pred;
143
7.94M
    UWORD8 *pu1_out_ptr = pu1_out;
144
7.94M
    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
145
146
    /* inverse quant */
147
    /*horizontal inverse transform */
148
39.8M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
149
31.8M
    {
150
31.8M
        q0 = pi2_src_ptr[0];
151
31.8M
        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
152
31.8M
        if(i == 0 && i4_iq_start_idx == 1) q0 = pi2_dc_src[0];  // Restoring dc value for intra case
153
154
31.8M
        q2 = pi2_src_ptr[2];
155
31.8M
        INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
156
157
31.8M
        x0 = q0 + q2;
158
31.8M
        x1 = q0 - q2;
159
160
31.8M
        q1 = pi2_src_ptr[1];
161
31.8M
        INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
162
163
31.8M
        q3 = pi2_src_ptr[3];
164
31.8M
        INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
165
166
31.8M
        x2 = (q1 >> 1) - q3;
167
31.8M
        x3 = q1 + (q3 >> 1);
168
169
31.8M
        pi2_tmp_ptr[0] = x0 + x3;
170
31.8M
        pi2_tmp_ptr[1] = x1 + x2;
171
31.8M
        pi2_tmp_ptr[2] = x1 - x2;
172
31.8M
        pi2_tmp_ptr[3] = x0 - x3;
173
174
31.8M
        pi2_src_ptr += i4_src_stride;
175
31.8M
        pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
176
31.8M
        pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
177
31.8M
        pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
178
31.8M
    }
179
180
    /* vertical inverse transform */
181
7.94M
    pi2_tmp_ptr = pi2_tmp;
182
40.7M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
183
32.8M
    {
184
32.8M
        pu1_pred_ptr = pu1_pred;
185
32.8M
        pi2_res_ptr = pi2_res;
186
32.8M
        pi2_res_pred_ptr = pi2_res_pred;
187
32.8M
        pu1_out = pu1_out_ptr;
188
189
32.8M
        x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
190
32.8M
        x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
191
32.8M
        x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
192
32.8M
        x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
193
194
        /* inverse prediction */
195
32.8M
        i_macro = x0 + x3;
196
32.8M
        i_macro = ((i_macro + 32) >> 6);
197
32.8M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
198
32.8M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
199
32.8M
        pu1_pred_ptr += i4_pred_stride;
200
32.8M
        pu1_out += i4_out_stride;
201
32.8M
        pi2_res_ptr += i4_res_stride;
202
32.8M
        pi2_res_pred_ptr += i4_res_pred_stride;
203
204
32.8M
        i_macro = x1 + x2;
205
32.8M
        i_macro = ((i_macro + 32) >> 6);
206
32.8M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
207
32.8M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
208
32.8M
        pu1_pred_ptr += i4_pred_stride;
209
32.8M
        pu1_out += i4_out_stride;
210
32.8M
        pi2_res_ptr += i4_res_stride;
211
32.8M
        pi2_res_pred_ptr += i4_res_pred_stride;
212
213
32.8M
        i_macro = x1 - x2;
214
32.8M
        i_macro = ((i_macro + 32) >> 6);
215
32.8M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
216
32.8M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
217
32.8M
        pu1_pred_ptr += i4_pred_stride;
218
32.8M
        pu1_out += i4_out_stride;
219
32.8M
        pi2_res_ptr += i4_res_stride;
220
32.8M
        pi2_res_pred_ptr += i4_res_pred_stride;
221
222
32.8M
        i_macro = x0 - x3;
223
32.8M
        i_macro = ((i_macro + 32) >> 6);
224
32.8M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
225
32.8M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
226
227
32.8M
        pi2_tmp_ptr++;
228
32.8M
        pu1_out_ptr++;
229
32.8M
        pu1_pred++;
230
32.8M
        pi2_res++;
231
32.8M
        pi2_res_pred++;
232
32.8M
    }
233
7.94M
}
234
235
void isvc_iquant_itrans_recon_4x4_dc(buffer_container_t *ps_src, buffer_container_t *ps_pred,
236
                                     buffer_container_t *ps_res_pred, buffer_container_t *ps_res,
237
                                     buffer_container_t *ps_rec,
238
                                     iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
239
                                     WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx,
240
                                     UWORD8 u1_res_accumulate)
241
10.0M
{
242
10.0M
    WORD16 *pi2_src = ps_src->pv_data;
243
10.0M
    WORD16 *pi2_res = ps_res->pv_data;
244
10.0M
    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
245
10.0M
    UWORD8 *pu1_pred = ps_pred->pv_data;
246
10.0M
    UWORD8 *pu1_out = ps_rec->pv_data;
247
10.0M
    WORD32 i4_res_stride = ps_res->i4_data_stride;
248
10.0M
    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
249
10.0M
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
250
10.0M
    WORD32 i4_out_stride = ps_rec->i4_data_stride;
251
10.0M
    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
252
10.0M
    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
253
10.0M
    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
254
10.0M
    UWORD8 *pu1_pred_ptr = pu1_pred;
255
10.0M
    WORD16 *pi2_res_ptr = pi2_res;
256
10.0M
    WORD16 *pi2_res_pred_ptr = pi2_res_pred;
257
10.0M
    UWORD8 *pu1_out_ptr = pu1_out;
258
10.0M
    WORD32 q0;
259
10.0M
    WORD16 i_macro, i;
260
10.0M
    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
261
10.0M
    UNUSED(pi2_tmp);
262
263
10.0M
    if(i4_iq_start_idx == 0)
264
0
    {
265
0
        q0 = pi2_src[0];
266
0
        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
267
0
    }
268
10.0M
    else
269
10.0M
    {
270
10.0M
        q0 = pi2_dc_src[0];  // Restoring dc value for intra case3
271
10.0M
    }
272
10.0M
    i_macro = ((q0 + 32) >> 6);
273
49.7M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
274
39.6M
    {
275
39.6M
        pu1_pred_ptr = pu1_pred;
276
39.6M
        pi2_res_ptr = pi2_res;
277
39.6M
        pi2_res_pred_ptr = pi2_res_pred;
278
39.6M
        pu1_out = pu1_out_ptr;
279
280
        /* inverse prediction */
281
39.6M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
282
39.6M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
283
39.6M
        pu1_pred_ptr += i4_pred_stride;
284
39.6M
        pu1_out += i4_out_stride;
285
39.6M
        pi2_res_ptr += i4_res_stride;
286
39.6M
        pi2_res_pred_ptr += i4_res_pred_stride;
287
288
39.6M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
289
39.6M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
290
39.6M
        pu1_pred_ptr += i4_pred_stride;
291
39.6M
        pu1_out += i4_out_stride;
292
39.6M
        pi2_res_ptr += i4_res_stride;
293
39.6M
        pi2_res_pred_ptr += i4_res_pred_stride;
294
295
39.6M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
296
39.6M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
297
39.6M
        pu1_pred_ptr += i4_pred_stride;
298
39.6M
        pu1_out += i4_out_stride;
299
39.6M
        pi2_res_ptr += i4_res_stride;
300
39.6M
        pi2_res_pred_ptr += i4_res_pred_stride;
301
302
39.6M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
303
39.6M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
304
305
39.6M
        pu1_out_ptr++;
306
39.6M
        pu1_pred++;
307
39.6M
        pi2_res++;
308
39.6M
        pi2_res_pred++;
309
39.6M
    }
310
10.0M
}
311
312
/**
313
 *******************************************************************************
314
 *
315
 * @brief
316
 *  This function performs inverse quant and Inverse transform type Ci4 for 8x8
317
 *block
318
 *
319
 * @par Description:
320
 *  Performs inverse transform Ci8 and adds the residue to get the
321
 *  reconstructed block
322
 *
323
 * @param[in] pi2_src
324
 *  Input 8x8coefficients
325
 *
326
 * @param[in] pu1_pred
327
 *  Prediction 8x8 block
328
 *
329
 * @param[out] pu1_recon
330
 *  Output 8x8 block
331
 *
332
 * @param[in] q_div
333
 *  QP/6
334
 *
335
 * @param[in] q_rem
336
 *  QP%6
337
 *
338
 * @param[in] q_lev
339
 *  Quantizer level
340
 *
341
 * @param[in] src_strd
342
 *  Input stride
343
 *
344
 * @param[in] i4_pred_stride,
345
 *  Prediction stride
346
 *
347
 * @param[in] i4_out_stride
348
 *  Output Stride
349
 *
350
 * @param[in] pi4_tmp
351
 *  temporary buffer of size 1*16 we dont need a bigger blcok since we reuse
352
 *  the tmp for each block
353
 *
354
 * @param[in] pu4_iquant_mat
355
 *  Pointer to the inverse quantization matrix
356
 *
357
 * @returns  Void
358
 *
359
 * @remarks
360
 *  None
361
 *
362
 *******************************************************************************
363
 */
364
void isvc_iquant_itrans_recon_8x8(buffer_container_t *ps_src, buffer_container_t *ps_pred,
365
                                  buffer_container_t *ps_res_pred, buffer_container_t *ps_res,
366
                                  buffer_container_t *ps_rec,
367
                                  iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
368
                                  WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx,
369
                                  UWORD8 u1_res_accumulate)
370
0
{
371
0
    WORD32 i;
372
0
    WORD16 i_z0, i_z1, i_z2, i_z3, i_z4, i_z5, i_z6, i_z7;
373
0
    WORD16 i_y0, i_y1, i_y2, i_y3, i_y4, i_y5, i_y6, i_y7;
374
0
    WORD16 i_macro;
375
0
    WORD32 q;
376
377
0
    WORD16 *pi2_src = ps_src->pv_data;
378
0
    WORD16 *pi2_res = ps_res->pv_data;
379
0
    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
380
0
    UWORD8 *pu1_pred = ps_pred->pv_data;
381
0
    UWORD8 *pu1_out = ps_rec->pv_data;
382
0
    WORD32 i4_res_stride = ps_res->i4_data_stride;
383
0
    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
384
0
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
385
0
    WORD32 i4_out_stride = ps_rec->i4_data_stride;
386
0
    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
387
0
    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
388
0
    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
389
0
    WORD16 *pi2_tmp_ptr = pi2_tmp;
390
0
    UWORD8 *pu1_pred_ptr = pu1_pred;
391
0
    WORD16 *pi2_res_ptr = pi2_res;
392
0
    WORD16 *pi2_res_pred_ptr = pi2_res_pred;
393
0
    UWORD8 *pu1_out_ptr = pu1_out;
394
0
    WORD32 rnd_fact = (u4_qp_div_6 < 6) ? (1 << (5 - u4_qp_div_6)) : 0;
395
0
    UNUSED(i4_iq_start_idx);
396
0
    UNUSED(pi2_dc_src);
397
398
0
    ASSERT(ps_src->i4_data_stride == SUB_BLK_WIDTH_8x8);
399
400
    /*************************************************************/
401
    /* De quantization of coefficients. Will be replaced by SIMD */
402
    /* operations on platform. Note : DC coeff is not scaled     */
403
    /*************************************************************/
404
0
    for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++)
405
0
    {
406
0
        q = pi2_src[i];
407
0
        INV_QUANT(q, pu2_iscal_mat[i], pu2_weigh_mat[i], u4_qp_div_6, rnd_fact, 6);
408
0
        pi2_tmp_ptr[i] = q;
409
0
    }
410
411
    /* Perform Inverse transform */
412
    /*--------------------------------------------------------------------*/
413
    /* IDCT [ Horizontal transformation ]                                 */
414
    /*--------------------------------------------------------------------*/
415
0
    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
416
0
    {
417
        /*------------------------------------------------------------------*/
418
        /* y0 = w0 + w4                                                     */
419
        /* y1 = -w3 + w5 - w7 - (w7 >> 1)                                   */
420
        /* y2 = w0 - w4                                                     */
421
        /* y3 = w1 + w7 - w3 - (w3 >> 1)                                    */
422
        /* y4 = (w2 >> 1) - w6                                              */
423
        /* y5 = -w1 + w7 + w5 + (w5 >> 1)                                   */
424
        /* y6 = w2 + (w6 >> 1)                                              */
425
        /* y7 = w3 + w5 + w1 + (w1 >> 1)                                    */
426
        /*------------------------------------------------------------------*/
427
0
        i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4]);
428
429
0
        i_y1 =
430
0
            ((WORD32) (-pi2_tmp_ptr[3]) + pi2_tmp_ptr[5] - pi2_tmp_ptr[7] - (pi2_tmp_ptr[7] >> 1));
431
432
0
        i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4]);
433
434
0
        i_y3 = ((WORD32) pi2_tmp_ptr[1] + pi2_tmp_ptr[7] - pi2_tmp_ptr[3] - (pi2_tmp_ptr[3] >> 1));
435
436
0
        i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6]);
437
438
0
        i_y5 =
439
0
            ((WORD32) (-pi2_tmp_ptr[1]) + pi2_tmp_ptr[7] + pi2_tmp_ptr[5] + (pi2_tmp_ptr[5] >> 1));
440
441
0
        i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1));
442
443
0
        i_y7 = ((WORD32) pi2_tmp_ptr[3] + pi2_tmp_ptr[5] + pi2_tmp_ptr[1] + (pi2_tmp_ptr[1] >> 1));
444
445
        /*------------------------------------------------------------------*/
446
        /* z0 = y0 + y6                                                     */
447
        /* z1 = y1 + (y7 >> 2)                                              */
448
        /* z2 = y2 + y4                                                     */
449
        /* z3 = y3 + (y5 >> 2)                                              */
450
        /* z4 = y2 - y4                                                     */
451
        /* z5 = (y3 >> 2) - y5                                              */
452
        /* z6 = y0 - y6                                                     */
453
        /* z7 = y7 - (y1 >> 2)                                              */
454
        /*------------------------------------------------------------------*/
455
0
        i_z0 = i_y0 + i_y6;
456
0
        i_z1 = i_y1 + (i_y7 >> 2);
457
0
        i_z2 = i_y2 + i_y4;
458
0
        i_z3 = i_y3 + (i_y5 >> 2);
459
0
        i_z4 = i_y2 - i_y4;
460
0
        i_z5 = (i_y3 >> 2) - i_y5;
461
0
        i_z6 = i_y0 - i_y6;
462
0
        i_z7 = i_y7 - (i_y1 >> 2);
463
464
        /*------------------------------------------------------------------*/
465
        /* x0 = z0 + z7                                                     */
466
        /* x1 = z2 + z5                                                     */
467
        /* x2 = z4 + z3                                                     */
468
        /* x3 = z6 + z1                                                     */
469
        /* x4 = z6 - z1                                                     */
470
        /* x5 = z4 - z3                                                     */
471
        /* x6 = z2 - z5                                                     */
472
        /* x7 = z0 - z7                                                     */
473
        /*------------------------------------------------------------------*/
474
0
        pi2_tmp_ptr[0] = i_z0 + i_z7;
475
0
        pi2_tmp_ptr[1] = i_z2 + i_z5;
476
0
        pi2_tmp_ptr[2] = i_z4 + i_z3;
477
0
        pi2_tmp_ptr[3] = i_z6 + i_z1;
478
0
        pi2_tmp_ptr[4] = i_z6 - i_z1;
479
0
        pi2_tmp_ptr[5] = i_z4 - i_z3;
480
0
        pi2_tmp_ptr[6] = i_z2 - i_z5;
481
0
        pi2_tmp_ptr[7] = i_z0 - i_z7;
482
483
        /* move to the next row */
484
        // pi2_src_ptr += SUB_BLK_WIDTH_8x8;
485
0
        pi2_tmp_ptr += SUB_BLK_WIDTH_8x8;
486
0
    }
487
    /*--------------------------------------------------------------------*/
488
    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
489
    /*                                                                    */
490
    /* Add the prediction and store it back to reconstructed frame buffer */
491
    /* [Prediction buffer itself in this case]                            */
492
    /*--------------------------------------------------------------------*/
493
494
0
    pi2_tmp_ptr = pi2_tmp;
495
0
    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
496
0
    {
497
0
        pu1_pred_ptr = pu1_pred;
498
0
        pi2_res_ptr = pi2_res;
499
0
        pi2_res_pred_ptr = pi2_res_pred;
500
0
        pu1_out = pu1_out_ptr;
501
        /*------------------------------------------------------------------*/
502
        /* y0j = w0j + w4j                                                  */
503
        /* y1j = -w3j + w5j -w7j -(w7j >> 1)                                */
504
        /* y2j = w0j -w4j                                                   */
505
        /* y3j = w1j + w7j -w3j -(w3j >> 1)                                 */
506
        /* y4j = ( w2j >> 1 ) -w6j                                          */
507
        /* y5j = -w1j + w7j + w5j + (w5j >> 1)                              */
508
        /* y6j = w2j + ( w6j >> 1 )                                         */
509
        /* y7j = w3j + w5j + w1j + (w1j >> 1)                               */
510
        /*------------------------------------------------------------------*/
511
0
        i_y0 = pi2_tmp_ptr[0] + pi2_tmp_ptr[32];
512
513
0
        i_y1 = (WORD32) (-pi2_tmp_ptr[24]) + pi2_tmp_ptr[40] - pi2_tmp_ptr[56] -
514
0
               (pi2_tmp_ptr[56] >> 1);
515
516
0
        i_y2 = pi2_tmp_ptr[0] - pi2_tmp_ptr[32];
517
518
0
        i_y3 = (WORD32) pi2_tmp_ptr[8] + pi2_tmp_ptr[56] - pi2_tmp_ptr[24] - (pi2_tmp_ptr[24] >> 1);
519
520
0
        i_y4 = (pi2_tmp_ptr[16] >> 1) - pi2_tmp_ptr[48];
521
522
0
        i_y5 =
523
0
            (WORD32) (-pi2_tmp_ptr[8]) + pi2_tmp_ptr[56] + pi2_tmp_ptr[40] + (pi2_tmp_ptr[40] >> 1);
524
525
0
        i_y6 = pi2_tmp_ptr[16] + (pi2_tmp_ptr[48] >> 1);
526
527
0
        i_y7 = (WORD32) pi2_tmp_ptr[24] + pi2_tmp_ptr[40] + pi2_tmp_ptr[8] + (pi2_tmp_ptr[8] >> 1);
528
529
        /*------------------------------------------------------------------*/
530
        /* z0j = y0j + y6j                                                  */
531
        /* z1j = y1j + (y7j >> 2)                                           */
532
        /* z2j = y2j + y4j                                                  */
533
        /* z3j = y3j + (y5j >> 2)                                           */
534
        /* z4j = y2j -y4j                                                   */
535
        /* z5j = (y3j >> 2) -y5j                                            */
536
        /* z6j = y0j -y6j                                                   */
537
        /* z7j = y7j -(y1j >> 2)                                            */
538
        /*------------------------------------------------------------------*/
539
0
        i_z0 = i_y0 + i_y6;
540
0
        i_z1 = i_y1 + (i_y7 >> 2);
541
0
        i_z2 = i_y2 + i_y4;
542
0
        i_z3 = i_y3 + (i_y5 >> 2);
543
0
        i_z4 = i_y2 - i_y4;
544
0
        i_z5 = (i_y3 >> 2) - i_y5;
545
0
        i_z6 = i_y0 - i_y6;
546
0
        i_z7 = i_y7 - (i_y1 >> 2);
547
548
        /*------------------------------------------------------------------*/
549
        /* x0j = z0j + z7j                                                  */
550
        /* x1j = z2j + z5j                                                  */
551
        /* x2j = z4j + z3j                                                  */
552
        /* x3j = z6j + z1j                                                  */
553
        /* x4j = z6j -z1j                                                   */
554
        /* x5j = z4j -z3j                                                   */
555
        /* x6j = z2j -z5j                                                   */
556
        /* x7j = z0j -z7j                                                   */
557
        /*------------------------------------------------------------------*/
558
0
        i_macro = ((i_z0 + i_z7 + 32) >> 6);
559
0
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
560
0
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
561
        /* Change uc_recBuffer to Point to next element in the same column*/
562
0
        pu1_pred_ptr += i4_pred_stride;
563
0
        pu1_out += i4_out_stride;
564
0
        pi2_res_ptr += i4_res_stride;
565
0
        pi2_res_pred_ptr += i4_res_pred_stride;
566
567
0
        i_macro = ((i_z2 + i_z5 + 32) >> 6);
568
0
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
569
0
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
570
0
        pu1_pred_ptr += i4_pred_stride;
571
0
        pu1_out += i4_out_stride;
572
0
        pi2_res_ptr += i4_res_stride;
573
0
        pi2_res_pred_ptr += i4_res_pred_stride;
574
575
0
        i_macro = ((i_z4 + i_z3 + 32) >> 6);
576
0
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
577
0
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
578
0
        pu1_pred_ptr += i4_pred_stride;
579
0
        pu1_out += i4_out_stride;
580
0
        pi2_res_ptr += i4_res_stride;
581
0
        pi2_res_pred_ptr += i4_res_pred_stride;
582
583
0
        i_macro = ((i_z6 + i_z1 + 32) >> 6);
584
0
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
585
0
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
586
0
        pu1_pred_ptr += i4_pred_stride;
587
0
        pu1_out += i4_out_stride;
588
0
        pi2_res_ptr += i4_res_stride;
589
0
        pi2_res_pred_ptr += i4_res_pred_stride;
590
591
0
        i_macro = ((i_z6 - i_z1 + 32) >> 6);
592
0
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
593
0
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
594
0
        pu1_pred_ptr += i4_pred_stride;
595
0
        pu1_out += i4_out_stride;
596
0
        pi2_res_ptr += i4_res_stride;
597
0
        pi2_res_pred_ptr += i4_res_pred_stride;
598
599
0
        i_macro = ((i_z4 - i_z3 + 32) >> 6);
600
0
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
601
0
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
602
0
        pu1_pred_ptr += i4_pred_stride;
603
0
        pu1_out += i4_out_stride;
604
0
        pi2_res_ptr += i4_res_stride;
605
0
        pi2_res_pred_ptr += i4_res_pred_stride;
606
607
0
        i_macro = ((i_z2 - i_z5 + 32) >> 6);
608
0
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
609
0
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
610
0
        pu1_pred_ptr += i4_pred_stride;
611
0
        pu1_out += i4_out_stride;
612
0
        pi2_res_ptr += i4_res_stride;
613
0
        pi2_res_pred_ptr += i4_res_pred_stride;
614
615
0
        i_macro = ((i_z0 - i_z7 + 32) >> 6);
616
0
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
617
0
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
618
619
0
        pi2_tmp_ptr++;
620
0
        pu1_out_ptr++;
621
0
        pu1_pred++;
622
0
        pi2_res++;
623
0
        pi2_res_pred++;
624
0
    }
625
0
}
626
627
/*
628
 ********************************************************************************
629
 *
630
 * @brief This function reconstructs a 4x4 sub block from quantized residue and
631
 * prediction buffer
632
 *
633
 * @par Description:
634
 *  The quantized residue is first inverse quantized, then inverse transformed.
635
 *  This inverse transformed content is added to the prediction buffer to recon-
636
 *  struct the end output
637
 *
638
 * @param[in] pi2_src
639
 *  quantized 4x4 block
640
 *
641
 * @param[in] pu1_pred
642
 *  prediction 4x4 block
643
 *
644
 * @param[out] pu1_out
645
 *  reconstructed 4x4 block
646
 *
647
 * @param[in] src_strd
648
 *  quantization buffer stride
649
 *
650
 * @param[in] i4_pred_stride,
651
 *  Prediction buffer stride
652
 *
653
 * @param[in] i4_out_stride
654
 *  recon buffer Stride
655
 *
656
 * @param[in] pu2_scaling_list
657
 *  pointer to scaling list
658
 *
659
 * @param[in] pu2_norm_adjust
660
 *  pointer to inverse scale matrix
661
 *
662
 * @param[in] u4_qp_div_6
663
 *  Floor (qp/6)
664
 *
665
 * @param[in] pi2_tmp
666
 * temporary buffer of size 1*16
667
 *
668
 * @returns none
669
 *
670
 * @remarks none
671
 *
672
 *******************************************************************************
673
 */
674
void isvc_iquant_itrans_recon_chroma_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred,
675
                                         buffer_container_t *ps_res_pred,
676
                                         buffer_container_t *ps_res, buffer_container_t *ps_rec,
677
                                         iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
678
                                         WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
679
                                         WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
680
1.41M
{
    /*
     * Rebuilds one 4x4 block in two passes:
     *  1. Row pass: the coefficients of each row are passed through INV_QUANT
     *     (except the first coefficient of row 0, which is read as-is from
     *     pi2_dc_src[0]) and a 1-D butterfly writes four values per row into
     *     pi2_tmp (16 entries in total).
     *  2. Column pass: a second butterfly runs down each column of pi2_tmp;
     *     every result is rounded with (x + 32) >> 6, passed through
     *     isvc_get_residue(), stored in ps_res, then added to the prediction
     *     and clipped with CLIP_U8 into ps_rec.
     * The residue, residual-prediction, prediction and recon pointers advance
     * by 2 per column, i.e. every other sample of those buffers is touched.
     */
681
1.41M
    WORD16 x0, x1, x2, x3, i;
682
1.41M
    WORD32 q0, q1, q2, q3;
683
1.41M
    WORD16 i_macro;
684
685
1.41M
    WORD16 *pi2_src = ps_src->pv_data;
686
1.41M
    WORD16 *pi2_res = ps_res->pv_data;
687
1.41M
    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
688
1.41M
    UWORD8 *pu1_pred = ps_pred->pv_data;
689
1.41M
    UWORD8 *pu1_out = ps_rec->pv_data;
690
1.41M
    WORD32 i4_src_stride = ps_src->i4_data_stride;
691
1.41M
    WORD32 i4_res_stride = ps_res->i4_data_stride;
692
1.41M
    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
693
1.41M
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
694
1.41M
    WORD32 i4_out_stride = ps_rec->i4_data_stride;
695
1.41M
    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
696
1.41M
    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
697
1.41M
    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
698
1.41M
    WORD16 *pi2_src_ptr = pi2_src;
699
1.41M
    WORD16 *pi2_tmp_ptr = pi2_tmp;
700
1.41M
    UWORD8 *pu1_pred_ptr = pu1_pred;
701
1.41M
    WORD16 *pi2_res_ptr = pi2_res;
702
1.41M
    WORD16 *pi2_res_pred_ptr = pi2_res_pred;
703
1.41M
    UWORD8 *pu1_out_ptr = pu1_out;
704
1.41M
    /* Rounding term handed to INV_QUANT: 1 << (3 - u4_qp_div_6) while */
    /* u4_qp_div_6 < 4, otherwise 0                                     */
    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
705
706
1.41M
    /* i4_iq_start_idx is not used by this function */
    UNUSED(i4_iq_start_idx);
707
708
    /* inverse quant */
709
    /* horizontal inverse transform: one coefficient row per iteration, */
    /* output goes to pi2_tmp                                            */
710
7.09M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
711
5.67M
    {
712
5.67M
        if(i == 0)
713
1.42M
        {
714
1.42M
            /* Row 0 only: the DC term comes from pi2_dc_src and is not passed */
            /* through INV_QUANT here (presumably already scaled by the        */
            /* caller - TODO confirm)                                           */
            q0 = pi2_dc_src[0];
715
1.42M
        }
716
4.25M
        else
717
4.25M
        {
718
4.25M
            q0 = pi2_src_ptr[0];
719
4.25M
            /* INV_QUANT is a macro defined elsewhere; it is used here as an */
            /* in-place rescale of its first argument - confirm against its   */
            /* definition                                                      */
            INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
720
4.25M
        }
721
722
5.67M
        q2 = pi2_src_ptr[2];
723
5.67M
        INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
724
725
5.67M
        /* even part of the butterfly (inputs 0 and 2) */
        x0 = q0 + q2;
726
5.67M
        x1 = q0 - q2;
727
728
5.67M
        q1 = pi2_src_ptr[1];
729
5.67M
        INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
730
731
5.67M
        q3 = pi2_src_ptr[3];
732
5.67M
        INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
733
734
5.67M
        /* odd part of the butterfly (inputs 1 and 3, one of them halved) */
        x2 = (q1 >> 1) - q3;
735
5.67M
        x3 = q1 + (q3 >> 1);
736
737
5.67M
        pi2_tmp_ptr[0] = x0 + x3;
738
5.67M
        pi2_tmp_ptr[1] = x1 + x2;
739
5.67M
        pi2_tmp_ptr[2] = x1 - x2;
740
5.67M
        pi2_tmp_ptr[3] = x0 - x3;
741
742
5.67M
        /* step to the next coefficient row, the next row of pi2_tmp and the */
        /* matching rows of both scaling tables                               */
        pi2_src_ptr += i4_src_stride;
743
5.67M
        pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
744
5.67M
        pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
745
5.67M
        pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
746
5.67M
    }
747
748
    /* vertical inverse transform: one column of pi2_tmp per iteration */
749
1.41M
    pi2_tmp_ptr = pi2_tmp;
750
7.09M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
751
5.67M
    {
752
5.67M
        /* restart the row walkers at the top of the current column */
        pu1_pred_ptr = pu1_pred;
753
5.67M
        pi2_res_ptr = pi2_res;
754
5.67M
        pi2_res_pred_ptr = pi2_res_pred;
755
5.67M
        pu1_out = pu1_out_ptr;
756
757
5.67M
        /* entries 0, 4, 8 and 12 are rows 0..3 of this column in pi2_tmp */
        x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
758
5.67M
        x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
759
5.67M
        x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
760
5.67M
        x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
761
762
        /* inverse prediction */
763
5.67M
        /* row 0 of the column */
        i_macro = x0 + x3;
764
5.67M
        /* add 32 and shift right by 6: rounding before the divide by 64 */
        i_macro = ((i_macro + 32) >> 6);
765
5.67M
        /* isvc_get_residue() is defined elsewhere; it receives the new     */
        /* residue, the residual prediction and the accumulate flag, and    */
        /* its result is what gets stored and reconstructed                 */
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
766
5.67M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
767
5.67M
        pu1_pred_ptr += i4_pred_stride;
768
5.67M
        pu1_out += i4_out_stride;
769
5.67M
        pi2_res_ptr += i4_res_stride;
770
5.67M
        pi2_res_pred_ptr += i4_res_pred_stride;
771
772
5.67M
        /* row 1 of the column */
        i_macro = x1 + x2;
773
5.67M
        i_macro = ((i_macro + 32) >> 6);
774
5.67M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
775
5.67M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
776
5.67M
        pu1_pred_ptr += i4_pred_stride;
777
5.67M
        pu1_out += i4_out_stride;
778
5.67M
        pi2_res_ptr += i4_res_stride;
779
5.67M
        pi2_res_pred_ptr += i4_res_pred_stride;
780
781
5.67M
        /* row 2 of the column */
        i_macro = x1 - x2;
782
5.67M
        i_macro = ((i_macro + 32) >> 6);
783
5.67M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
784
5.67M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
785
5.67M
        pu1_pred_ptr += i4_pred_stride;
786
5.67M
        pu1_out += i4_out_stride;
787
5.67M
        pi2_res_ptr += i4_res_stride;
788
5.67M
        pi2_res_pred_ptr += i4_res_pred_stride;
789
790
5.67M
        /* row 3 of the column */
        i_macro = x0 - x3;
791
5.67M
        i_macro = ((i_macro + 32) >> 6);
792
5.67M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
793
5.67M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
794
795
5.67M
        /* next column: pi2_tmp is dense (step 1), the other buffers step 2 */
        pi2_tmp_ptr++;
796
5.67M
        pu1_out_ptr += 2;  // Interleaved store for output
797
5.67M
        pu1_pred += 2;     // Interleaved load for pred buffer
798
5.67M
        pi2_res += 2;
799
5.67M
        pi2_res_pred += 2;
800
5.67M
    }
801
1.41M
}
802
803
/*
804
 ********************************************************************************
805
 *
806
 * @brief This function reconstructs a 4x4 sub block from quantized residue and
807
 * prediction buffer if only dc value is present for residue
808
 *
809
 * @par Description:
810
 *  The quantized residue is first inverse quantized,
811
 *  This inverse quantized content is added to the prediction buffer to recon-
812
 *  struct the end output
813
 *
814
 * @param[in] pi2_src
815
 *  quantized dc coefficient
816
 *
817
 * @param[in] pu1_pred
818
 *  prediction 4x4 block in interleaved format
819
 *
820
 * @param[in] i4_pred_stride,
821
 *  Prediction buffer stride in interleaved format
822
 *
823
 * @param[in] i4_out_stride
824
 *  recon buffer Stride
825
 *
826
 * @returns none
827
 *
828
 * @remarks none
829
 *
830
 *******************************************************************************
831
 */
832
833
void isvc_iquant_itrans_recon_chroma_4x4_dc(buffer_container_t *ps_src, buffer_container_t *ps_pred,
834
                                            buffer_container_t *ps_res_pred,
835
                                            buffer_container_t *ps_res, buffer_container_t *ps_rec,
836
                                            iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
837
                                            WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
838
                                            WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
839
624k
{
    /*
     * DC-only variant: a single value, (pi2_dc_src[0] + 32) >> 6, is used as
     * the residue for all 16 positions of the 4x4 block. Each position goes
     * through isvc_get_residue(), is stored in ps_res, then added to the
     * prediction and clipped with CLIP_U8 into ps_rec. Columns are stepped
     * by 2 in the residue, residual-prediction, prediction and recon buffers.
     */
840
624k
    WORD32 q0;
841
624k
    WORD16 i_macro, i;
842
843
624k
    WORD16 *pi2_src = ps_src->pv_data;
844
624k
    WORD16 *pi2_res = ps_res->pv_data;
845
624k
    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
846
624k
    UWORD8 *pu1_pred = ps_pred->pv_data;
847
624k
    UWORD8 *pu1_out = ps_rec->pv_data;
848
624k
    WORD32 i4_res_stride = ps_res->i4_data_stride;
849
624k
    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
850
624k
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
851
624k
    WORD32 i4_out_stride = ps_rec->i4_data_stride;
852
624k
    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
853
624k
    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
854
624k
    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
855
624k
    UWORD8 *pu1_pred_ptr = pu1_pred;
856
624k
    WORD16 *pi2_res_ptr = pi2_res;
857
624k
    WORD16 *pi2_res_pred_ptr = pi2_res_pred;
858
624k
    UWORD8 *pu1_out_ptr = pu1_out;
859
860
624k
    /* The source coefficients, scaling tables, qp term, scratch buffer and */
    /* start index are fetched above but never used: only the DC value      */
    /* matters in this variant                                               */
    UNUSED(pi2_src);
861
624k
    UNUSED(pu2_iscal_mat);
862
624k
    UNUSED(pu2_weigh_mat);
863
624k
    UNUSED(u4_qp_div_6);
864
624k
    UNUSED(pi2_tmp);
865
624k
    UNUSED(i4_iq_start_idx);
866
867
624k
    q0 = pi2_dc_src[0];  // Restoring dc value for intra case3
868
624k
    /* add 32 and shift right by 6: rounding before the divide by 64; the */
    /* value is loop-invariant and reused for every sample below           */
    i_macro = ((q0 + 32) >> 6);
869
870
3.11M
    /* one column of the block per iteration, four rows unrolled inside */
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
871
2.49M
    {
872
2.49M
        /* restart the row walkers at the top of the current column */
        pu1_pred_ptr = pu1_pred;
873
2.49M
        pi2_res_ptr = pi2_res;
874
2.49M
        pi2_res_pred_ptr = pi2_res_pred;
875
2.49M
        pu1_out = pu1_out_ptr;
876
877
        /* inverse prediction */
878
2.49M
        /* row 0 */
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
879
2.49M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
880
2.49M
        pu1_pred_ptr += i4_pred_stride;
881
2.49M
        pu1_out += i4_out_stride;
882
2.49M
        pi2_res_ptr += i4_res_stride;
883
2.49M
        pi2_res_pred_ptr += i4_res_pred_stride;
884
885
2.49M
        /* row 1 */
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
886
2.49M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
887
2.49M
        pu1_pred_ptr += i4_pred_stride;
888
2.49M
        pu1_out += i4_out_stride;
889
2.49M
        pi2_res_ptr += i4_res_stride;
890
2.49M
        pi2_res_pred_ptr += i4_res_pred_stride;
891
892
2.49M
        /* row 2 */
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
893
2.49M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
894
2.49M
        pu1_pred_ptr += i4_pred_stride;
895
2.49M
        pu1_out += i4_out_stride;
896
2.49M
        pi2_res_ptr += i4_res_stride;
897
2.49M
        pi2_res_pred_ptr += i4_res_pred_stride;
898
899
2.49M
        /* row 3 */
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
900
2.49M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
901
902
2.49M
        /* next column: every buffer steps by 2 samples */
        pu1_out_ptr += 2;
903
2.49M
        pu1_pred += 2;
904
2.49M
        pi2_res += 2;
905
2.49M
        pi2_res_pred += 2;
906
2.49M
    }
907
624k
}
908
909
/*
910
 ********************************************************************************
911
 *
912
 * @brief This function reconstructs a 4x4 sub block from quantized residue and
913
 * prediction buffer assuming cbf=0
914
 *
915
 * @param[in] ps_src
916
 *  quantized 4x4 block
917
 *
918
 * @param[in] ps_pred
919
 *  prediction 4x4 block
920
 *
921
 * @param[in] ps_res
922
 *  residue 4x4 block
923
 *
924
 * @param[in] ps_res_pred
925
 *  residual pred 4x4 block
926
 *
927
 * @param[out] ps_rec
928
 *  reconstructed 4x4 block
929
 *
930
 * @param[in] ps_iq_it_res_rec_constants
931
 *  inverse quant / inverse transform constants (unused in this function)
932
 *
933
 * @param[out] pi2_tmp
934
 *  scratch buf
935
 *
936
 * @param[out] pi2_dc_src
937
 *  Pointer to dc coeff location
938
 *
939
 * @param[out] i4_iq_start_idx
940
 *  Idx of first coeff
941
 *
942
 * @param[in] pi2_tmp
943
 * temporary buffer of size 1*16
944
 *
945
 * @param[in] u1_res_accumulate
946
 * Flag to control residual accumulation
947
 *
948
 * @returns none
949
 *
950
 *******************************************************************************
951
 */
952
void isvc_zcbf_iquant_itrans_recon_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred,
953
                                       buffer_container_t *ps_res_pred, buffer_container_t *ps_res,
954
                                       buffer_container_t *ps_rec,
955
                                       iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
956
                                       WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx,
957
                                       UWORD8 u1_res_accumulate)
958
42.3M
{
    /*
     * Reconstruction of a 4x4 block without any coefficient input: no
     * inverse quant or inverse transform is run. With u1_res_accumulate set,
     * the residue is whatever isvc_get_residue() returns for a new residue
     * of 0 and the residual prediction; otherwise the residue is 0 and the
     * prediction is copied to the recon buffer unchanged.
     */
959
42.3M
    WORD32 i, j;
960
961
42.3M
    UWORD8 *pu1_out = ps_rec->pv_data;
962
42.3M
    WORD16 *pi2_res = ps_res->pv_data;
963
42.3M
    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
964
42.3M
    UWORD8 *pu1_pred = ps_pred->pv_data;
965
42.3M
    WORD32 i4_out_stride = ps_rec->i4_data_stride;
966
42.3M
    WORD32 i4_res_stride = ps_res->i4_data_stride;
967
42.3M
    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
968
42.3M
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
969
970
42.3M
    /* These parameters are accepted but not used by this function */
    UNUSED(ps_src);
971
42.3M
    UNUSED(ps_iq_it_res_rec_constants);
972
42.3M
    UNUSED(pi2_tmp);
973
42.3M
    UNUSED(pi2_dc_src);
974
42.3M
    UNUSED(i4_iq_start_idx);
975
976
42.3M
    if(u1_res_accumulate)
977
229k
    {
978
1.14M
        /* i walks rows, j walks the 4 consecutive samples of a row */
        for(i = 0; i < SUB_BLK_HEIGHT_4x4; i++)
979
919k
        {
980
4.59M
            for(j = 0; j < SUB_BLK_WIDTH_4x4; j++)
981
3.67M
            {
982
3.67M
                /* new residue is 0, so the stored residue depends only on */
                /* the residual prediction and isvc_get_residue()          */
                pi2_res[j + i * i4_res_stride] = isvc_get_residue(
983
3.67M
                    0, pi2_res_pred[j + i * i4_res_pred_stride], u1_res_accumulate);
984
3.67M
                /* recon = prediction + residue, clamped to [0, UINT8_MAX] */
                pu1_out[j + i * i4_out_stride] =
985
3.67M
                    CLIP3(0, UINT8_MAX,
986
3.67M
                          pu1_pred[j + i * i4_pred_stride] + pi2_res[j + i * i4_res_stride]);
987
3.67M
            }
988
919k
        }
989
229k
    }
990
42.1M
    else
991
42.1M
    {
992
208M
        /* no accumulation: zero the residue and copy the prediction */
        for(i = 0; i < SUB_BLK_HEIGHT_4x4; i++)
993
165M
        {
994
828M
            for(j = 0; j < SUB_BLK_WIDTH_4x4; j++)
995
662M
            {
996
662M
                pi2_res[j + i * i4_res_stride] = 0;
997
662M
                pu1_out[j + i * i4_out_stride] = pu1_pred[j + i * i4_pred_stride];
998
662M
            }
999
165M
        }
1000
42.1M
    }
1001
42.3M
}
1002
1003
/*
1004
 ********************************************************************************
1005
 *
1006
 * @brief This function reconstructs a 4x4 chroma sub block from quantized residue and
1007
 * prediction buffer assuming cbf=0
1008
 *
1009
 * @param[in] ps_src
1010
 *  quantized 4x4 block
1011
 *
1012
 * @param[in] ps_pred
1013
 *  prediction 4x4 block
1014
 *
1015
 * @param[in] ps_res
1016
 *  residue 4x4 block
1017
 *
1018
 * @param[in] ps_res_pred
1019
 *  residual pred 4x4 block
1020
 *
1021
 * @param[out] ps_rec
1022
 *  reconstructed 4x4 block
1023
 *
1024
 * @param[in] ps_iq_it_res_rec_constants
1025
 *  inverse quant / inverse transform constants (unused in this function)
1026
 *
1027
 * @param[out] pi2_tmp
1028
 *  scratch buf
1029
 *
1030
 * @param[out] pi2_dc_src
1031
 *  Pointer to dc coeff location
1032
 *
1033
 * @param[out] i4_iq_start_idx
1034
 *  Idx of first coeff
1035
 *
1036
 * @param[in] pi2_tmp
1037
 * temporary buffer of size 1*16
1038
 *
1039
 * @param[in] u1_res_accumulate
1040
 * Flag to control residual accumulation
1041
 *
1042
 * @returns none
1043
 *
1044
 *******************************************************************************
1045
 */
1046
void isvc_chroma_zcbf_iquant_itrans_recon_4x4(
1047
    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
1048
    buffer_container_t *ps_res, buffer_container_t *ps_rec,
1049
    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
1050
    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
1051
62.9M
{
    /*
     * Reconstruction of a 4x4 block without any coefficient input, touching
     * every other sample of each row (j = 0, 2, 4, 6). No inverse quant or
     * inverse transform is run. With u1_res_accumulate set, the residue is
     * whatever isvc_get_residue() returns for a new residue of 0 and the
     * residual prediction; otherwise the residue is 0 and the prediction is
     * copied to the recon buffer unchanged. Samples at odd j are left alone.
     */
1052
62.9M
    WORD32 i, j;
1053
1054
62.9M
    UWORD8 *pu1_out = ps_rec->pv_data;
1055
62.9M
    WORD32 i4_out_stride = ps_rec->i4_data_stride;
1056
62.9M
    WORD16 *pi2_res = ps_res->pv_data;
1057
62.9M
    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
1058
62.9M
    UWORD8 *pu1_pred = ps_pred->pv_data;
1059
62.9M
    WORD32 i4_res_stride = ps_res->i4_data_stride;
1060
62.9M
    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
1061
62.9M
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
1062
1063
62.9M
    /* These parameters are accepted but not used by this function */
    UNUSED(ps_src);
1064
62.9M
    UNUSED(ps_iq_it_res_rec_constants);
1065
62.9M
    UNUSED(pi2_tmp);
1066
62.9M
    UNUSED(pi2_dc_src);
1067
62.9M
    UNUSED(i4_iq_start_idx);
1068
1069
62.9M
    if(u1_res_accumulate)
1070
19.1k
    {
1071
95.9k
        /* i walks rows; j covers 4 samples spaced 2 apart within the row */
        for(i = 0; i < SUB_BLK_HEIGHT_4x4; i++)
1072
76.7k
        {
1073
383k
            for(j = 0; j < SUB_BLK_WIDTH_4x4 * 2; j += 2)
1074
307k
            {
1075
307k
                /* new residue is 0, so the stored residue depends only on */
                /* the residual prediction and isvc_get_residue()          */
                pi2_res[j + i * i4_res_stride] = isvc_get_residue(
1076
307k
                    0, pi2_res_pred[j + i * i4_res_pred_stride], u1_res_accumulate);
1077
307k
                /* recon = prediction + residue, clamped to [0, UINT8_MAX] */
                pu1_out[j + i * i4_out_stride] = CLIP3(
1078
307k
                    0, UINT8_MAX,
1079
307k
                    ((WORD16) pu1_pred[j + i * i4_pred_stride]) + pi2_res[j + i * i4_res_stride]);
1080
307k
            }
1081
76.7k
        }
1082
19.1k
    }
1083
62.9M
    else
1084
62.9M
    {
1085
313M
        /* no accumulation: zero the residue and copy the prediction */
        for(i = 0; i < SUB_BLK_HEIGHT_4x4; i++)
1086
250M
        {
1087
1.25G
            for(j = 0; j < SUB_BLK_WIDTH_4x4 * 2; j += 2)
1088
999M
            {
1089
999M
                pi2_res[j + i * i4_res_stride] = 0;
1090
999M
                pu1_out[j + i * i4_out_stride] = pu1_pred[j + i * i4_pred_stride];
1091
999M
            }
1092
250M
        }
1093
62.9M
    }
1094
62.9M
}