Coverage Report

Created: 2026-02-14 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavc/common/svc/isvc_iquant_itrans_recon.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2022 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
 */
20
/**
21
 *******************************************************************************
22
 * @file
23
 *  isvc_iquant_itrans_recon.c
24
 *
25
 * @brief
26
 *  Contains definition of functions for h264 inverse quantization inverse
27
 *transformation and recon
28
 *
29
 * @author
30
 *  Ittiam
31
 *
32
 *  @par List of Functions:
33
 *  - ih264_iquant_itrans_recon_4x4()
34
 *  - ih264_iquant_itrans_recon_8x8()
35
 *  - ih264_iquant_itrans_recon_4x4_dc()
36
 *  - ih264_iquant_itrans_recon_8x8_dc()
37
 *  - ih264_iquant_itrans_recon_chroma_4x4()
38
 *  -ih264_iquant_itrans_recon_chroma_4x4_dc()
39
 *
40
 * @remarks
41
 *
42
 *******************************************************************************
43
 */
44
45
/*****************************************************************************/
46
/* File Includes                                                             */
47
/*****************************************************************************/
48
#include <stdint.h>
49
50
#include "ih264_typedefs.h"
51
#include "ih264_debug.h"
52
#include "ih264_defs.h"
53
#include "ih264_trans_macros.h"
54
#include "ih264_macros.h"
55
#include "ih264_platform_macros.h"
56
#include "ih264_trans_data.h"
57
#include "ih264_size_defs.h"
58
#include "ih264_structs.h"
59
#include "isvc_trans_quant_itrans_iquant.h"
60
61
/*
62
 ********************************************************************************
63
 *
64
 * @brief This function reconstructs a 4x4 sub block from quantized resiude and
65
 * prediction buffer
66
 *
67
 * @par Description:
68
 *  The quantized residue is first inverse quantized, then inverse transformed.
69
 *  This inverse transformed content is added to the prediction buffer to recon-
70
 *  struct the end output
71
 *
72
 * @param[in] pi2_src
73
 *  quantized 4x4 block
74
 *
75
 * @param[in] pu1_pred
76
 *  prediction 4x4 block
77
 *
78
 * @param[in] pi2_res
79
 *  residue 4x4 block
80
 *
81
 * @param[out] pu1_out
82
 *  reconstructed 4x4 block
83
 *
84
 * @param[in] src_strd
85
 *  quantization buffer stride
86
 *
87
 * @param[in] i4_pred_stride,
88
 *  Prediction buffer stride
89
 *
90
 * @param[in] i4_out_stride
91
 *  recon buffer Stride
92
 *
93
 * @param[in] i4_res_stride
94
 *  residue buffer Stride
95
 *
96
 * @param[in] pu2_scaling_list
97
 *  pointer to scaling list
98
 *
99
 * @param[in] pu2_norm_adjust
100
 *  pointer to inverse scale matrix
101
 *
102
 * @param[in] u4_qp_div_6
103
 *  Floor (qp/6)
104
 *
105
 * @param[in] pi2_tmp
106
 * temporary buffer of size 1*16
107
 *
108
 * @returns none
109
 *
110
 * @remarks none
111
 *
112
 *******************************************************************************
113
 */
114
void isvc_iquant_itrans_recon_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred,
115
                                  buffer_container_t *ps_res_pred, buffer_container_t *ps_res,
116
                                  buffer_container_t *ps_rec,
117
                                  iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
118
                                  WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx,
119
                                  UWORD8 u1_res_accumulate)
120
5.66M
{
121
5.66M
    WORD16 x0, x1, x2, x3, i;
122
5.66M
    WORD32 q0, q1, q2, q3;
123
5.66M
    WORD16 i_macro;
124
125
5.66M
    WORD16 *pi2_src = ps_src->pv_data;
126
5.66M
    WORD16 *pi2_res = ps_res->pv_data;
127
5.66M
    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
128
5.66M
    UWORD8 *pu1_pred = ps_pred->pv_data;
129
5.66M
    UWORD8 *pu1_out = ps_rec->pv_data;
130
5.66M
    WORD32 i4_src_stride = ps_src->i4_data_stride;
131
5.66M
    WORD32 i4_res_stride = ps_res->i4_data_stride;
132
5.66M
    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
133
5.66M
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
134
5.66M
    WORD32 i4_out_stride = ps_rec->i4_data_stride;
135
5.66M
    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
136
5.66M
    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
137
5.66M
    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
138
5.66M
    WORD16 *pi2_src_ptr = pi2_src;
139
5.66M
    WORD16 *pi2_tmp_ptr = pi2_tmp;
140
5.66M
    UWORD8 *pu1_pred_ptr = pu1_pred;
141
5.66M
    WORD16 *pi2_res_ptr = pi2_res;
142
5.66M
    WORD16 *pi2_res_pred_ptr = pi2_res_pred;
143
5.66M
    UWORD8 *pu1_out_ptr = pu1_out;
144
5.66M
    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
145
146
    /* inverse quant */
147
    /*horizontal inverse transform */
148
28.1M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
149
22.4M
    {
150
22.4M
        q0 = pi2_src_ptr[0];
151
22.4M
        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
152
22.4M
        if(i == 0 && i4_iq_start_idx == 1) q0 = pi2_dc_src[0];  // Restoring dc value for intra case
153
154
22.4M
        q2 = pi2_src_ptr[2];
155
22.4M
        INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
156
157
22.4M
        x0 = q0 + q2;
158
22.4M
        x1 = q0 - q2;
159
160
22.4M
        q1 = pi2_src_ptr[1];
161
22.4M
        INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
162
163
22.4M
        q3 = pi2_src_ptr[3];
164
22.4M
        INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
165
166
22.4M
        x2 = (q1 >> 1) - q3;
167
22.4M
        x3 = q1 + (q3 >> 1);
168
169
22.4M
        pi2_tmp_ptr[0] = x0 + x3;
170
22.4M
        pi2_tmp_ptr[1] = x1 + x2;
171
22.4M
        pi2_tmp_ptr[2] = x1 - x2;
172
22.4M
        pi2_tmp_ptr[3] = x0 - x3;
173
174
22.4M
        pi2_src_ptr += i4_src_stride;
175
22.4M
        pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
176
22.4M
        pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
177
22.4M
        pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
178
22.4M
    }
179
180
    /* vertical inverse transform */
181
5.66M
    pi2_tmp_ptr = pi2_tmp;
182
27.8M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
183
22.2M
    {
184
22.2M
        pu1_pred_ptr = pu1_pred;
185
22.2M
        pi2_res_ptr = pi2_res;
186
22.2M
        pi2_res_pred_ptr = pi2_res_pred;
187
22.2M
        pu1_out = pu1_out_ptr;
188
189
22.2M
        x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
190
22.2M
        x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
191
22.2M
        x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
192
22.2M
        x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
193
194
        /* inverse prediction */
195
22.2M
        i_macro = x0 + x3;
196
22.2M
        i_macro = ((i_macro + 32) >> 6);
197
22.2M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
198
22.2M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
199
22.2M
        pu1_pred_ptr += i4_pred_stride;
200
22.2M
        pu1_out += i4_out_stride;
201
22.2M
        pi2_res_ptr += i4_res_stride;
202
22.2M
        pi2_res_pred_ptr += i4_res_pred_stride;
203
204
22.2M
        i_macro = x1 + x2;
205
22.2M
        i_macro = ((i_macro + 32) >> 6);
206
22.2M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
207
22.2M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
208
22.2M
        pu1_pred_ptr += i4_pred_stride;
209
22.2M
        pu1_out += i4_out_stride;
210
22.2M
        pi2_res_ptr += i4_res_stride;
211
22.2M
        pi2_res_pred_ptr += i4_res_pred_stride;
212
213
22.2M
        i_macro = x1 - x2;
214
22.2M
        i_macro = ((i_macro + 32) >> 6);
215
22.2M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
216
22.2M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
217
22.2M
        pu1_pred_ptr += i4_pred_stride;
218
22.2M
        pu1_out += i4_out_stride;
219
22.2M
        pi2_res_ptr += i4_res_stride;
220
22.2M
        pi2_res_pred_ptr += i4_res_pred_stride;
221
222
22.2M
        i_macro = x0 - x3;
223
22.2M
        i_macro = ((i_macro + 32) >> 6);
224
22.2M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
225
22.2M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
226
227
22.2M
        pi2_tmp_ptr++;
228
22.2M
        pu1_out_ptr++;
229
22.2M
        pu1_pred++;
230
22.2M
        pi2_res++;
231
22.2M
        pi2_res_pred++;
232
22.2M
    }
233
5.66M
}
234
235
void isvc_iquant_itrans_recon_4x4_dc(buffer_container_t *ps_src, buffer_container_t *ps_pred,
236
                                     buffer_container_t *ps_res_pred, buffer_container_t *ps_res,
237
                                     buffer_container_t *ps_rec,
238
                                     iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
239
                                     WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx,
240
                                     UWORD8 u1_res_accumulate)
241
8.84M
{
242
8.84M
    WORD16 *pi2_src = ps_src->pv_data;
243
8.84M
    WORD16 *pi2_res = ps_res->pv_data;
244
8.84M
    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
245
8.84M
    UWORD8 *pu1_pred = ps_pred->pv_data;
246
8.84M
    UWORD8 *pu1_out = ps_rec->pv_data;
247
8.84M
    WORD32 i4_res_stride = ps_res->i4_data_stride;
248
8.84M
    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
249
8.84M
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
250
8.84M
    WORD32 i4_out_stride = ps_rec->i4_data_stride;
251
8.84M
    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
252
8.84M
    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
253
8.84M
    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
254
8.84M
    UWORD8 *pu1_pred_ptr = pu1_pred;
255
8.84M
    WORD16 *pi2_res_ptr = pi2_res;
256
8.84M
    WORD16 *pi2_res_pred_ptr = pi2_res_pred;
257
8.84M
    UWORD8 *pu1_out_ptr = pu1_out;
258
8.84M
    WORD32 q0;
259
8.84M
    WORD16 i_macro, i;
260
8.84M
    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
261
8.84M
    UNUSED(pi2_tmp);
262
263
8.84M
    if(i4_iq_start_idx == 0)
264
0
    {
265
0
        q0 = pi2_src[0];
266
0
        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
267
0
    }
268
8.84M
    else
269
8.84M
    {
270
8.84M
        q0 = pi2_dc_src[0];  // Restoring dc value for intra case3
271
8.84M
    }
272
8.84M
    i_macro = ((q0 + 32) >> 6);
273
43.1M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
274
34.2M
    {
275
34.2M
        pu1_pred_ptr = pu1_pred;
276
34.2M
        pi2_res_ptr = pi2_res;
277
34.2M
        pi2_res_pred_ptr = pi2_res_pred;
278
34.2M
        pu1_out = pu1_out_ptr;
279
280
        /* inverse prediction */
281
34.2M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
282
34.2M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
283
34.2M
        pu1_pred_ptr += i4_pred_stride;
284
34.2M
        pu1_out += i4_out_stride;
285
34.2M
        pi2_res_ptr += i4_res_stride;
286
34.2M
        pi2_res_pred_ptr += i4_res_pred_stride;
287
288
34.2M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
289
34.2M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
290
34.2M
        pu1_pred_ptr += i4_pred_stride;
291
34.2M
        pu1_out += i4_out_stride;
292
34.2M
        pi2_res_ptr += i4_res_stride;
293
34.2M
        pi2_res_pred_ptr += i4_res_pred_stride;
294
295
34.2M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
296
34.2M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
297
34.2M
        pu1_pred_ptr += i4_pred_stride;
298
34.2M
        pu1_out += i4_out_stride;
299
34.2M
        pi2_res_ptr += i4_res_stride;
300
34.2M
        pi2_res_pred_ptr += i4_res_pred_stride;
301
302
34.2M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
303
34.2M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
304
305
34.2M
        pu1_out_ptr++;
306
34.2M
        pu1_pred++;
307
34.2M
        pi2_res++;
308
34.2M
        pi2_res_pred++;
309
34.2M
    }
310
8.84M
}
311
312
/**
313
 *******************************************************************************
314
 *
315
 * @brief
316
 *  This function performs inverse quant and Inverse transform type Ci4 for 8x8
317
 *block
318
 *
319
 * @par Description:
320
 *  Performs inverse transform Ci8 and adds the residue to get the
321
 *  reconstructed block
322
 *
323
 * @param[in] pi2_src
324
 *  Input 8x8coefficients
325
 *
326
 * @param[in] pu1_pred
327
 *  Prediction 8x8 block
328
 *
329
 * @param[out] pu1_recon
330
 *  Output 8x8 block
331
 *
332
 * @param[in] q_div
333
 *  QP/6
334
 *
335
 * @param[in] q_rem
336
 *  QP%6
337
 *
338
 * @param[in] q_lev
339
 *  Quantizer level
340
 *
341
 * @param[in] src_strd
342
 *  Input stride
343
 *
344
 * @param[in] i4_pred_stride,
345
 *  Prediction stride
346
 *
347
 * @param[in] i4_out_stride
348
 *  Output Stride
349
 *
350
 * @param[in] pi4_tmp
351
 *  temporary buffer of size 1*16 we dont need a bigger blcok since we reuse
352
 *  the tmp for each block
353
 *
354
 * @param[in] pu4_iquant_mat
355
 *  Pointer to the inverse quantization matrix
356
 *
357
 * @returns  Void
358
 *
359
 * @remarks
360
 *  None
361
 *
362
 *******************************************************************************
363
 */
364
void isvc_iquant_itrans_recon_8x8(buffer_container_t *ps_src, buffer_container_t *ps_pred,
365
                                  buffer_container_t *ps_res_pred, buffer_container_t *ps_res,
366
                                  buffer_container_t *ps_rec,
367
                                  iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
368
                                  WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx,
369
                                  UWORD8 u1_res_accumulate)
370
0
{
371
0
    WORD32 i;
372
0
    WORD16 i_z0, i_z1, i_z2, i_z3, i_z4, i_z5, i_z6, i_z7;
373
0
    WORD16 i_y0, i_y1, i_y2, i_y3, i_y4, i_y5, i_y6, i_y7;
374
0
    WORD16 i_macro;
375
0
    WORD32 q;
376
377
0
    WORD16 *pi2_src = ps_src->pv_data;
378
0
    WORD16 *pi2_res = ps_res->pv_data;
379
0
    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
380
0
    UWORD8 *pu1_pred = ps_pred->pv_data;
381
0
    UWORD8 *pu1_out = ps_rec->pv_data;
382
0
    WORD32 i4_res_stride = ps_res->i4_data_stride;
383
0
    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
384
0
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
385
0
    WORD32 i4_out_stride = ps_rec->i4_data_stride;
386
0
    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
387
0
    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
388
0
    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
389
0
    WORD16 *pi2_tmp_ptr = pi2_tmp;
390
0
    UWORD8 *pu1_pred_ptr = pu1_pred;
391
0
    WORD16 *pi2_res_ptr = pi2_res;
392
0
    WORD16 *pi2_res_pred_ptr = pi2_res_pred;
393
0
    UWORD8 *pu1_out_ptr = pu1_out;
394
0
    WORD32 rnd_fact = (u4_qp_div_6 < 6) ? (1 << (5 - u4_qp_div_6)) : 0;
395
0
    UNUSED(i4_iq_start_idx);
396
0
    UNUSED(pi2_dc_src);
397
398
0
    ASSERT(ps_src->i4_data_stride == SUB_BLK_WIDTH_8x8);
399
400
    /*************************************************************/
401
    /* De quantization of coefficients. Will be replaced by SIMD */
402
    /* operations on platform. Note : DC coeff is not scaled     */
403
    /*************************************************************/
404
0
    for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++)
405
0
    {
406
0
        q = pi2_src[i];
407
0
        INV_QUANT(q, pu2_iscal_mat[i], pu2_weigh_mat[i], u4_qp_div_6, rnd_fact, 6);
408
0
        pi2_tmp_ptr[i] = q;
409
0
    }
410
411
    /* Perform Inverse transform */
412
    /*--------------------------------------------------------------------*/
413
    /* IDCT [ Horizontal transformation ]                                 */
414
    /*--------------------------------------------------------------------*/
415
0
    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
416
0
    {
417
        /*------------------------------------------------------------------*/
418
        /* y0 = w0 + w4                                                     */
419
        /* y1 = -w3 + w5 - w7 - (w7 >> 1)                                   */
420
        /* y2 = w0 - w4                                                     */
421
        /* y3 = w1 + w7 - w3 - (w3 >> 1)                                    */
422
        /* y4 = (w2 >> 1) - w6                                              */
423
        /* y5 = -w1 + w7 + w5 + (w5 >> 1)                                   */
424
        /* y6 = w2 + (w6 >> 1)                                              */
425
        /* y7 = w3 + w5 + w1 + (w1 >> 1)                                    */
426
        /*------------------------------------------------------------------*/
427
0
        i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4]);
428
429
0
        i_y1 =
430
0
            ((WORD32) (-pi2_tmp_ptr[3]) + pi2_tmp_ptr[5] - pi2_tmp_ptr[7] - (pi2_tmp_ptr[7] >> 1));
431
432
0
        i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4]);
433
434
0
        i_y3 = ((WORD32) pi2_tmp_ptr[1] + pi2_tmp_ptr[7] - pi2_tmp_ptr[3] - (pi2_tmp_ptr[3] >> 1));
435
436
0
        i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6]);
437
438
0
        i_y5 =
439
0
            ((WORD32) (-pi2_tmp_ptr[1]) + pi2_tmp_ptr[7] + pi2_tmp_ptr[5] + (pi2_tmp_ptr[5] >> 1));
440
441
0
        i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1));
442
443
0
        i_y7 = ((WORD32) pi2_tmp_ptr[3] + pi2_tmp_ptr[5] + pi2_tmp_ptr[1] + (pi2_tmp_ptr[1] >> 1));
444
445
        /*------------------------------------------------------------------*/
446
        /* z0 = y0 + y6                                                     */
447
        /* z1 = y1 + (y7 >> 2)                                              */
448
        /* z2 = y2 + y4                                                     */
449
        /* z3 = y3 + (y5 >> 2)                                              */
450
        /* z4 = y2 - y4                                                     */
451
        /* z5 = (y3 >> 2) - y5                                              */
452
        /* z6 = y0 - y6                                                     */
453
        /* z7 = y7 - (y1 >> 2)                                              */
454
        /*------------------------------------------------------------------*/
455
0
        i_z0 = i_y0 + i_y6;
456
0
        i_z1 = i_y1 + (i_y7 >> 2);
457
0
        i_z2 = i_y2 + i_y4;
458
0
        i_z3 = i_y3 + (i_y5 >> 2);
459
0
        i_z4 = i_y2 - i_y4;
460
0
        i_z5 = (i_y3 >> 2) - i_y5;
461
0
        i_z6 = i_y0 - i_y6;
462
0
        i_z7 = i_y7 - (i_y1 >> 2);
463
464
        /*------------------------------------------------------------------*/
465
        /* x0 = z0 + z7                                                     */
466
        /* x1 = z2 + z5                                                     */
467
        /* x2 = z4 + z3                                                     */
468
        /* x3 = z6 + z1                                                     */
469
        /* x4 = z6 - z1                                                     */
470
        /* x5 = z4 - z3                                                     */
471
        /* x6 = z2 - z5                                                     */
472
        /* x7 = z0 - z7                                                     */
473
        /*------------------------------------------------------------------*/
474
0
        pi2_tmp_ptr[0] = i_z0 + i_z7;
475
0
        pi2_tmp_ptr[1] = i_z2 + i_z5;
476
0
        pi2_tmp_ptr[2] = i_z4 + i_z3;
477
0
        pi2_tmp_ptr[3] = i_z6 + i_z1;
478
0
        pi2_tmp_ptr[4] = i_z6 - i_z1;
479
0
        pi2_tmp_ptr[5] = i_z4 - i_z3;
480
0
        pi2_tmp_ptr[6] = i_z2 - i_z5;
481
0
        pi2_tmp_ptr[7] = i_z0 - i_z7;
482
483
        /* move to the next row */
484
        // pi2_src_ptr += SUB_BLK_WIDTH_8x8;
485
0
        pi2_tmp_ptr += SUB_BLK_WIDTH_8x8;
486
0
    }
487
    /*--------------------------------------------------------------------*/
488
    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
489
    /*                                                                    */
490
    /* Add the prediction and store it back to reconstructed frame buffer */
491
    /* [Prediction buffer itself in this case]                            */
492
    /*--------------------------------------------------------------------*/
493
494
0
    pi2_tmp_ptr = pi2_tmp;
495
0
    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
496
0
    {
497
0
        pu1_pred_ptr = pu1_pred;
498
0
        pi2_res_ptr = pi2_res;
499
0
        pi2_res_pred_ptr = pi2_res_pred;
500
0
        pu1_out = pu1_out_ptr;
501
        /*------------------------------------------------------------------*/
502
        /* y0j = w0j + w4j                                                  */
503
        /* y1j = -w3j + w5j -w7j -(w7j >> 1)                                */
504
        /* y2j = w0j -w4j                                                   */
505
        /* y3j = w1j + w7j -w3j -(w3j >> 1)                                 */
506
        /* y4j = ( w2j >> 1 ) -w6j                                          */
507
        /* y5j = -w1j + w7j + w5j + (w5j >> 1)                              */
508
        /* y6j = w2j + ( w6j >> 1 )                                         */
509
        /* y7j = w3j + w5j + w1j + (w1j >> 1)                               */
510
        /*------------------------------------------------------------------*/
511
0
        i_y0 = pi2_tmp_ptr[0] + pi2_tmp_ptr[32];
512
513
0
        i_y1 = (WORD32) (-pi2_tmp_ptr[24]) + pi2_tmp_ptr[40] - pi2_tmp_ptr[56] -
514
0
               (pi2_tmp_ptr[56] >> 1);
515
516
0
        i_y2 = pi2_tmp_ptr[0] - pi2_tmp_ptr[32];
517
518
0
        i_y3 = (WORD32) pi2_tmp_ptr[8] + pi2_tmp_ptr[56] - pi2_tmp_ptr[24] - (pi2_tmp_ptr[24] >> 1);
519
520
0
        i_y4 = (pi2_tmp_ptr[16] >> 1) - pi2_tmp_ptr[48];
521
522
0
        i_y5 =
523
0
            (WORD32) (-pi2_tmp_ptr[8]) + pi2_tmp_ptr[56] + pi2_tmp_ptr[40] + (pi2_tmp_ptr[40] >> 1);
524
525
0
        i_y6 = pi2_tmp_ptr[16] + (pi2_tmp_ptr[48] >> 1);
526
527
0
        i_y7 = (WORD32) pi2_tmp_ptr[24] + pi2_tmp_ptr[40] + pi2_tmp_ptr[8] + (pi2_tmp_ptr[8] >> 1);
528
529
        /*------------------------------------------------------------------*/
530
        /* z0j = y0j + y6j                                                  */
531
        /* z1j = y1j + (y7j >> 2)                                           */
532
        /* z2j = y2j + y4j                                                  */
533
        /* z3j = y3j + (y5j >> 2)                                           */
534
        /* z4j = y2j -y4j                                                   */
535
        /* z5j = (y3j >> 2) -y5j                                            */
536
        /* z6j = y0j -y6j                                                   */
537
        /* z7j = y7j -(y1j >> 2)                                            */
538
        /*------------------------------------------------------------------*/
539
0
        i_z0 = i_y0 + i_y6;
540
0
        i_z1 = i_y1 + (i_y7 >> 2);
541
0
        i_z2 = i_y2 + i_y4;
542
0
        i_z3 = i_y3 + (i_y5 >> 2);
543
0
        i_z4 = i_y2 - i_y4;
544
0
        i_z5 = (i_y3 >> 2) - i_y5;
545
0
        i_z6 = i_y0 - i_y6;
546
0
        i_z7 = i_y7 - (i_y1 >> 2);
547
548
        /*------------------------------------------------------------------*/
549
        /* x0j = z0j + z7j                                                  */
550
        /* x1j = z2j + z5j                                                  */
551
        /* x2j = z4j + z3j                                                  */
552
        /* x3j = z6j + z1j                                                  */
553
        /* x4j = z6j -z1j                                                   */
554
        /* x5j = z4j -z3j                                                   */
555
        /* x6j = z2j -z5j                                                   */
556
        /* x7j = z0j -z7j                                                   */
557
        /*------------------------------------------------------------------*/
558
0
        i_macro = ((i_z0 + i_z7 + 32) >> 6);
559
0
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
560
0
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
561
        /* Change uc_recBuffer to Point to next element in the same column*/
562
0
        pu1_pred_ptr += i4_pred_stride;
563
0
        pu1_out += i4_out_stride;
564
0
        pi2_res_ptr += i4_res_stride;
565
0
        pi2_res_pred_ptr += i4_res_pred_stride;
566
567
0
        i_macro = ((i_z2 + i_z5 + 32) >> 6);
568
0
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
569
0
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
570
0
        pu1_pred_ptr += i4_pred_stride;
571
0
        pu1_out += i4_out_stride;
572
0
        pi2_res_ptr += i4_res_stride;
573
0
        pi2_res_pred_ptr += i4_res_pred_stride;
574
575
0
        i_macro = ((i_z4 + i_z3 + 32) >> 6);
576
0
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
577
0
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
578
0
        pu1_pred_ptr += i4_pred_stride;
579
0
        pu1_out += i4_out_stride;
580
0
        pi2_res_ptr += i4_res_stride;
581
0
        pi2_res_pred_ptr += i4_res_pred_stride;
582
583
0
        i_macro = ((i_z6 + i_z1 + 32) >> 6);
584
0
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
585
0
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
586
0
        pu1_pred_ptr += i4_pred_stride;
587
0
        pu1_out += i4_out_stride;
588
0
        pi2_res_ptr += i4_res_stride;
589
0
        pi2_res_pred_ptr += i4_res_pred_stride;
590
591
0
        i_macro = ((i_z6 - i_z1 + 32) >> 6);
592
0
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
593
0
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
594
0
        pu1_pred_ptr += i4_pred_stride;
595
0
        pu1_out += i4_out_stride;
596
0
        pi2_res_ptr += i4_res_stride;
597
0
        pi2_res_pred_ptr += i4_res_pred_stride;
598
599
0
        i_macro = ((i_z4 - i_z3 + 32) >> 6);
600
0
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
601
0
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
602
0
        pu1_pred_ptr += i4_pred_stride;
603
0
        pu1_out += i4_out_stride;
604
0
        pi2_res_ptr += i4_res_stride;
605
0
        pi2_res_pred_ptr += i4_res_pred_stride;
606
607
0
        i_macro = ((i_z2 - i_z5 + 32) >> 6);
608
0
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
609
0
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
610
0
        pu1_pred_ptr += i4_pred_stride;
611
0
        pu1_out += i4_out_stride;
612
0
        pi2_res_ptr += i4_res_stride;
613
0
        pi2_res_pred_ptr += i4_res_pred_stride;
614
615
0
        i_macro = ((i_z0 - i_z7 + 32) >> 6);
616
0
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
617
0
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
618
619
0
        pi2_tmp_ptr++;
620
0
        pu1_out_ptr++;
621
0
        pu1_pred++;
622
0
        pi2_res++;
623
0
        pi2_res_pred++;
624
0
    }
625
0
}
626
627
/*
628
 ********************************************************************************
629
 *
630
 * @brief This function reconstructs a 4x4 sub block from quantized residue and
631
 * prediction buffer
632
 *
633
 * @par Description:
634
 *  The quantized residue is first inverse quantized, then inverse transformed.
635
 *  This inverse transformed content is added to the prediction buffer to recon-
636
 *  struct the end output
637
 *
638
 * @param[in] pi2_src
639
 *  quantized 4x4 block
640
 *
641
 * @param[in] pu1_pred
642
 *  prediction 4x4 block
643
 *
644
 * @param[out] pu1_out
645
 *  reconstructed 4x4 block
646
 *
647
 * @param[in] src_strd
648
 *  quantization buffer stride
649
 *
650
 * @param[in] i4_pred_stride,
651
 *  Prediction buffer stride
652
 *
653
 * @param[in] i4_out_stride
654
 *  recon buffer Stride
655
 *
656
 * @param[in] pu2_scaling_list
657
 *  pointer to scaling list
658
 *
659
 * @param[in] pu2_norm_adjust
660
 *  pointer to inverse scale matrix
661
 *
662
 * @param[in] u4_qp_div_6
663
 *  Floor (qp/6)
664
 *
665
 * @param[in] pi4_tmp
666
 * temporary buffer of size 1*16
667
 *
668
 * @returns none
669
 *
670
 * @remarks none
671
 *
672
 *******************************************************************************
673
 */
674
void isvc_iquant_itrans_recon_chroma_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred,
675
                                         buffer_container_t *ps_res_pred,
676
                                         buffer_container_t *ps_res, buffer_container_t *ps_rec,
677
                                         iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
678
                                         WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
679
                                         WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
680
1.98M
{
681
1.98M
    WORD16 x0, x1, x2, x3, i;
682
1.98M
    WORD32 q0, q1, q2, q3;
683
1.98M
    WORD16 i_macro;
684
685
1.98M
    WORD16 *pi2_src = ps_src->pv_data;
686
1.98M
    WORD16 *pi2_res = ps_res->pv_data;
687
1.98M
    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
688
1.98M
    UWORD8 *pu1_pred = ps_pred->pv_data;
689
1.98M
    UWORD8 *pu1_out = ps_rec->pv_data;
690
1.98M
    WORD32 i4_src_stride = ps_src->i4_data_stride;
691
1.98M
    WORD32 i4_res_stride = ps_res->i4_data_stride;
692
1.98M
    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
693
1.98M
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
694
1.98M
    WORD32 i4_out_stride = ps_rec->i4_data_stride;
695
1.98M
    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
696
1.98M
    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
697
1.98M
    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
698
1.98M
    WORD16 *pi2_src_ptr = pi2_src;
699
1.98M
    WORD16 *pi2_tmp_ptr = pi2_tmp;
700
1.98M
    UWORD8 *pu1_pred_ptr = pu1_pred;
701
1.98M
    WORD16 *pi2_res_ptr = pi2_res;
702
1.98M
    WORD16 *pi2_res_pred_ptr = pi2_res_pred;
703
1.98M
    UWORD8 *pu1_out_ptr = pu1_out;
704
1.98M
    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
705
706
1.98M
    UNUSED(i4_iq_start_idx);
707
708
    /* inverse quant */
709
    /*horizontal inverse transform */
710
9.91M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
711
7.93M
    {
712
7.93M
        if(i == 0)
713
1.98M
        {
714
1.98M
            q0 = pi2_dc_src[0];
715
1.98M
        }
716
5.94M
        else
717
5.94M
        {
718
5.94M
            q0 = pi2_src_ptr[0];
719
5.94M
            INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
720
5.94M
        }
721
722
7.93M
        q2 = pi2_src_ptr[2];
723
7.93M
        INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
724
725
7.93M
        x0 = q0 + q2;
726
7.93M
        x1 = q0 - q2;
727
728
7.93M
        q1 = pi2_src_ptr[1];
729
7.93M
        INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
730
731
7.93M
        q3 = pi2_src_ptr[3];
732
7.93M
        INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
733
734
7.93M
        x2 = (q1 >> 1) - q3;
735
7.93M
        x3 = q1 + (q3 >> 1);
736
737
7.93M
        pi2_tmp_ptr[0] = x0 + x3;
738
7.93M
        pi2_tmp_ptr[1] = x1 + x2;
739
7.93M
        pi2_tmp_ptr[2] = x1 - x2;
740
7.93M
        pi2_tmp_ptr[3] = x0 - x3;
741
742
7.93M
        pi2_src_ptr += i4_src_stride;
743
7.93M
        pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
744
7.93M
        pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
745
7.93M
        pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
746
7.93M
    }
747
748
    /* vertical inverse transform */
749
1.98M
    pi2_tmp_ptr = pi2_tmp;
750
9.90M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
751
7.92M
    {
752
7.92M
        pu1_pred_ptr = pu1_pred;
753
7.92M
        pi2_res_ptr = pi2_res;
754
7.92M
        pi2_res_pred_ptr = pi2_res_pred;
755
7.92M
        pu1_out = pu1_out_ptr;
756
757
7.92M
        x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
758
7.92M
        x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
759
7.92M
        x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
760
7.92M
        x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
761
762
        /* inverse prediction */
763
7.92M
        i_macro = x0 + x3;
764
7.92M
        i_macro = ((i_macro + 32) >> 6);
765
7.92M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
766
7.92M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
767
7.92M
        pu1_pred_ptr += i4_pred_stride;
768
7.92M
        pu1_out += i4_out_stride;
769
7.92M
        pi2_res_ptr += i4_res_stride;
770
7.92M
        pi2_res_pred_ptr += i4_res_pred_stride;
771
772
7.92M
        i_macro = x1 + x2;
773
7.92M
        i_macro = ((i_macro + 32) >> 6);
774
7.92M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
775
7.92M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
776
7.92M
        pu1_pred_ptr += i4_pred_stride;
777
7.92M
        pu1_out += i4_out_stride;
778
7.92M
        pi2_res_ptr += i4_res_stride;
779
7.92M
        pi2_res_pred_ptr += i4_res_pred_stride;
780
781
7.92M
        i_macro = x1 - x2;
782
7.92M
        i_macro = ((i_macro + 32) >> 6);
783
7.92M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
784
7.92M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
785
7.92M
        pu1_pred_ptr += i4_pred_stride;
786
7.92M
        pu1_out += i4_out_stride;
787
7.92M
        pi2_res_ptr += i4_res_stride;
788
7.92M
        pi2_res_pred_ptr += i4_res_pred_stride;
789
790
7.92M
        i_macro = x0 - x3;
791
7.92M
        i_macro = ((i_macro + 32) >> 6);
792
7.92M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
793
7.92M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
794
795
7.92M
        pi2_tmp_ptr++;
796
7.92M
        pu1_out_ptr += 2;  // Interleaved store for output
797
7.92M
        pu1_pred += 2;     // Interleaved load for pred buffer
798
7.92M
        pi2_res += 2;
799
7.92M
        pi2_res_pred += 2;
800
7.92M
    }
801
1.98M
}
802
803
/*
804
 ********************************************************************************
805
 *
806
 * @brief This function reconstructs a 4x4 sub block from quantized resiude and
807
 * prediction buffer if only dc value is present for residue
808
 *
809
 * @par Description:
810
 *  The quantized residue is first inverse quantized,
811
 *  This inverse quantized content is added to the prediction buffer to recon-
812
 *  struct the end output
813
 *
814
 * @param[in] pi2_src
815
 *  quantized dc coefficient
816
 *
817
 * @param[in] pu1_pred
818
 *  prediction 4x4 block in interleaved format
819
 *
820
 * @param[in] i4_pred_stride,
821
 *  Prediction buffer stride in interleaved format
822
 *
823
 * @param[in] i4_out_stride
824
 *  recon buffer Stride
825
 *
826
 * @returns none
827
 *
828
 * @remarks none
829
 *
830
 *******************************************************************************
831
 */
832
833
void isvc_iquant_itrans_recon_chroma_4x4_dc(buffer_container_t *ps_src, buffer_container_t *ps_pred,
834
                                            buffer_container_t *ps_res_pred,
835
                                            buffer_container_t *ps_res, buffer_container_t *ps_rec,
836
                                            iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
837
                                            WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
838
                                            WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
839
855k
{
840
855k
    WORD32 q0;
841
855k
    WORD16 i_macro, i;
842
843
855k
    WORD16 *pi2_src = ps_src->pv_data;
844
855k
    WORD16 *pi2_res = ps_res->pv_data;
845
855k
    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
846
855k
    UWORD8 *pu1_pred = ps_pred->pv_data;
847
855k
    UWORD8 *pu1_out = ps_rec->pv_data;
848
855k
    WORD32 i4_res_stride = ps_res->i4_data_stride;
849
855k
    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
850
855k
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
851
855k
    WORD32 i4_out_stride = ps_rec->i4_data_stride;
852
855k
    const UWORD16 *pu2_iscal_mat = ps_iq_it_res_rec_constants->pu2_iscal_mat;
853
855k
    const UWORD16 *pu2_weigh_mat = ps_iq_it_res_rec_constants->pu2_weigh_mat;
854
855k
    UWORD32 u4_qp_div_6 = ps_iq_it_res_rec_constants->u4_qp_div_6;
855
855k
    UWORD8 *pu1_pred_ptr = pu1_pred;
856
855k
    WORD16 *pi2_res_ptr = pi2_res;
857
855k
    WORD16 *pi2_res_pred_ptr = pi2_res_pred;
858
855k
    UWORD8 *pu1_out_ptr = pu1_out;
859
860
855k
    UNUSED(pi2_src);
861
855k
    UNUSED(pu2_iscal_mat);
862
855k
    UNUSED(pu2_weigh_mat);
863
855k
    UNUSED(u4_qp_div_6);
864
855k
    UNUSED(pi2_tmp);
865
855k
    UNUSED(i4_iq_start_idx);
866
867
855k
    q0 = pi2_dc_src[0];  // Restoring dc value for intra case3
868
855k
    i_macro = ((q0 + 32) >> 6);
869
870
4.27M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
871
3.41M
    {
872
3.41M
        pu1_pred_ptr = pu1_pred;
873
3.41M
        pi2_res_ptr = pi2_res;
874
3.41M
        pi2_res_pred_ptr = pi2_res_pred;
875
3.41M
        pu1_out = pu1_out_ptr;
876
877
        /* inverse prediction */
878
3.41M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
879
3.41M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
880
3.41M
        pu1_pred_ptr += i4_pred_stride;
881
3.41M
        pu1_out += i4_out_stride;
882
3.41M
        pi2_res_ptr += i4_res_stride;
883
3.41M
        pi2_res_pred_ptr += i4_res_pred_stride;
884
885
3.41M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
886
3.41M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
887
3.41M
        pu1_pred_ptr += i4_pred_stride;
888
3.41M
        pu1_out += i4_out_stride;
889
3.41M
        pi2_res_ptr += i4_res_stride;
890
3.41M
        pi2_res_pred_ptr += i4_res_pred_stride;
891
892
3.41M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
893
3.41M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
894
3.41M
        pu1_pred_ptr += i4_pred_stride;
895
3.41M
        pu1_out += i4_out_stride;
896
3.41M
        pi2_res_ptr += i4_res_stride;
897
3.41M
        pi2_res_pred_ptr += i4_res_pred_stride;
898
899
3.41M
        pi2_res_ptr[0] = isvc_get_residue(i_macro, pi2_res_pred_ptr[0], u1_res_accumulate);
900
3.41M
        *pu1_out = CLIP_U8(pi2_res_ptr[0] + pu1_pred_ptr[0]);
901
902
3.41M
        pu1_out_ptr += 2;
903
3.41M
        pu1_pred += 2;
904
3.41M
        pi2_res += 2;
905
3.41M
        pi2_res_pred += 2;
906
3.41M
    }
907
855k
}
908
909
/*
910
 ********************************************************************************
911
 *
912
 * @brief This function reconstructs a 4x4 sub block from quantized residue and
913
 * prediction buffer assuming cbf=0
914
 *
915
 * @param[in] ps_src
916
 *  quantized 4x4 block
917
 *
918
 * @param[in] ps_pred
919
 *  prediction 4x4 block
920
 *
921
 * @param[in] ps_res
922
 *  residue 4x4 block
923
 *
924
 * @param[in] ps_res_pred
925
 *  residual pred 4x4 block
926
 *
927
 * @param[out] ps_out
928
 *  reconstructed 4x4 block
929
 *
930
 * @param[out] ps_iq_it_res_rec_constants
931
 *  reconstructed 4x4 block
932
 *
933
 * @param[out] pi2_tmp
934
 *  scratch buf
935
 *
936
 * @param[out] pi2_dc_src
937
 *  Pointer to dc coeff location
938
 *
939
 * @param[out] i4_iq_start_idx
940
 *  Idx of first coeff
941
 *
942
 * @param[in] pi2_tmp
943
 * temporary buffer of size 1*16
944
 *
945
 * @param[in] u1_res_accumulate
946
 * Flag to control residual accumulation
947
 *
948
 * @returns none
949
 *
950
 *******************************************************************************
951
 */
952
void isvc_zcbf_iquant_itrans_recon_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred,
953
                                       buffer_container_t *ps_res_pred, buffer_container_t *ps_res,
954
                                       buffer_container_t *ps_rec,
955
                                       iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants,
956
                                       WORD16 *pi2_tmp, WORD16 *pi2_dc_src, WORD32 i4_iq_start_idx,
957
                                       UWORD8 u1_res_accumulate)
958
35.9M
{
959
35.9M
    WORD32 i, j;
960
961
35.9M
    UWORD8 *pu1_out = ps_rec->pv_data;
962
35.9M
    WORD16 *pi2_res = ps_res->pv_data;
963
35.9M
    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
964
35.9M
    UWORD8 *pu1_pred = ps_pred->pv_data;
965
35.9M
    WORD32 i4_out_stride = ps_rec->i4_data_stride;
966
35.9M
    WORD32 i4_res_stride = ps_res->i4_data_stride;
967
35.9M
    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
968
35.9M
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
969
970
35.9M
    UNUSED(ps_src);
971
35.9M
    UNUSED(ps_iq_it_res_rec_constants);
972
35.9M
    UNUSED(pi2_tmp);
973
35.9M
    UNUSED(pi2_dc_src);
974
35.9M
    UNUSED(i4_iq_start_idx);
975
976
35.9M
    if(u1_res_accumulate)
977
363k
    {
978
1.81M
        for(i = 0; i < SUB_BLK_HEIGHT_4x4; i++)
979
1.45M
        {
980
7.25M
            for(j = 0; j < SUB_BLK_WIDTH_4x4; j++)
981
5.79M
            {
982
5.79M
                pi2_res[j + i * i4_res_stride] = isvc_get_residue(
983
5.79M
                    0, pi2_res_pred[j + i * i4_res_pred_stride], u1_res_accumulate);
984
5.79M
                pu1_out[j + i * i4_out_stride] =
985
5.79M
                    CLIP3(0, UINT8_MAX,
986
5.79M
                          pu1_pred[j + i * i4_pred_stride] + pi2_res[j + i * i4_res_stride]);
987
5.79M
            }
988
1.45M
        }
989
363k
    }
990
35.6M
    else
991
35.6M
    {
992
173M
        for(i = 0; i < SUB_BLK_HEIGHT_4x4; i++)
993
138M
        {
994
680M
            for(j = 0; j < SUB_BLK_WIDTH_4x4; j++)
995
542M
            {
996
542M
                pi2_res[j + i * i4_res_stride] = 0;
997
542M
                pu1_out[j + i * i4_out_stride] = pu1_pred[j + i * i4_pred_stride];
998
542M
            }
999
138M
        }
1000
35.6M
    }
1001
35.9M
}
1002
1003
/*
1004
 ********************************************************************************
1005
 *
1006
 * @brief This function reconstructs a 4x4 sub block from quantized residue and
1007
 * prediction buffer assuming cbf=0
1008
 *
1009
 * @param[in] ps_src
1010
 *  quantized 4x4 block
1011
 *
1012
 * @param[in] ps_pred
1013
 *  prediction 4x4 block
1014
 *
1015
 * @param[in] ps_res
1016
 *  residue 4x4 block
1017
 *
1018
 * @param[in] ps_res_pred
1019
 *  residual pred 4x4 block
1020
 *
1021
 * @param[out] ps_out
1022
 *  reconstructed 4x4 block
1023
 *
1024
 * @param[out] ps_iq_it_res_rec_constants
1025
 *  reconstructed 4x4 block
1026
 *
1027
 * @param[out] pi2_tmp
1028
 *  scratch buf
1029
 *
1030
 * @param[out] pi2_dc_src
1031
 *  Pointer to dc coeff location
1032
 *
1033
 * @param[out] i4_iq_start_idx
1034
 *  Idx of first coeff
1035
 *
1036
 * @param[in] pi2_tmp
1037
 * temporary buffer of size 1*16
1038
 *
1039
 * @param[in] u1_res_accumulate
1040
 * Flag to control residual accumulation
1041
 *
1042
 * @returns none
1043
 *
1044
 *******************************************************************************
1045
 */
1046
void isvc_chroma_zcbf_iquant_itrans_recon_4x4(
1047
    buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res_pred,
1048
    buffer_container_t *ps_res, buffer_container_t *ps_rec,
1049
    iq_it_res_rec_constants_t *ps_iq_it_res_rec_constants, WORD16 *pi2_tmp, WORD16 *pi2_dc_src,
1050
    WORD32 i4_iq_start_idx, UWORD8 u1_res_accumulate)
1051
50.4M
{
1052
50.4M
    WORD32 i, j;
1053
1054
50.4M
    UWORD8 *pu1_out = ps_rec->pv_data;
1055
50.4M
    WORD32 i4_out_stride = ps_rec->i4_data_stride;
1056
50.4M
    WORD16 *pi2_res = ps_res->pv_data;
1057
50.4M
    WORD16 *pi2_res_pred = ps_res_pred->pv_data;
1058
50.4M
    UWORD8 *pu1_pred = ps_pred->pv_data;
1059
50.4M
    WORD32 i4_res_stride = ps_res->i4_data_stride;
1060
50.4M
    WORD32 i4_res_pred_stride = ps_res_pred->i4_data_stride;
1061
50.4M
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
1062
1063
50.4M
    UNUSED(ps_src);
1064
50.4M
    UNUSED(ps_iq_it_res_rec_constants);
1065
50.4M
    UNUSED(pi2_tmp);
1066
50.4M
    UNUSED(pi2_dc_src);
1067
50.4M
    UNUSED(i4_iq_start_idx);
1068
1069
50.4M
    if(u1_res_accumulate)
1070
33.1k
    {
1071
165k
        for(i = 0; i < SUB_BLK_HEIGHT_4x4; i++)
1072
132k
        {
1073
660k
            for(j = 0; j < SUB_BLK_WIDTH_4x4 * 2; j += 2)
1074
528k
            {
1075
528k
                pi2_res[j + i * i4_res_stride] = isvc_get_residue(
1076
528k
                    0, pi2_res_pred[j + i * i4_res_pred_stride], u1_res_accumulate);
1077
528k
                pu1_out[j + i * i4_out_stride] = CLIP3(
1078
528k
                    0, UINT8_MAX,
1079
528k
                    ((WORD16) pu1_pred[j + i * i4_pred_stride]) + pi2_res[j + i * i4_res_stride]);
1080
528k
            }
1081
132k
        }
1082
33.1k
    }
1083
50.3M
    else
1084
50.3M
    {
1085
249M
        for(i = 0; i < SUB_BLK_HEIGHT_4x4; i++)
1086
198M
        {
1087
986M
            for(j = 0; j < SUB_BLK_WIDTH_4x4 * 2; j += 2)
1088
788M
            {
1089
788M
                pi2_res[j + i * i4_res_stride] = 0;
1090
788M
                pu1_out[j + i * i4_out_stride] = pu1_pred[j + i * i4_pred_stride];
1091
788M
            }
1092
198M
        }
1093
50.3M
    }
1094
50.4M
}