Coverage Report

Created: 2025-07-11 06:39

/src/libavc/common/ih264_iquant_itrans_recon.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/**
22
*******************************************************************************
23
* @file
24
*  ih264_iquant_itrans_recon.c
25
*
26
* @brief
27
*  Contains definition of functions for h264 inverse quantization,
28
*  inverse transformation and recon
29
*
30
* @author
31
*  ittiam
32
*
33
* @par List of Functions:
34
*  - ih264_iquant_itrans_recon_4x4
35
*  - ih264_iquant_itrans_recon_8x8
36
*  - ih264_iquant_itrans_recon_4x4_dc
37
*  - ih264_iquant_itrans_recon_8x8_dc
38
*  - ih264_iquant_itrans_recon_chroma_4x4
39
*  - ih264_iquant_itrans_recon_chroma_4x4_dc
40
*
41
* @remarks
42
*
43
*******************************************************************************
44
*/
45
46
/*****************************************************************************/
47
/* File Includes                                                             */
48
/*****************************************************************************/
49
50
/* User Include Files */
51
#include "ih264_typedefs.h"
52
#include "ih264_defs.h"
53
#include "ih264_macros.h"
54
#include "ih264_size_defs.h"
55
#include "ih264_trans_macros.h"
56
#include "ih264_trans_data.h"
57
#include "ih264_structs.h"
58
#include "ih264_trans_quant_itrans_iquant.h"
59
#include "ih264_platform_macros.h"
60
61
/*****************************************************************************/
62
/*  Function definitions                                                     */
63
/*****************************************************************************/
64
65
/**
66
********************************************************************************
67
*
68
* @brief This function reconstructs a 4x4 sub block from quantized residue and
69
*  prediction buffer
70
*
71
* @par Description:
72
*  The quantized residue is first inverse quantized, then inverse transformed.
73
*  This inverse transformed content is added to the prediction buffer to recon-
74
*  struct the end output
75
*
76
* @param[in] pi2_src
77
*  quantized 4x4 block
78
*
79
* @param[in] pu1_pred
80
*  prediction 4x4 block
81
*
82
* @param[out] pu1_out
83
*  reconstructed 4x4 block
84
*
85
* @param[in] pred_strd
86
*  Prediction buffer stride
87
*
88
* @param[in] out_strd
89
*  recon buffer Stride
90
*
91
* @param[in] pu2_iscal_mat
92
*  pointer to inverse scaling matrix
93
*
94
* @param[in] pu2_weigh_mat
95
*  pointer to weight matrix
96
*
97
* @param[in] u4_qp_div_6
98
*  Floor (qp/6)
99
*
100
* @param[in] pi2_tmp
101
*  temporary buffer of size 1*16
102
*
103
* @param[in] iq_start_idx
104
*  1 (intra case): DC is loaded from pi2_dc_ld_addr; else it is dequantized from pi2_src
105
*
106
* @param[in] pi2_dc_ld_addr
107
*  Address to load DC value of the 4x4 blk
108
*
109
* @returns none
110
*
111
* @remarks none
112
*
113
*******************************************************************************
114
*/
115
void ih264_iquant_itrans_recon_4x4(WORD16 *pi2_src,
116
                                   UWORD8 *pu1_pred,
117
                                   UWORD8 *pu1_out,
118
                                   WORD32 pred_strd,
119
                                   WORD32 out_strd,
120
                                   const UWORD16 *pu2_iscal_mat,
121
                                   const UWORD16 *pu2_weigh_mat,
122
                                   UWORD32 u4_qp_div_6,
123
                                   WORD16 *pi2_tmp,
124
                                   WORD32 iq_start_idx,
125
                                   WORD16 *pi2_dc_ld_addr)
126
405k
{
127
405k
    WORD16 *pi2_src_ptr = pi2_src;
128
405k
    WORD16 *pi2_tmp_ptr = pi2_tmp;
129
405k
    UWORD8 *pu1_pred_ptr = pu1_pred;
130
405k
    UWORD8 *pu1_out_ptr = pu1_out;
131
405k
    WORD16 x0, x1, x2, x3, i;
132
405k
    WORD32 q0, q1, q2, q3;
133
405k
    WORD16 i_macro;
134
405k
    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
135
136
    /* inverse quant */
137
    /* horizontal inverse transform */
138
2.02M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
139
1.62M
    {
140
1.62M
        q0 = pi2_src_ptr[0];
141
1.62M
        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
142
        /* Restoring dc value for intra case */
143
1.62M
        if (i==0 && iq_start_idx == 1)
144
66.7k
        {
145
66.7k
            q0 = pi2_dc_ld_addr[0];
146
66.7k
        }
147
148
1.62M
        q2 = pi2_src_ptr[2];
149
1.62M
        INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
150
151
1.62M
        x0 = q0 + q2;
152
1.62M
        x1 = q0 - q2;
153
154
1.62M
        q1 = pi2_src_ptr[1];
155
1.62M
        INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
156
157
1.62M
        q3 = pi2_src_ptr[3];
158
1.62M
        INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
159
160
1.62M
        x2 = (q1 >> 1) - q3;
161
1.62M
        x3 = q1 + (q3 >> 1);
162
163
1.62M
        pi2_tmp_ptr[0] = x0 + x3;
164
1.62M
        pi2_tmp_ptr[1] = x1 + x2;
165
1.62M
        pi2_tmp_ptr[2] = x1 - x2;
166
1.62M
        pi2_tmp_ptr[3] = x0 - x3;
167
168
1.62M
        pi2_src_ptr += SUB_BLK_WIDTH_4x4;
169
1.62M
        pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
170
1.62M
        pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
171
1.62M
        pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
172
1.62M
    }
173
174
    /* vertical inverse transform */
175
405k
    pi2_tmp_ptr = pi2_tmp;
176
2.02M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
177
1.62M
    {
178
1.62M
        pu1_pred_ptr = pu1_pred;
179
1.62M
        pu1_out = pu1_out_ptr;
180
181
1.62M
        x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
182
1.62M
        x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
183
1.62M
        x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
184
1.62M
        x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
185
186
        /* inverse prediction */
187
1.62M
        i_macro = x0 + x3;
188
1.62M
        i_macro = ((i_macro + 32) >> 6);
189
1.62M
        i_macro += *pu1_pred_ptr;
190
1.62M
        *pu1_out = CLIP_U8(i_macro);
191
1.62M
        pu1_pred_ptr += pred_strd;
192
1.62M
        pu1_out += out_strd;
193
194
1.62M
        i_macro = x1 + x2;
195
1.62M
        i_macro = ((i_macro + 32) >> 6);
196
1.62M
        i_macro += *pu1_pred_ptr;
197
1.62M
        *pu1_out = CLIP_U8(i_macro);
198
1.62M
        pu1_pred_ptr += pred_strd;
199
1.62M
        pu1_out += out_strd;
200
201
1.62M
        i_macro = x1 - x2;
202
1.62M
        i_macro = ((i_macro + 32) >> 6);
203
1.62M
        i_macro += *pu1_pred_ptr;
204
1.62M
        *pu1_out = CLIP_U8(i_macro);
205
1.62M
        pu1_pred_ptr += pred_strd;
206
1.62M
        pu1_out += out_strd;
207
208
1.62M
        i_macro = x0 - x3;
209
1.62M
        i_macro = ((i_macro + 32) >> 6);
210
1.62M
        i_macro += *pu1_pred_ptr;
211
1.62M
        *pu1_out = CLIP_U8(i_macro);
212
213
1.62M
        pi2_tmp_ptr++;
214
1.62M
        pu1_out_ptr++;
215
1.62M
        pu1_pred++;
216
1.62M
    }
217
405k
}
218
219
/**
220
********************************************************************************
221
*
222
* @brief This function reconstructs a 4x4 sub block from quantized residue and
223
*  prediction buffer, if only dc value is present for residue
224
*
225
* @par Description:
226
*  The quantized residue is first inverse quantized, then inverse transformed.
227
*  This inverse transformed content is added to the prediction buffer to recon-
228
*  struct the end output
229
*
230
* @param[in] pi2_src
231
*  quantized 4x4 block
232
*
233
* @param[in] pu1_pred
234
*  prediction 4x4 block
235
*
236
* @param[out] pu1_out
237
*  reconstructed 4x4 block
238
*
239
* @param[in] pred_strd
240
*  Prediction buffer stride
241
*
242
* @param[in] out_strd
243
*  recon buffer Stride
244
*
245
* @param[in] pu2_iscal_mat
246
*  pointer to inverse scaling matrix
247
*
248
* @param[in] pu2_weigh_mat
249
*  pointer to weight matrix
250
*
251
* @param[in] u4_qp_div_6
252
*  Floor (qp/6)
253
*
254
* @param[in] pi2_tmp
255
*  temporary buffer of size 1*16 (UNUSED by this function)
256
*
257
* @param[in] iq_start_idx
258
*  0: DC is dequantized from pi2_src; otherwise (intra case) it is loaded from pi2_dc_ld_addr
259
*
260
* @param[in] pi2_dc_ld_addr
261
*  Address to load DC value of the 4x4 blk
262
*
263
* @returns none
264
*
265
* @remarks none
266
*
267
*******************************************************************************
268
*/
269
void ih264_iquant_itrans_recon_4x4_dc(WORD16 *pi2_src,
270
                                      UWORD8 *pu1_pred,
271
                                      UWORD8 *pu1_out,
272
                                      WORD32 pred_strd,
273
                                      WORD32 out_strd,
274
                                      const UWORD16 *pu2_iscal_mat,
275
                                      const UWORD16 *pu2_weigh_mat,
276
                                      UWORD32 u4_qp_div_6,
277
                                      WORD16 *pi2_tmp,
278
                                      WORD32 iq_start_idx,
279
                                      WORD16 *pi2_dc_ld_addr)
280
723k
{
281
723k
    UWORD8 *pu1_pred_ptr = pu1_pred;
282
723k
    UWORD8 *pu1_out_ptr = pu1_out;
283
723k
    WORD32 q0;
284
723k
    WORD16 x, i_macro, i;
285
723k
    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
286
287
723k
    UNUSED(pi2_tmp);
288
723k
    if(iq_start_idx == 0)
289
48.9k
    {
290
48.9k
        q0 = pi2_src[0];
291
48.9k
        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
292
48.9k
    }
293
674k
    else
294
674k
    {
295
674k
        q0 = pi2_dc_ld_addr[0]; // Restoring dc value for intra case3
296
674k
    }
297
723k
    i_macro = ((q0 + 32) >> 6);
298
3.61M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
299
2.89M
    {
300
2.89M
        pu1_pred_ptr = pu1_pred;
301
2.89M
        pu1_out = pu1_out_ptr;
302
303
        /* inverse prediction */
304
2.89M
        x = i_macro + *pu1_pred_ptr;
305
2.89M
        *pu1_out = CLIP_U8(x);
306
2.89M
        pu1_pred_ptr += pred_strd;
307
2.89M
        pu1_out += out_strd;
308
309
2.89M
        x = i_macro + *pu1_pred_ptr;
310
2.89M
        *pu1_out = CLIP_U8(x);
311
2.89M
        pu1_pred_ptr += pred_strd;
312
2.89M
        pu1_out += out_strd;
313
314
2.89M
        x = i_macro + *pu1_pred_ptr;
315
2.89M
        *pu1_out = CLIP_U8(x);
316
2.89M
        pu1_pred_ptr += pred_strd;
317
2.89M
        pu1_out += out_strd;
318
319
2.89M
        x = i_macro + *pu1_pred_ptr;
320
2.89M
        *pu1_out = CLIP_U8(x);
321
322
2.89M
        pu1_out_ptr++;
323
2.89M
        pu1_pred++;
324
2.89M
    }
325
723k
}
326
327
/**
328
********************************************************************************
329
*
330
* @brief This function reconstructs a 8x8 sub block from quantized residue and
331
*  prediction buffer
332
*
333
* @par Description:
334
*  The quantized residue is first inverse quantized, then inverse transformed.
335
*  This inverse transformed content is added to the prediction buffer to recon-
336
*  struct the end output
337
*
338
* @param[in] pi2_src
339
*  quantized 8x8 block
340
*
341
* @param[in] pu1_pred
342
*  prediction 8x8 block
343
*
344
* @param[out] pu1_out
345
*  reconstructed 8x8 block
346
*
347
* @param[in] pred_strd
348
*  Prediction buffer stride
349
*
350
* @param[in] out_strd
351
*  recon buffer Stride
352
*
353
* @param[in] pu2_iscale_mat
354
*  pointer to inverse scaling matrix
355
*
356
* @param[in] pu2_weigh_mat
357
*  pointer to weight matrix
358
*
359
* @param[in] qp_div
360
*  Floor (qp/6)
361
*
362
* @param[in] pi2_tmp
363
*  temporary buffer for the 8x8 block; this function writes all 64
363
*  dequantized coefficients into it, so it must hold at least 64 entries
365
*
366
* @param[in] iq_start_idx
367
*  UNUSED
368
*
369
* @param[in] pi2_dc_ld_addr
370
*  UNUSED
371
*
372
* @returns none
373
*
374
* @remarks none
375
*
376
*******************************************************************************
377
*/
378
void ih264_iquant_itrans_recon_8x8(WORD16 *pi2_src,
379
                                   UWORD8 *pu1_pred,
380
                                   UWORD8 *pu1_out,
381
                                   WORD32 pred_strd,
382
                                   WORD32 out_strd,
383
                                   const UWORD16 *pu2_iscale_mat,
384
                                   const UWORD16 *pu2_weigh_mat,
385
                                   UWORD32 qp_div,
386
                                   WORD16 *pi2_tmp,
387
                                   WORD32 iq_start_idx,
388
                                   WORD16 *pi2_dc_ld_addr)
389
129k
{
390
129k
    WORD32 i;
391
129k
    WORD16 *pi2_tmp_ptr = pi2_tmp;
392
129k
    UWORD8 *pu1_pred_ptr = pu1_pred;
393
129k
    UWORD8 *pu1_out_ptr = pu1_out;
394
129k
    WORD16 i_z0, i_z1, i_z2, i_z3, i_z4, i_z5, i_z6, i_z7;
395
129k
    WORD16 i_y0, i_y1, i_y2, i_y3, i_y4, i_y5, i_y6, i_y7;
396
129k
    WORD16 i_macro;
397
129k
    WORD32 q;
398
129k
    WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
399
400
129k
    UNUSED(iq_start_idx);
401
129k
    UNUSED(pi2_dc_ld_addr);
402
    /*************************************************************/
403
    /* De quantization of coefficients. Will be replaced by SIMD */
404
    /* operations on platform. Note : DC coeff is not scaled     */
405
    /*************************************************************/
406
8.43M
    for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++)
407
8.30M
    {
408
8.30M
        q = pi2_src[i];
409
8.30M
        INV_QUANT(q, pu2_iscale_mat[i], pu2_weigh_mat[i], qp_div, rnd_fact, 6);
410
8.30M
        pi2_tmp_ptr[i] = q;
411
8.30M
    }
412
    /* Perform Inverse transform */
413
    /*--------------------------------------------------------------------*/
414
    /* IDCT [ Horizontal transformation ]                                 */
415
    /*--------------------------------------------------------------------*/
416
1.16M
    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
417
1.03M
    {
418
        /*------------------------------------------------------------------*/
419
        /* y0 = w0 + w4                                                     */
420
        /* y1 = -w3 + w5 - w7 - (w7 >> 1)                                   */
421
        /* y2 = w0 - w4                                                     */
422
        /* y3 = w1 + w7 - w3 - (w3 >> 1)                                    */
423
        /* y4 = (w2 >> 1) - w6                                              */
424
        /* y5 = -w1 + w7 + w5 + (w5 >> 1)                                   */
425
        /* y6 = w2 + (w6 >> 1)                                              */
426
        /* y7 = w3 + w5 + w1 + (w1 >> 1)                                    */
427
        /*------------------------------------------------------------------*/
428
1.03M
        i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4] );
429
430
1.03M
        i_y1 = ((WORD32)(-pi2_tmp_ptr[3]) + pi2_tmp_ptr[5] - pi2_tmp_ptr[7]
431
1.03M
                        - (pi2_tmp_ptr[7] >> 1));
432
433
1.03M
        i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4] );
434
435
1.03M
        i_y3 = ((WORD32)pi2_tmp_ptr[1] + pi2_tmp_ptr[7] - pi2_tmp_ptr[3]
436
1.03M
                        - (pi2_tmp_ptr[3] >> 1));
437
438
1.03M
        i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6] );
439
440
1.03M
        i_y5 = ((WORD32)(-pi2_tmp_ptr[1]) + pi2_tmp_ptr[7] + pi2_tmp_ptr[5]
441
1.03M
                        + (pi2_tmp_ptr[5] >> 1));
442
443
1.03M
        i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1));
444
445
1.03M
        i_y7 = ((WORD32)pi2_tmp_ptr[3] + pi2_tmp_ptr[5] + pi2_tmp_ptr[1]
446
1.03M
                        + (pi2_tmp_ptr[1] >> 1));
447
448
        /*------------------------------------------------------------------*/
449
        /* z0 = y0 + y6                                                     */
450
        /* z1 = y1 + (y7 >> 2)                                              */
451
        /* z2 = y2 + y4                                                     */
452
        /* z3 = y3 + (y5 >> 2)                                              */
453
        /* z4 = y2 - y4                                                     */
454
        /* z5 = (y3 >> 2) - y5                                              */
455
        /* z6 = y0 - y6                                                     */
456
        /* z7 = y7 - (y1 >> 2)                                              */
457
        /*------------------------------------------------------------------*/
458
1.03M
        i_z0 = i_y0 + i_y6;
459
1.03M
        i_z1 = i_y1 + (i_y7 >> 2);
460
1.03M
        i_z2 = i_y2 + i_y4;
461
1.03M
        i_z3 = i_y3 + (i_y5 >> 2);
462
1.03M
        i_z4 = i_y2 - i_y4;
463
1.03M
        i_z5 = (i_y3 >> 2) - i_y5;
464
1.03M
        i_z6 = i_y0 - i_y6;
465
1.03M
        i_z7 = i_y7 - (i_y1 >> 2);
466
467
        /*------------------------------------------------------------------*/
468
        /* x0 = z0 + z7                                                     */
469
        /* x1 = z2 + z5                                                     */
470
        /* x2 = z4 + z3                                                     */
471
        /* x3 = z6 + z1                                                     */
472
        /* x4 = z6 - z1                                                     */
473
        /* x5 = z4 - z3                                                     */
474
        /* x6 = z2 - z5                                                     */
475
        /* x7 = z0 - z7                                                     */
476
        /*------------------------------------------------------------------*/
477
1.03M
        pi2_tmp_ptr[0] = i_z0 + i_z7;
478
1.03M
        pi2_tmp_ptr[1] = i_z2 + i_z5;
479
1.03M
        pi2_tmp_ptr[2] = i_z4 + i_z3;
480
1.03M
        pi2_tmp_ptr[3] = i_z6 + i_z1;
481
1.03M
        pi2_tmp_ptr[4] = i_z6 - i_z1;
482
1.03M
        pi2_tmp_ptr[5] = i_z4 - i_z3;
483
1.03M
        pi2_tmp_ptr[6] = i_z2 - i_z5;
484
1.03M
        pi2_tmp_ptr[7] = i_z0 - i_z7;
485
486
        /* move to the next row */
487
        //pi2_src_ptr += SUB_BLK_WIDTH_8x8;
488
1.03M
        pi2_tmp_ptr += SUB_BLK_WIDTH_8x8;
489
1.03M
    }
490
491
    /*--------------------------------------------------------------------*/
492
    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
493
    /*                                                                    */
494
    /* Add the prediction and store it back to reconstructed frame buffer */
495
    /* [Prediction buffer itself in this case]                            */
496
    /*--------------------------------------------------------------------*/
497
129k
    pi2_tmp_ptr = pi2_tmp;
498
1.16M
    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
499
1.03M
    {
500
1.03M
        pu1_pred_ptr = pu1_pred;
501
1.03M
        pu1_out = pu1_out_ptr;
502
        /*------------------------------------------------------------------*/
503
        /* y0j = w0j + w4j                                                  */
504
        /* y1j = -w3j + w5j -w7j -(w7j >> 1)                                */
505
        /* y2j = w0j -w4j                                                   */
506
        /* y3j = w1j + w7j -w3j -(w3j >> 1)                                 */
507
        /* y4j = ( w2j >> 1 ) -w6j                                          */
508
        /* y5j = -w1j + w7j + w5j + (w5j >> 1)                              */
509
        /* y6j = w2j + ( w6j >> 1 )                                         */
510
        /* y7j = w3j + w5j + w1j + (w1j >> 1)                               */
511
        /*------------------------------------------------------------------*/
512
1.03M
        i_y0 = pi2_tmp_ptr[0] + pi2_tmp_ptr[32];
513
514
1.03M
        i_y1 = (WORD32)(-pi2_tmp_ptr[24]) + pi2_tmp_ptr[40] - pi2_tmp_ptr[56]
515
1.03M
                        - (pi2_tmp_ptr[56] >> 1);
516
517
1.03M
        i_y2 = pi2_tmp_ptr[0] - pi2_tmp_ptr[32];
518
519
1.03M
        i_y3 = (WORD32)pi2_tmp_ptr[8] + pi2_tmp_ptr[56] - pi2_tmp_ptr[24]
520
1.03M
                        - (pi2_tmp_ptr[24] >> 1);
521
522
1.03M
        i_y4 = (pi2_tmp_ptr[16] >> 1) - pi2_tmp_ptr[48];
523
524
1.03M
        i_y5 = (WORD32)(-pi2_tmp_ptr[8]) + pi2_tmp_ptr[56] + pi2_tmp_ptr[40]
525
1.03M
                        + (pi2_tmp_ptr[40] >> 1);
526
527
1.03M
        i_y6 = pi2_tmp_ptr[16] + (pi2_tmp_ptr[48] >> 1);
528
529
1.03M
        i_y7 = (WORD32)pi2_tmp_ptr[24] + pi2_tmp_ptr[40] + pi2_tmp_ptr[8]
530
1.03M
                        + (pi2_tmp_ptr[8] >> 1);
531
532
        /*------------------------------------------------------------------*/
533
        /* z0j = y0j + y6j                                                  */
534
        /* z1j = y1j + (y7j >> 2)                                           */
535
        /* z2j = y2j + y4j                                                  */
536
        /* z3j = y3j + (y5j >> 2)                                           */
537
        /* z4j = y2j -y4j                                                   */
538
        /* z5j = (y3j >> 2) -y5j                                            */
539
        /* z6j = y0j -y6j                                                   */
540
        /* z7j = y7j -(y1j >> 2)                                            */
541
        /*------------------------------------------------------------------*/
542
1.03M
        i_z0 = i_y0 + i_y6;
543
1.03M
        i_z1 = i_y1 + (i_y7 >> 2);
544
1.03M
        i_z2 = i_y2 + i_y4;
545
1.03M
        i_z3 = i_y3 + (i_y5 >> 2);
546
1.03M
        i_z4 = i_y2 - i_y4;
547
1.03M
        i_z5 = (i_y3 >> 2) - i_y5;
548
1.03M
        i_z6 = i_y0 - i_y6;
549
1.03M
        i_z7 = i_y7 - (i_y1 >> 2);
550
551
        /*------------------------------------------------------------------*/
552
        /* x0j = z0j + z7j                                                  */
553
        /* x1j = z2j + z5j                                                  */
554
        /* x2j = z4j + z3j                                                  */
555
        /* x3j = z6j + z1j                                                  */
556
        /* x4j = z6j -z1j                                                   */
557
        /* x5j = z4j -z3j                                                   */
558
        /* x6j = z2j -z5j                                                   */
559
        /* x7j = z0j -z7j                                                   */
560
        /*------------------------------------------------------------------*/
561
1.03M
        i_macro = ((i_z0 + i_z7 + 32) >> 6) + *pu1_pred_ptr;
562
1.03M
        *pu1_out = CLIP_U8(i_macro);
563
        /* Change uc_recBuffer to Point to next element in the same column*/
564
1.03M
        pu1_pred_ptr += pred_strd;
565
1.03M
        pu1_out += out_strd;
566
567
1.03M
        i_macro = ((i_z2 + i_z5 + 32) >> 6) + *pu1_pred_ptr;
568
1.03M
        *pu1_out = CLIP_U8(i_macro);
569
1.03M
        pu1_pred_ptr += pred_strd;
570
1.03M
        pu1_out += out_strd;
571
572
1.03M
        i_macro = ((i_z4 + i_z3 + 32) >> 6) + *pu1_pred_ptr;
573
1.03M
        *pu1_out = CLIP_U8(i_macro);
574
1.03M
        pu1_pred_ptr += pred_strd;
575
1.03M
        pu1_out += out_strd;
576
577
1.03M
        i_macro = ((i_z6 + i_z1 + 32) >> 6) + *pu1_pred_ptr;
578
1.03M
        *pu1_out = CLIP_U8(i_macro);
579
1.03M
        pu1_pred_ptr += pred_strd;
580
1.03M
        pu1_out += out_strd;
581
582
1.03M
        i_macro = ((i_z6 - i_z1 + 32) >> 6) + *pu1_pred_ptr;
583
1.03M
        *pu1_out = CLIP_U8(i_macro);
584
1.03M
        pu1_pred_ptr += pred_strd;
585
1.03M
        pu1_out += out_strd;
586
587
1.03M
        i_macro = ((i_z4 - i_z3 + 32) >> 6) + *pu1_pred_ptr;
588
1.03M
        *pu1_out = CLIP_U8(i_macro);
589
1.03M
        pu1_pred_ptr += pred_strd;
590
1.03M
        pu1_out += out_strd;
591
592
1.03M
        i_macro = ((i_z2 - i_z5 + 32) >> 6) + *pu1_pred_ptr;
593
1.03M
        *pu1_out = CLIP_U8(i_macro);
594
1.03M
        pu1_pred_ptr += pred_strd;
595
1.03M
        pu1_out += out_strd;
596
597
1.03M
        i_macro = ((i_z0 - i_z7 + 32) >> 6) + *pu1_pred_ptr;
598
1.03M
        *pu1_out = CLIP_U8(i_macro);
599
600
1.03M
        pi2_tmp_ptr++;
601
1.03M
        pu1_out_ptr++;
602
1.03M
        pu1_pred++;
603
1.03M
    }
604
129k
}
605
606
/**
607
********************************************************************************
608
*
609
* @brief This function reconstructs a 8x8 sub block from quantized residue and
610
*  prediction buffer, if only dc value is present
611
*
612
* @par Description:
613
*  The quantized residue is first inverse quantized, then inverse transformed.
614
*  This inverse transformed content is added to the prediction buffer to recon-
615
*  struct the end output
616
*
617
* @param[in] pi2_src
618
*  quantized 8x8 block
619
*
620
* @param[in] pu1_pred
621
*  prediction 8x8 block
622
*
623
* @param[out] pu1_out
624
*  reconstructed 8x8 block
625
*
626
* @param[in] pred_strd
627
*  Prediction buffer stride
628
*
629
* @param[in] out_strd
630
*  recon buffer Stride
631
*
632
* @param[in] pu2_iscale_mat
633
*  pointer to inverse scaling matrix
634
*
635
* @param[in] pu2_weigh_mat
636
*  pointer to weight matrix
637
*
638
* @param[in] qp_div
639
*  Floor (qp/6)
640
*
641
* @param[in] pi2_tmp
642
*  UNUSED by this function: only the DC coefficient is processed, so no
642
*  intermediate buffer is required
644
*
645
* @param[in] iq_start_idx
646
*  UNUSED
647
*
648
* @param[in] pi2_dc_ld_addr
649
*  UNUSED
650
*
651
* @returns none
652
*
653
* @remarks none
654
*
655
*******************************************************************************
656
*/
657
void ih264_iquant_itrans_recon_8x8_dc(WORD16 *pi2_src,
658
                                      UWORD8 *pu1_pred,
659
                                      UWORD8 *pu1_out,
660
                                      WORD32 pred_strd,
661
                                      WORD32 out_strd,
662
                                      const UWORD16 *pu2_iscale_mat,
663
                                      const UWORD16 *pu2_weigh_mat,
664
                                      UWORD32 qp_div,
665
                                      WORD16 *pi2_tmp,
666
                                      WORD32 iq_start_idx,
667
                                      WORD16 *pi2_dc_ld_addr)
668
26.5k
{
669
26.5k
    UWORD8 *pu1_pred_ptr = pu1_pred;
670
26.5k
    UWORD8 *pu1_out_ptr = pu1_out;
671
26.5k
    WORD16 x, i, i_macro;
672
26.5k
    WORD32 q;
673
26.5k
    WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
674
675
26.5k
    UNUSED(pi2_tmp);
676
26.5k
    UNUSED(iq_start_idx);
677
26.5k
    UNUSED(pi2_dc_ld_addr);
678
    /*************************************************************/
679
    /* Dequantization of coefficients. Will be replaced by SIMD  */
680
    /* operations on platform. Note : DC coeff is not scaled     */
681
    /*************************************************************/
682
26.5k
    q = pi2_src[0];
683
26.5k
    INV_QUANT(q, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
684
26.5k
    i_macro = (q + 32) >> 6;
685
    /* Perform Inverse transform */
686
    /*--------------------------------------------------------------------*/
687
    /* IDCT [ Horizontal transformation ]                                 */
688
    /*--------------------------------------------------------------------*/
689
    /*--------------------------------------------------------------------*/
690
    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
691
    /*                                                                    */
692
    /* Add the prediction and store it back to reconstructed frame buffer */
693
    /* [Prediction buffer itself in this case]                            */
694
    /*--------------------------------------------------------------------*/
695
238k
    for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
696
212k
    {
697
212k
        pu1_pred_ptr = pu1_pred;
698
212k
        pu1_out = pu1_out_ptr;
699
700
212k
        x = i_macro + *pu1_pred_ptr;
701
212k
        *pu1_out = CLIP_U8(x);
702
        /* Change uc_recBuffer to Point to next element in the same column*/
703
212k
        pu1_pred_ptr += pred_strd;
704
212k
        pu1_out += out_strd;
705
706
212k
        x = i_macro + *pu1_pred_ptr;
707
212k
        *pu1_out = CLIP_U8(x);
708
212k
        pu1_pred_ptr += pred_strd;
709
212k
        pu1_out += out_strd;
710
711
212k
        x = i_macro + *pu1_pred_ptr;
712
212k
        *pu1_out = CLIP_U8(x);
713
212k
        pu1_pred_ptr += pred_strd;
714
212k
        pu1_out += out_strd;
715
716
212k
        x = i_macro + *pu1_pred_ptr;
717
212k
        *pu1_out = CLIP_U8(x);
718
212k
        pu1_pred_ptr += pred_strd;
719
212k
        pu1_out += out_strd;
720
721
212k
        x = i_macro + *pu1_pred_ptr;
722
212k
        *pu1_out = CLIP_U8(x);
723
212k
        pu1_pred_ptr += pred_strd;
724
212k
        pu1_out += out_strd;
725
726
212k
        x = i_macro + *pu1_pred_ptr;
727
212k
        *pu1_out = CLIP_U8(x);
728
212k
        pu1_pred_ptr += pred_strd;
729
212k
        pu1_out += out_strd;
730
731
212k
        x = i_macro + *pu1_pred_ptr;
732
212k
        *pu1_out = CLIP_U8(x);
733
212k
        pu1_pred_ptr += pred_strd;
734
212k
        pu1_out += out_strd;
735
736
212k
        x = i_macro + *pu1_pred_ptr;
737
212k
        *pu1_out = CLIP_U8(x);
738
739
212k
        pu1_out_ptr++;
740
212k
        pu1_pred++;
741
212k
    }
742
26.5k
}
743
744
/**
745
********************************************************************************
746
*
747
* @brief This function reconstructs a 4x4 sub block from quantized residue and
748
*  prediction buffer
749
*
750
* @par Description:
751
*  The quantized residue is first inverse quantized, then inverse transformed.
752
*  This inverse transformed content is added to the prediction buffer to recon-
753
*  struct the end output
754
*
755
* @param[in] pi2_src
756
*  quantized 4x4 block
757
*
758
* @param[in] pu1_pred
759
*  prediction 4x4 block
760
*
761
* @param[out] pu1_out
762
*  reconstructed 4x4 block
763
*
764
* @param[in] pred_strd
765
*  Prediction buffer stride
766
*
767
* @param[in] out_strd
768
*  recon buffer Stride
769
*
770
* @param[in] pu2_iscal_mat
771
*  pointer to inverse scaling matrix
772
*
773
* @param[in] pu2_weigh_mat
774
*  pointer to weight matrix
775
*
776
* @param[in] u4_qp_div_6
777
*  Floor (qp/6)
778
*
779
* @param[in] pi2_tmp
780
*  temporary buffer of size 1*16
781
*
782
* @param[in] pi2_dc_src
783
*  Address to load DC value of the 4x4 blk
784
*
785
* @returns none
786
*
787
* @remarks none
788
*
789
*******************************************************************************
790
*/
791
void ih264_iquant_itrans_recon_chroma_4x4(WORD16 *pi2_src,
792
                                          UWORD8 *pu1_pred,
793
                                          UWORD8 *pu1_out,
794
                                          WORD32 pred_strd,
795
                                          WORD32 out_strd,
796
                                          const UWORD16 *pu2_iscal_mat,
797
                                          const UWORD16 *pu2_weigh_mat,
798
                                          UWORD32 u4_qp_div_6,
799
                                          WORD16 *pi2_tmp,
800
                                          WORD16 *pi2_dc_src)
801
184k
{
802
184k
    WORD16 *pi2_src_ptr = pi2_src;
803
184k
    WORD16 *pi2_tmp_ptr = pi2_tmp;
804
184k
    UWORD8 *pu1_pred_ptr = pu1_pred;
805
184k
    UWORD8 *pu1_out_ptr = pu1_out;
806
184k
    WORD16 x0, x1, x2, x3, i;
807
184k
    WORD32 q0, q1, q2, q3;
808
184k
    WORD16 i_macro;
809
184k
    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
810
811
    /* inverse quant */
812
    /* horizontal inverse transform */
813
923k
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
814
738k
    {
815
738k
        if(i == 0)
816
184k
        {
817
184k
            q0 = pi2_dc_src[0];
818
184k
        }
819
553k
        else
820
553k
        {
821
553k
            q0 = pi2_src_ptr[0];
822
553k
            INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
823
553k
        }
824
825
738k
        q2 = pi2_src_ptr[2];
826
738k
        INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact, 4);
827
828
738k
        x0 = q0 + q2;
829
738k
        x1 = q0 - q2;
830
831
738k
        q1 = pi2_src_ptr[1];
832
738k
        INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact, 4);
833
834
738k
        q3 = pi2_src_ptr[3];
835
738k
        INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact, 4);
836
837
738k
        x2 = (q1 >> 1) - q3;
838
738k
        x3 = q1 + (q3 >> 1);
839
840
738k
        pi2_tmp_ptr[0] = x0 + x3;
841
738k
        pi2_tmp_ptr[1] = x1 + x2;
842
738k
        pi2_tmp_ptr[2] = x1 - x2;
843
738k
        pi2_tmp_ptr[3] = x0 - x3;
844
845
738k
        pi2_src_ptr += SUB_BLK_WIDTH_4x4;
846
738k
        pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
847
738k
        pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
848
738k
        pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
849
738k
    }
850
851
    /* vertical inverse transform */
852
184k
    pi2_tmp_ptr = pi2_tmp;
853
923k
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
854
738k
    {
855
738k
        pu1_pred_ptr = pu1_pred;
856
738k
        pu1_out = pu1_out_ptr;
857
858
738k
        x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
859
738k
        x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
860
738k
        x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
861
738k
        x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
862
863
        /* inverse prediction */
864
738k
        i_macro = x0 + x3;
865
738k
        i_macro = ((i_macro + 32) >> 6);
866
738k
        i_macro += *pu1_pred_ptr;
867
738k
        *pu1_out = CLIP_U8(i_macro);
868
738k
        pu1_pred_ptr += pred_strd;
869
738k
        pu1_out += out_strd;
870
871
738k
        i_macro = x1 + x2;
872
738k
        i_macro = ((i_macro + 32) >> 6);
873
738k
        i_macro += *pu1_pred_ptr;
874
738k
        *pu1_out = CLIP_U8(i_macro);
875
738k
        pu1_pred_ptr += pred_strd;
876
738k
        pu1_out += out_strd;
877
878
738k
        i_macro = x1 - x2;
879
738k
        i_macro = ((i_macro + 32) >> 6);
880
738k
        i_macro += *pu1_pred_ptr;
881
738k
        *pu1_out = CLIP_U8(i_macro);
882
738k
        pu1_pred_ptr += pred_strd;
883
738k
        pu1_out += out_strd;
884
885
738k
        i_macro = x0 - x3;
886
738k
        i_macro = ((i_macro + 32) >> 6);
887
738k
        i_macro += *pu1_pred_ptr;
888
738k
        *pu1_out = CLIP_U8(i_macro);
889
890
738k
        pi2_tmp_ptr++;
891
738k
        pu1_out_ptr += 2; // Interleaved store for output
892
738k
        pu1_pred += 2; // Interleaved load for pred buffer
893
738k
    }
894
184k
}
895
896
/**
897
********************************************************************************
898
*
899
* @brief This function reconstructs a 4x4 sub block from quantized residue and
900
*  prediction buffer if only dc value is present for residue
901
*
902
* @par Description:
903
*  The quantized residue is first inverse quantized,
904
*  This inverse quantized content is added to the prediction buffer to recon-
905
*  struct the end output
906
*
907
* @param[in] pi2_src
908
*  quantized dc coefficient
909
*
910
* @param[in] pu1_pred
911
*  prediction 4x4 block in interleaved format
912
*
913
* @param[in] pred_strd,
914
*  Prediction buffer stride in interleaved format
915
*
916
* @param[in] out_strd
917
*  recon buffer Stride
918
*
919
* @param[in] pu2_iscal_mat
920
*  pointer to inverse scaling matrix
921
*
922
* @param[in] pu2_weigh_mat
923
*  pointer to weight matrix
924
*
925
* @param[in] u4_qp_div_6
926
*  Floor (qp/6)
927
*
928
* @param[in] pi2_tmp
929
*  temporary buffer of size 1*16
930
*
931
* @param[in] pi2_dc_src
932
*  Address to load DC value of the 4x4 blk
933
*
934
* @returns none
935
*
936
* @remarks none
937
*
938
*******************************************************************************
939
*/
940
void ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 *pi2_src,
941
                                             UWORD8 *pu1_pred,
942
                                             UWORD8 *pu1_out,
943
                                             WORD32 pred_strd,
944
                                             WORD32 out_strd,
945
                                             const UWORD16 *pu2_iscal_mat,
946
                                             const UWORD16 *pu2_weigh_mat,
947
                                             UWORD32 u4_qp_div_6,
948
                                             WORD16 *pi2_tmp,
949
                                             WORD16 *pi2_dc_src)
950
365k
{
951
365k
    UWORD8 *pu1_pred_ptr = pu1_pred;
952
365k
    UWORD8 *pu1_out_ptr = pu1_out;
953
365k
    WORD32 q0;
954
365k
    WORD16 x, i_macro, i;
955
956
365k
    UNUSED(pi2_src);
957
365k
    UNUSED(pu2_iscal_mat);
958
365k
    UNUSED(pu2_weigh_mat);
959
365k
    UNUSED(u4_qp_div_6);
960
365k
    UNUSED(pi2_tmp);
961
962
365k
    q0 = pi2_dc_src[0];    // Restoring dc value for intra case3
963
365k
    i_macro = ((q0 + 32) >> 6);
964
965
1.82M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
966
1.46M
    {
967
1.46M
        pu1_pred_ptr = pu1_pred;
968
1.46M
        pu1_out = pu1_out_ptr;
969
970
        /* inverse prediction */
971
1.46M
        x = i_macro + *pu1_pred_ptr;
972
1.46M
        *pu1_out =  CLIP_U8(x);
973
1.46M
        pu1_pred_ptr += pred_strd;
974
1.46M
        pu1_out += out_strd;
975
976
1.46M
        x = i_macro + *pu1_pred_ptr;
977
1.46M
        *pu1_out = CLIP_U8(x);
978
1.46M
        pu1_pred_ptr += pred_strd;
979
1.46M
        pu1_out += out_strd;
980
981
1.46M
        x = i_macro + *pu1_pred_ptr;
982
1.46M
        *pu1_out = CLIP_U8(x);
983
1.46M
        pu1_pred_ptr += pred_strd;
984
1.46M
        pu1_out += out_strd;
985
986
1.46M
        x = i_macro + *pu1_pred_ptr;
987
1.46M
        *pu1_out = CLIP_U8(x);
988
989
1.46M
        pu1_out_ptr+=2;
990
1.46M
        pu1_pred+=2;
991
1.46M
    }
992
365k
}