Coverage Report

Created: 2025-07-18 07:04

/src/libavc/common/svc/isvc_resi_trans_quant.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2022 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
 */
20
/**
21
 *******************************************************************************
22
 * @file
23
 *  isvc_resi_trans_quant.c
24
 *
25
 * @brief
26
 *  Contains function definitions single stage  forward transform for H.264
27
 *  It will calculate the residue, do the cf and then do quantization
28
 *
29
 * @author
30
 *  Ittiam
31
 *
32
 * @par List of Functions:
33
 *  - isvc_resi_trans_quant_4x4()
34
 *  - isvc_resi_trans_quant_chroma_4x4()
35
 *  - isvc_hadamard_quant_4x4()
36
 *  - isvc_hadamard_quant_2x2_uv()
37
 *  - isvc_resi_trans_quant_8x8()
38
 *
39
 * @remarks
40
 *******************************************************************************
41
 */
42
/* System include files */
43
#include <stdbool.h>
44
#include <stddef.h>
45
46
/* User include files */
47
#include "ih264_typedefs.h"
48
#include "ih264_defs.h"
49
#include "ih264_size_defs.h"
50
#include "ih264_macros.h"
51
#include "ih264_trans_macros.h"
52
#include "ih264_trans_data.h"
53
#include "ih264_structs.h"
54
#include "isvc_trans_quant_itrans_iquant.h"
55
56
/**
 *******************************************************************************
 *
 * @brief
 *   Subtracts an upsampled residual sample from a residue sample and passes
 *   the difference through CLIP3 with the bounds -UINT8_MAX and UINT8_MAX
 *
 * @param[in] i2_residue
 *   Residue sample (minuend)
 *
 * @param[in] i2_upsampled_res
 *   Upsampled residual sample (subtrahend)
 *
 * @returns
 *   Result of CLIP3 applied to the difference
 *
 *******************************************************************************
 */
static FORCEINLINE WORD16 isvc_subtract_upsampled_res(WORD16 i2_residue, WORD16 i2_upsampled_res)
{
    const WORD16 i2_limit = (WORD16) UINT8_MAX;
    const WORD32 i4_difference = i2_residue - i2_upsampled_res;

    return (CLIP3(-i2_limit, i2_limit, i4_difference));
}
60
61
/**
62
 *******************************************************************************
63
 *
64
 * @brief
65
 *   This function performs forward transform and quantization on a 4*4 block
66
 *
67
 * @par Description:
68
 *   The function accepts source buffer and estimation buffer. From these, it
69
 *   computes the residue. This is residue is then transformed and quantized.
70
 *   The transform and quantization are in placed computed. They use the residue
71
 *   buffer for this.
72
 *
73
 * @param[in] pu1_src
74
 *   Pointer to source sub-block
75
 *
76
 * @param[in] pu1_pred
77
 *   Pointer to prediction sub-block
78
 *
79
 * @param[in] pi2_out
80
 *   Pointer to residual sub-block
81
 *
82
 * @param[in] i4_src_stride
83
 *   Source stride
84
 *
85
 * @param[in] i4_pred_stride
86
 *   Prediction stride
87
 *
88
 * @param[in] dst_strd
89
 *   Destination stride
90
 *
91
 * @param[in] u4_qbits
92
 *    QP_BITS_h264_4x4 + floor(QP/6)
93
 *
94
 * @param[in] pu2_threshold_matrix
95
 *   Pointer to Forward Quant Threshold Matrix
96
 *
97
 * @param[in] pu2_scale_matrix
98
 *   Pointer to Forward Quant Scale Matrix
99
 *
100
 * @param[in] u4_round_factor
101
 *   Quantization Round factor
102
 *
103
 * @param[out] pu1_nnz
104
 *   Total non-zero coefficients in the current sub-block
105
 *
106
 * @returns
107
 *
108
 * @remarks
109
 *   None
110
 *
111
 *******************************************************************************
112
 */
113
void isvc_resi_trans_quant_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred,
114
                               buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res,
115
                               resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz,
116
                               WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res)
117
96.1M
{
118
96.1M
    UWORD32 i;
119
96.1M
    WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
120
96.1M
    WORD32 i4_value;
121
122
96.1M
    UWORD8 *pu1_src = ps_src->pv_data;
123
96.1M
    UWORD8 *pu1_pred = ps_pred->pv_data;
124
96.1M
    WORD16 *pi2_out = ps_out->pv_data;
125
96.1M
    WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL;
126
96.1M
    WORD32 i4_src_stride = ps_src->i4_data_stride;
127
96.1M
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
128
18.4E
    WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0;
129
96.1M
    WORD16 *pi2_out_tmp = pi2_out;
130
96.1M
    UWORD32 u4_nonzero_coeff = 0;
131
96.1M
    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
132
96.1M
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
133
96.1M
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
134
96.1M
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
135
136
476M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
137
380M
    {
138
        /* computing prediction error (residue) */
139
380M
        x4 = pu1_src[0] - pu1_pred[0];
140
380M
        x5 = pu1_src[1] - pu1_pred[1];
141
380M
        x6 = pu1_src[2] - pu1_pred[2];
142
380M
        x7 = pu1_src[3] - pu1_pred[3];
143
144
380M
        if(u1_use_upsampled_res)
145
2.22M
        {
146
2.22M
            x4 = isvc_subtract_upsampled_res(x4, pi2_upsampled_res[0]);
147
2.22M
            x5 = isvc_subtract_upsampled_res(x5, pi2_upsampled_res[1]);
148
2.22M
            x6 = isvc_subtract_upsampled_res(x6, pi2_upsampled_res[2]);
149
2.22M
            x7 = isvc_subtract_upsampled_res(x7, pi2_upsampled_res[3]);
150
2.22M
        }
151
152
        /* Horizontal transform */
153
380M
        x0 = x4 + x7;
154
380M
        x1 = x5 + x6;
155
380M
        x2 = x5 - x6;
156
380M
        x3 = x4 - x7;
157
158
380M
        pi2_out_tmp[0] = x0 + x1;
159
380M
        pi2_out_tmp[1] = (x3 << 1) + x2;
160
380M
        pi2_out_tmp[2] = x0 - x1;
161
380M
        pi2_out_tmp[3] = x3 - (x2 << 1);
162
163
        /* pointing to next row; */
164
380M
        pu1_src += i4_src_stride;
165
380M
        pu1_pred += i4_pred_stride;
166
380M
        pi2_out_tmp += 4;
167
380M
        pi2_upsampled_res += i4_upsampled_res_stride;
168
380M
    }
169
170
96.1M
    pi2_out_tmp = pi2_out;
171
172
467M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
173
370M
    {
174
        /* Vertical transform and quantization */
175
370M
        x4 = pi2_out_tmp[0];
176
370M
        x5 = pi2_out_tmp[4];
177
370M
        x6 = pi2_out_tmp[8];
178
370M
        x7 = pi2_out_tmp[12];
179
180
370M
        x0 = x4 + x7;
181
370M
        x1 = x5 + x6;
182
370M
        x2 = x5 - x6;
183
370M
        x3 = x4 - x7;
184
185
        /* quantization is done in place */
186
187
370M
        i4_value = x0 + x1;
188
189
370M
        if(i == 0)
190
96.7M
        {
191
96.7M
            (*pi2_dc_out) = i4_value;
192
96.7M
        }
193
194
370M
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
195
370M
                  u4_nonzero_coeff);
196
370M
        pi2_out_tmp[0] = i4_value;
197
198
370M
        i4_value = (x3 << 1) + x2;
199
370M
        FWD_QUANT(i4_value, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits,
200
370M
                  u4_nonzero_coeff);
201
370M
        pi2_out_tmp[4] = i4_value;
202
203
370M
        i4_value = x0 - x1;
204
370M
        FWD_QUANT(i4_value, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits,
205
370M
                  u4_nonzero_coeff);
206
370M
        pi2_out_tmp[8] = i4_value;
207
208
370M
        i4_value = x3 - (x2 << 1);
209
370M
        FWD_QUANT(i4_value, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor,
210
370M
                  u4_qbits, u4_nonzero_coeff);
211
370M
        pi2_out_tmp[12] = i4_value;
212
213
370M
        pi2_out_tmp++;
214
370M
        pu2_scale_matrix++;
215
370M
        pu2_threshold_matrix++;
216
370M
    }
217
218
    /* Return total nonzero coefficients in the current sub block */
219
96.1M
    *pu1_nnz = u4_nonzero_coeff;
220
96.1M
}
221
222
/**
223
 *******************************************************************************
224
 *
225
 * @brief
226
 *   This function performs forward transform and quantization on a 4*4 chroma
227
 *block with interleaved values
228
 *
229
 * @par Description:
230
 *   The function accepts source buffer and estimation buffer. From these, it
231
 *   computes the residue. This is residue is then transformed and quantized.
232
 *   The transform and quantization are in placed computed. They use the residue
233
 *   buffer for this.
234
 *
235
 * @param[in] pu1_src
236
 *   Pointer to source sub-block
237
 *
238
 * @param[in] pu1_pred
239
 *   Pointer to prediction sub-block
240
 *
241
 * @param[in] pi2_out
242
 *   Pointer to residual sub-block
243
 *
244
 * @param[in] i4_src_stride
245
 *   Source stride
246
 *
247
 * @param[in] i4_pred_stride
248
 *   Prediction stride
249
 *
250
 * @param[in] dst_strd
251
 *   Destination stride
252
 *
253
 * @param[in] u4_qbits
254
 *    QP_BITS_h264_4x4 + floor(QP/6)
255
 *
256
 * @param[in] pu2_threshold_matrix
257
 *   Pointer to Forward Quant Threshold Matrix
258
 *
259
 * @param[in] pu2_scale_matrix
260
 *   Pointer to Forward Quant Scale Matrix
261
 *
262
 * @param[in] u4_round_factor
263
 *   Quantization Round factor
264
 *
265
 * @param[out] pu1_nnz
266
 *   Total non-zero coefficients in the current sub-block
267
 *
268
 * @returns
269
 *
270
 * @remarks
271
 *   None
272
 *
273
 *******************************************************************************
274
 */
275
void isvc_resi_trans_quant_chroma_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred,
276
                                      buffer_container_t *ps_out,
277
                                      buffer_container_t *ps_upsampled_res,
278
                                      resi_trans_quant_constants_t *ps_quant_constants,
279
                                      UWORD8 *pu1_nnz, WORD16 *pi2_dc_out,
280
                                      UWORD8 u1_use_upsampled_res)
281
47.8M
{
282
47.8M
    UWORD32 i;
283
47.8M
    WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
284
47.8M
    WORD32 i4_value;
285
286
47.8M
    UWORD8 *pu1_src = ps_src->pv_data;
287
47.8M
    UWORD8 *pu1_pred = ps_pred->pv_data;
288
47.8M
    WORD16 *pi2_out = ps_out->pv_data;
289
47.9M
    WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL;
290
47.8M
    WORD32 i4_src_stride = ps_src->i4_data_stride;
291
47.8M
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
292
18.4E
    WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0;
293
47.8M
    WORD16 *pi2_out_tmp = pi2_out;
294
47.8M
    UWORD32 u4_nonzero_coeff = 0;
295
47.8M
    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
296
47.8M
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
297
47.8M
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
298
47.8M
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
299
300
239M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
301
191M
    {
302
        /* computing prediction error (residue) */
303
191M
        x4 = pu1_src[0] - pu1_pred[0];
304
191M
        x5 = pu1_src[2] - pu1_pred[2];
305
191M
        x6 = pu1_src[4] - pu1_pred[4];
306
191M
        x7 = pu1_src[6] - pu1_pred[6];
307
308
191M
        if(u1_use_upsampled_res)
309
1.19M
        {
310
1.19M
            x4 = isvc_subtract_upsampled_res(x4, pi2_upsampled_res[0]);
311
1.19M
            x5 = isvc_subtract_upsampled_res(x5, pi2_upsampled_res[1]);
312
1.19M
            x6 = isvc_subtract_upsampled_res(x6, pi2_upsampled_res[2]);
313
1.19M
            x7 = isvc_subtract_upsampled_res(x7, pi2_upsampled_res[3]);
314
1.19M
        }
315
316
        /* Horizontal transform */
317
191M
        x0 = x4 + x7;
318
191M
        x1 = x5 + x6;
319
191M
        x2 = x5 - x6;
320
191M
        x3 = x4 - x7;
321
322
191M
        pi2_out_tmp[0] = x0 + x1;
323
191M
        pi2_out_tmp[1] = (x3 << 1) + x2;
324
191M
        pi2_out_tmp[2] = x0 - x1;
325
191M
        pi2_out_tmp[3] = x3 - (x2 << 1);
326
327
        /* pointing to next row; */
328
191M
        pu1_src += i4_src_stride;
329
191M
        pu1_pred += i4_pred_stride;
330
191M
        pi2_out_tmp += 4;
331
191M
        pi2_upsampled_res += i4_upsampled_res_stride;
332
191M
    }
333
47.8M
    pi2_out_tmp = pi2_out;
334
238M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
335
190M
    {
336
        /* Vertical transform and quantization */
337
190M
        x4 = pi2_out_tmp[0];
338
190M
        x5 = pi2_out_tmp[4];
339
190M
        x6 = pi2_out_tmp[8];
340
190M
        x7 = pi2_out_tmp[12];
341
342
190M
        x0 = x4 + x7;
343
190M
        x1 = x5 + x6;
344
190M
        x2 = x5 - x6;
345
190M
        x3 = x4 - x7;
346
347
        /* quantization is done in place */
348
349
190M
        i4_value = x0 + x1;
350
351
190M
        if(i == 0)
352
48.0M
        {
353
48.0M
            *pi2_dc_out = i4_value;
354
48.0M
        }
355
356
190M
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
357
190M
                  u4_nonzero_coeff);
358
190M
        pi2_out_tmp[0] = i4_value;
359
360
190M
        i4_value = (x3 << 1) + x2;
361
190M
        FWD_QUANT(i4_value, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits,
362
190M
                  u4_nonzero_coeff);
363
190M
        pi2_out_tmp[4] = i4_value;
364
365
190M
        i4_value = x0 - x1;
366
190M
        FWD_QUANT(i4_value, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits,
367
190M
                  u4_nonzero_coeff);
368
190M
        pi2_out_tmp[8] = i4_value;
369
370
190M
        i4_value = x3 - (x2 << 1);
371
190M
        FWD_QUANT(i4_value, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor,
372
190M
                  u4_qbits, u4_nonzero_coeff);
373
190M
        pi2_out_tmp[12] = i4_value;
374
375
190M
        pi2_out_tmp++;
376
190M
        pu2_scale_matrix++;
377
190M
        pu2_threshold_matrix++;
378
190M
    }
379
380
    /* Return total nonzero coefficients in the current sub block */
381
47.8M
    *pu1_nnz = u4_nonzero_coeff;
382
47.8M
}
383
384
/**
385
 *******************************************************************************
386
 *
387
 * @brief
388
 *   This function performs forward hadamard transform and quantization on a 4*4
389
 *block
390
 *
391
 * @par Description:
392
 *   The function accepts source buffer and estimation buffer. From these, it
393
 *   computes the residue. This is residue is then transformed and quantized.
394
 *   The transform and quantization are in placed computed. They use the residue
395
 *   buffer for this.
396
 *
397
 * @param[in] pu1_src
398
 *   Pointer to source sub-block
399
 *
400
 * @param[in] pu1_pred
401
 *   Pointer to prediction sub-block
402
 *
403
 * @param[in] pi2_out
404
 *   Pointer to residual sub-block
405
 *
406
 * @param[in] i4_src_stride
407
 *   Source stride
408
 *
409
 * @param[in] i4_pred_stride
410
 *   Prediction stride
411
 *
412
 * @param[in] dst_strd
413
 *   Destination stride
414
 *
415
 * @param[in] u4_qbits
416
 *    QP_BITS_h264_4x4 + floor(QP/6)
417
 *
418
 * @param[in] pu2_threshold_matrix
419
 *   Pointer to Forward Quant Threshold Matrix
420
 *
421
 * @param[in] pu2_scale_matrix
422
 *   Pointer to Forward Quant Scale Matrix
423
 *
424
 * @param[in] u4_round_factor
425
 *   Quantization Round factor
426
 *
427
 * @param[out] pu1_nnz
428
 *   Total non-zero coefficients in the current sub-block
429
 *
430
 * @returns
431
 *
432
 * @remarks
433
 *   None
434
 *
435
 */
436
437
void isvc_hadamard_quant_4x4(WORD16 *pi2_src, WORD16 *pi2_dst,
438
                             resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz)
439
3.51M
{
440
3.51M
    WORD32 i;
441
3.51M
    WORD32 x0, x1, x2, x3, x4, x5, x6, x7, i4_value;
442
443
3.51M
    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
444
3.51M
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
445
3.51M
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
446
3.51M
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
447
448
3.51M
    *pu1_nnz = 0;
449
450
17.5M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
451
14.0M
    {
452
14.0M
        x4 = pi2_src[0];
453
14.0M
        x5 = pi2_src[1];
454
14.0M
        x6 = pi2_src[2];
455
14.0M
        x7 = pi2_src[3];
456
457
14.0M
        x0 = x4 + x7;
458
14.0M
        x1 = x5 + x6;
459
14.0M
        x2 = x5 - x6;
460
14.0M
        x3 = x4 - x7;
461
462
14.0M
        pi2_dst[0] = x0 + x1;
463
14.0M
        pi2_dst[1] = x3 + x2;
464
14.0M
        pi2_dst[2] = x0 - x1;
465
14.0M
        pi2_dst[3] = x3 - x2;
466
467
14.0M
        pi2_src += 4;
468
14.0M
        pi2_dst += 4;
469
14.0M
    }
470
471
    /* Vertical transform and quantization */
472
3.51M
    pi2_dst -= SUB_BLK_WIDTH_4x4 << 2;
473
474
17.5M
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
475
14.0M
    {
476
14.0M
        x4 = pi2_dst[0];
477
14.0M
        x5 = pi2_dst[4];
478
14.0M
        x6 = pi2_dst[8];
479
14.0M
        x7 = pi2_dst[12];
480
481
14.0M
        x0 = x4 + x7;
482
14.0M
        x1 = x5 + x6;
483
14.0M
        x2 = x5 - x6;
484
14.0M
        x3 = x4 - x7;
485
486
14.0M
        i4_value = (x0 + x1) >> 1;
487
14.0M
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
488
14.0M
                  pu1_nnz[0]);
489
14.0M
        pi2_dst[0] = i4_value;
490
491
14.0M
        i4_value = (x3 + x2) >> 1;
492
14.0M
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
493
14.0M
                  pu1_nnz[0]);
494
14.0M
        pi2_dst[4] = i4_value;
495
496
14.0M
        i4_value = (x0 - x1) >> 1;
497
14.0M
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
498
14.0M
                  pu1_nnz[0]);
499
14.0M
        pi2_dst[8] = i4_value;
500
501
14.0M
        i4_value = (x3 - x2) >> 1;
502
14.0M
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
503
14.0M
                  pu1_nnz[0]);
504
14.0M
        pi2_dst[12] = i4_value;
505
506
14.0M
        pi2_dst++;
507
14.0M
    }
508
3.51M
}
509
510
/**
511
 *******************************************************************************
512
 *
513
 * @brief
514
 *   This function performs forward hadamard transform and quantization on a 2*2
515
 *block for both U and V planes
516
 *
517
 * @par Description:
518
 *   The function accepts source buffer and estimation buffer. From these, it
519
 *   computes the residue. This is residue is then transformed and quantized.
520
 *   The transform and quantization are in placed computed. They use the residue
521
 *   buffer for this.
522
 *
523
 * @param[in] pu1_src
524
 *   Pointer to source sub-block
525
 *
526
 * @param[in] pu1_pred
527
 *   Pointer to prediction sub-block
528
 *
529
 * @param[in] pi2_out
530
 *   Pointer to residual sub-block
531
 *
532
 * @param[in] i4_src_stride
533
 *   Source stride
534
 *
535
 * @param[in] i4_pred_stride
536
 *   Prediction stride
537
 *
538
 * @param[in] dst_strd
539
 *   Destination stride
540
 *
541
 * @param[in] u4_qbits
542
 *    QP_BITS_h264_4x4 + floor(QP/6)
543
 *
544
 * @param[in] pu2_threshold_matrix
545
 *   Pointer to Forward Quant Threshold Matrix
546
 *
547
 * @param[in] pu2_scale_matrix
548
 *   Pointer to Forward Quant Scale Matrix
549
 *
550
 * @param[in] u4_round_factor
551
 *   Quantization Round factor
552
 *
553
 * @param[out] pu1_nnz
554
 *   Total non-zero coefficients in the current sub-block
555
 *
556
 * @returns
557
 *
558
 * @remarks
559
 *   NNZ for dc is populated at 0 and 5th position of pu1_nnz
560
 *
561
 */
562
563
void isvc_hadamard_quant_2x2_uv(WORD16 *pi2_src, WORD16 *pi2_dst,
564
                                resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz)
565
6.03M
{
566
6.03M
    WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
567
6.03M
    WORD32 i4_value, plane;
568
569
6.03M
    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
570
6.03M
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
571
6.03M
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
572
6.03M
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
573
574
18.0M
    for(plane = 0; plane < 2; plane++)
575
12.0M
    {
576
12.0M
        pu1_nnz[plane] = 0;
577
578
        /* Horizontal transform */
579
12.0M
        x4 = pi2_src[0];
580
12.0M
        x5 = pi2_src[1];
581
12.0M
        x6 = pi2_src[2];
582
12.0M
        x7 = pi2_src[3];
583
584
12.0M
        x0 = x4 + x5;
585
12.0M
        x1 = x4 - x5;
586
12.0M
        x2 = x6 + x7;
587
12.0M
        x3 = x6 - x7;
588
589
        /* Vertical transform and quantization */
590
12.0M
        i4_value = (x0 + x2);
591
12.0M
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
592
12.0M
                  pu1_nnz[plane]);
593
12.0M
        pi2_dst[0] = i4_value;
594
595
12.0M
        i4_value = (x0 - x2);
596
12.0M
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
597
12.0M
                  pu1_nnz[plane]);
598
12.0M
        pi2_dst[2] = i4_value;
599
600
12.0M
        i4_value = (x1 - x3);
601
12.0M
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
602
12.0M
                  pu1_nnz[plane]);
603
12.0M
        pi2_dst[3] = i4_value;
604
605
12.0M
        i4_value = (x1 + x3);
606
12.0M
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
607
12.0M
                  pu1_nnz[plane]);
608
12.0M
        pi2_dst[1] = i4_value;
609
610
12.0M
        pi2_dst += 4;
611
12.0M
        pi2_src += 4;
612
12.0M
    }
613
6.03M
}
614
615
/*
616
 *******************************************************************************
617
 *
618
 * @brief
619
 *  This function performs Single stage forward transform CF8 and quantization
620
 *on 8*8 blocks for h.264
621
 *
622
 * @par Description:
623
 *  Performs single stage 8x8 forward transform CF8 after calculating the
624
 *residue The result is then quantized
625
 *
626
 * @param[in] pu1_src
627
 *  Input 8x8 pixels
628
 *
629
 * @param[in] pu1_pred
630
 *  Input 8x8 pixels
631
 *
632
 * @param[in] pi1_out
633
 * Output 8x8 pixels
634
 *
635
 * @param[in] u4_thresh
636
 *  Threshold under which the coeffs are not quantized
637
 *
638
 *  @param[in] u4_qp_div
639
 *  QP/6
640
 *
641
 *  @param[in] u4_qp_rem
642
 *  QP%6
643
 *
644
 * @param[in] u2_src_stride
645
 *  Source stride
646
 *
647
 * @param[in] i4_pred_stride
648
 * stride for prediciton buffer
649
 *
650
 *  @param[in] dst_strd
651
 *  stride for destination buffer
652
 *
653
 *  @param[in] pu4_quant_mat
654
 *  Pointer to the 4x4 quantization matrix
655
 *
656
 * @returns  Void
657
 *
658
 *
659
 *******************************************************************************
660
 */
661
void isvc_resi_trans_quant_8x8(buffer_container_t *ps_src, buffer_container_t *ps_pred,
662
                               buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res,
663
                               resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz,
664
                               WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res)
665
0
{
666
0
    UWORD32 i;
667
0
    WORD32 a0, a1, a2, a3, a4, a5, a6, a7;
668
0
    WORD32 r0, r1, r2, r3, r4, r5, r6, r7;
669
670
0
    UWORD8 *pu1_src = ps_src->pv_data;
671
0
    UWORD8 *pu1_pred = ps_pred->pv_data;
672
0
    WORD16 *pi2_out = ps_out->pv_data;
673
0
    WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL;
674
0
    WORD32 i4_src_stride = ps_src->i4_data_stride;
675
0
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
676
0
    WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0;
677
0
    WORD16 *pi2_out_tmp = pi2_out;
678
0
    UWORD32 u4_nonzero_coeff = 0;
679
0
    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
680
0
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
681
0
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
682
0
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;
683
684
0
    UNUSED(pi2_dc_out);
685
686
    /*Horizontal transform */
687
    /* we are going to use the a's and r's in a twisted way since */
688
    /*i dont want to declare more variables */
689
0
    for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i)
690
0
    {
691
0
        r0 = pu1_src[0];
692
0
        r0 -= pu1_pred[0];
693
0
        r1 = pu1_src[1];
694
0
        r1 -= pu1_pred[1];
695
0
        r2 = pu1_src[2];
696
0
        r2 -= pu1_pred[2];
697
0
        r3 = pu1_src[3];
698
0
        r3 -= pu1_pred[3];
699
0
        r4 = pu1_src[4];
700
0
        r4 -= pu1_pred[4];
701
0
        r5 = pu1_src[5];
702
0
        r5 -= pu1_pred[5];
703
0
        r6 = pu1_src[6];
704
0
        r6 -= pu1_pred[6];
705
0
        r7 = pu1_src[7];
706
0
        r7 -= pu1_pred[7];
707
708
0
        if(u1_use_upsampled_res)
709
0
        {
710
0
            r0 = isvc_subtract_upsampled_res(r0, pi2_upsampled_res[0]);
711
0
            r1 = isvc_subtract_upsampled_res(r1, pi2_upsampled_res[1]);
712
0
            r2 = isvc_subtract_upsampled_res(r2, pi2_upsampled_res[2]);
713
0
            r3 = isvc_subtract_upsampled_res(r3, pi2_upsampled_res[3]);
714
0
            r4 = isvc_subtract_upsampled_res(r4, pi2_upsampled_res[4]);
715
0
            r5 = isvc_subtract_upsampled_res(r5, pi2_upsampled_res[5]);
716
0
            r6 = isvc_subtract_upsampled_res(r6, pi2_upsampled_res[6]);
717
0
            r7 = isvc_subtract_upsampled_res(r7, pi2_upsampled_res[7]);
718
0
        }
719
720
0
        a0 = r0 + r7;
721
0
        a1 = r1 + r6;
722
0
        a2 = r2 + r5;
723
0
        a3 = r3 + r4;
724
725
0
        a4 = a0 + a3;
726
0
        a5 = a1 + a2;
727
0
        a6 = a0 - a3;
728
0
        a7 = a1 - a2;
729
730
0
        pi2_out_tmp[0] = a4 + a5;
731
732
0
        pi2_out_tmp[2] = a6 + (a7 >> 1);
733
0
        pi2_out_tmp[4] = a4 - a5;
734
0
        pi2_out_tmp[6] = (a6 >> 1) - a7;
735
736
0
        a0 = r0 - r7;
737
0
        a1 = r1 - r6;
738
0
        a2 = r2 - r5;
739
0
        a3 = r3 - r4;
740
741
0
        a4 = a1 + a2 + ((a0 >> 1) + a0);
742
0
        a5 = a0 - a3 - ((a2 >> 1) + a2);
743
0
        a6 = a0 + a3 - ((a1 >> 1) + a1);
744
0
        a7 = a1 - a2 + ((a3 >> 1) + a3);
745
746
0
        pi2_out_tmp[1] = a4 + (a7 >> 2);
747
0
        pi2_out_tmp[3] = a5 + (a6 >> 2);
748
0
        pi2_out_tmp[5] = a6 - (a5 >> 2);
749
0
        pi2_out_tmp[7] = (a4 >> 2) - a7;
750
751
0
        pu1_src += i4_src_stride;
752
0
        pu1_pred += i4_pred_stride;
753
0
        pi2_out_tmp += 8;
754
0
        pi2_upsampled_res += i4_upsampled_res_stride;
755
0
    }
756
757
    /*vertical transform and quant */
758
759
0
    pi2_out_tmp = pi2_out;
760
761
0
    for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i)
762
0
    {
763
0
        r0 = pi2_out_tmp[0];
764
0
        r1 = pi2_out_tmp[8];
765
0
        r2 = pi2_out_tmp[16];
766
0
        r3 = pi2_out_tmp[24];
767
0
        r4 = pi2_out_tmp[32];
768
0
        r5 = pi2_out_tmp[40];
769
0
        r6 = pi2_out_tmp[48];
770
0
        r7 = pi2_out_tmp[56];
771
772
0
        a0 = r0 + r7;
773
0
        a1 = r1 + r6;
774
0
        a2 = r2 + r5;
775
0
        a3 = r3 + r4;
776
777
0
        a4 = a0 + a3;
778
0
        a5 = a1 + a2;
779
0
        a6 = a0 - a3;
780
0
        a7 = a1 - a2;
781
782
0
        a0 = r0 - r7;
783
0
        a1 = r1 - r6;
784
0
        a2 = r2 - r5;
785
0
        a3 = r3 - r4;
786
787
0
        r0 = a4 + a5;
788
0
        r2 = a6 + (a7 >> 1);
789
0
        r4 = a4 - a5;
790
0
        r6 = (a6 >> 1) - a7;
791
792
0
        a4 = a1 + a2 + ((a0 >> 1) + a0);
793
0
        a5 = a0 - a3 - ((a2 >> 1) + a2);
794
0
        a6 = a0 + a3 - ((a1 >> 1) + a1);
795
0
        a7 = a1 - a2 + ((a3 >> 1) + a3);
796
797
0
        r1 = a4 + (a7 >> 2);
798
0
        r3 = a5 + (a6 >> 2);
799
0
        r5 = a6 - (a5 >> 2);
800
0
        r7 = (a4 >> 2) - a7;
801
802
0
        FWD_QUANT(r0, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
803
0
                  u4_nonzero_coeff);
804
0
        pi2_out_tmp[0] = r0;
805
806
0
        FWD_QUANT(r1, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits,
807
0
                  u4_nonzero_coeff);
808
0
        pi2_out_tmp[8] = r1;
809
810
0
        FWD_QUANT(r2, pu2_threshold_matrix[16], pu2_scale_matrix[16], u4_round_factor, u4_qbits,
811
0
                  u4_nonzero_coeff);
812
0
        pi2_out_tmp[16] = r2;
813
814
0
        FWD_QUANT(r3, pu2_threshold_matrix[24], pu2_scale_matrix[24], u4_round_factor, u4_qbits,
815
0
                  u4_nonzero_coeff);
816
0
        pi2_out_tmp[24] = r3;
817
818
0
        FWD_QUANT(r4, pu2_threshold_matrix[32], pu2_scale_matrix[32], u4_round_factor, u4_qbits,
819
0
                  u4_nonzero_coeff);
820
0
        pi2_out_tmp[32] = r4;
821
822
0
        FWD_QUANT(r5, pu2_threshold_matrix[40], pu2_scale_matrix[40], u4_round_factor, u4_qbits,
823
0
                  u4_nonzero_coeff);
824
0
        pi2_out_tmp[40] = r5;
825
826
0
        FWD_QUANT(r6, pu2_threshold_matrix[48], pu2_scale_matrix[48], u4_round_factor, u4_qbits,
827
0
                  u4_nonzero_coeff);
828
0
        pi2_out_tmp[48] = r6;
829
830
0
        FWD_QUANT(r7, pu2_threshold_matrix[56], pu2_scale_matrix[56], u4_round_factor, u4_qbits,
831
0
                  u4_nonzero_coeff);
832
0
        pi2_out_tmp[56] = r7;
833
834
0
        pi2_out_tmp++;
835
0
        pu2_scale_matrix++;
836
0
        pu2_threshold_matrix++;
837
0
    }
838
    /* Return total nonzero coefficients in the current sub block */
839
0
    *pu1_nnz = u4_nonzero_coeff;
840
0
}