Coverage Report

Created: 2023-06-07 07:34

/src/libhevc/encoder/ihevce_stasino_helpers.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/**
21
*******************************************************************************
22
* @file
23
*  ihevce_stasino_helpers.c
24
*
25
* @brief
26
*
27
* @author
28
*  Ittiam
29
*
30
* @par List of Functions:
31
*
32
* @remarks
33
*  None
34
*
35
*******************************************************************************
36
*/
37
38
/*****************************************************************************/
39
/* File Includes                                                             */
40
/*****************************************************************************/
41
/* System include files */
42
#include <stdio.h>
43
#include <stdlib.h>
44
#include <assert.h>
45
#include <string.h>
46
47
/* User include files */
48
#include "ihevc_typedefs.h"
49
#include "itt_video_api.h"
50
#include "ihevce_api.h"
51
52
#include "rc_cntrl_param.h"
53
#include "rc_frame_info_collector.h"
54
#include "rc_look_ahead_params.h"
55
56
#include "ihevc_defs.h"
57
#include "ihevc_structs.h"
58
#include "ihevc_platform_macros.h"
59
#include "ihevc_deblk.h"
60
#include "ihevc_itrans_recon.h"
61
#include "ihevc_chroma_itrans_recon.h"
62
#include "ihevc_chroma_intra_pred.h"
63
#include "ihevc_intra_pred.h"
64
#include "ihevc_inter_pred.h"
65
#include "ihevc_mem_fns.h"
66
#include "ihevc_padding.h"
67
#include "ihevc_weighted_pred.h"
68
#include "ihevc_sao.h"
69
#include "ihevc_resi_trans.h"
70
#include "ihevc_quant_iquant_ssd.h"
71
#include "ihevc_cabac_tables.h"
72
73
#include "ihevce_defs.h"
74
#include "ihevce_lap_enc_structs.h"
75
#include "ihevce_multi_thrd_structs.h"
76
#include "ihevce_me_common_defs.h"
77
#include "ihevce_had_satd.h"
78
#include "ihevce_error_codes.h"
79
#include "ihevce_bitstream.h"
80
#include "ihevce_cabac.h"
81
#include "ihevce_rdoq_macros.h"
82
#include "ihevce_function_selector.h"
83
#include "ihevce_enc_structs.h"
84
#include "ihevce_entropy_structs.h"
85
#include "ihevce_cmn_utils_instr_set_router.h"
86
#include "ihevce_enc_loop_structs.h"
87
#include "ihevce_stasino_helpers.h"
88
89
/*****************************************************************************/
90
/* Function Definitions                                                      */
91
/*****************************************************************************/
92
93
/**
94
*******************************************************************************
95
*
96
* @brief
97
*  This function calculates the variance of given data set.
98
*
99
* @par Description:
100
*  This function is mainly used to find the variance of the block of pixel values.
101
*  The block can be rectangular also. Single pass variance calculation
102
*  implementation.
103
*
104
* @param[in] p_input
105
*  The input buffer to calculate the variance.
106
*
107
* @param[out] pi4_mean
108
*  Pointer ot the mean of the datset
109
*
110
* @param[out] pi4_variance
111
*  Pointer tot he variabce of the data set
112
*
113
* @param[in] u1_is_hbd
114
*  1 if the data is in  high bit depth
115
*
116
* @param[in] stride
117
*  Stride for the input buffer
118
*
119
* @param[in] block_height
120
*  height of the pixel block
121
*
122
* @param[in] block_width
123
*  width of the pixel block
124
*
125
* @remarks
126
*  None
127
*
128
*******************************************************************************
129
*/
130
void ihevce_calc_variance(
131
    void *pv_input,
132
    WORD32 i4_stride,
133
    WORD32 *pi4_mean,
134
    UWORD32 *pu4_variance,
135
    UWORD8 u1_block_height,
136
    UWORD8 u1_block_width,
137
    UWORD8 u1_is_hbd,
138
    UWORD8 u1_disable_normalization)
139
0
{
140
0
    UWORD8 *pui1_buffer;  // pointer for 8 bit usecase
141
0
    WORD32 i, j;
142
0
    WORD32 total_elements;
143
144
0
    LWORD64 mean;
145
0
    ULWORD64 variance;
146
0
    ULWORD64 sum;
147
0
    ULWORD64 sq_sum;
148
149
    /* intialisation */
150
0
    total_elements = u1_block_height * u1_block_width;
151
0
    mean = 0;
152
0
    variance = 0;
153
0
    sum = 0;
154
0
    sq_sum = 0;
155
156
    /* handle the case of 8/10 bit depth separately */
157
0
    if(!u1_is_hbd)
158
0
    {
159
0
        pui1_buffer = (UWORD8 *)pv_input;
160
161
        /* loop over all the values in the block */
162
0
        for(i = 0; i < u1_block_height; i++)
163
0
        {
164
            /* loop over a row in the block */
165
0
            for(j = 0; j < u1_block_width; j++)
166
0
            {
167
0
                sum += pui1_buffer[i * i4_stride + j];
168
0
                sq_sum += (pui1_buffer[i * i4_stride + j] * pui1_buffer[i * i4_stride + j]);
169
0
            }
170
0
        }
171
172
0
        if(!u1_disable_normalization)
173
0
        {
174
0
            mean = sum / total_elements;
175
0
            variance =
176
0
                ((total_elements * sq_sum) - (sum * sum)) / (total_elements * (total_elements));
177
0
        }
178
0
        else
179
0
        {
180
0
            mean = sum;
181
0
            variance = ((total_elements * sq_sum) - (sum * sum));
182
0
        }
183
0
    }
184
185
    /* copy back the values to the output variables */
186
0
    *pi4_mean = mean;
187
0
    *pu4_variance = variance;
188
0
}
189
190
/**
191
*******************************************************************************
192
*
193
* @brief
194
*  This function calcluates the variance of given data set which is WORD16
195
*
196
* @par Description:
197
*  This function is mainly used to find the variance of the block of pixel values.
198
*  Single pass variance calculation implementation.
199
*
200
* @param[in] pv_input
201
*  The input buffer to calculate the variance.
202
*
203
*
204
* @param[in] stride
205
*  Stride for the input buffer
206
*
207
* @param[out] pi4_mean
208
*  Pointer ot the mean of the datset
209
*
210
* @param[out] pi4_variance
211
*  Pointer tot he variabce of the data set
212
*
213
* @param[in] block_height
214
*  height of the pixel block
215
*
216
* @param[in] block_width
217
*  width of the pixel block
218
*
219
*
220
* @remarks
221
*  None
222
*
223
*******************************************************************************/
224
void ihevce_calc_variance_signed(
225
    WORD16 *pv_input,
226
    WORD32 i4_stride,
227
    WORD32 *pi4_mean,
228
    UWORD32 *pu4_variance,
229
    UWORD8 u1_block_height,
230
    UWORD8 u1_block_width)
231
0
{
232
0
    WORD16 *pi2_buffer;  // poinbter for 10 bit use case
233
234
0
    WORD32 i, j;
235
0
    WORD32 total_elements;
236
237
0
    LWORD64 mean;
238
0
    LWORD64 variance;
239
0
    LWORD64 sum;
240
0
    LWORD64 sq_sum;
241
242
    /* intialisation */
243
0
    total_elements = u1_block_height * u1_block_width;
244
0
    mean = 0;
245
0
    variance = 0;
246
0
    sum = 0;
247
0
    sq_sum = 0;
248
249
0
    pi2_buffer = pv_input;
250
251
0
    for(i = 0; i < u1_block_height; i++)
252
0
    {
253
0
        for(j = 0; j < u1_block_width; j++)
254
0
        {
255
0
            sum += pi2_buffer[i * i4_stride + j];
256
0
            sq_sum += (pi2_buffer[i * i4_stride + j] * pi2_buffer[i * i4_stride + j]);
257
0
        }
258
0
    }
259
260
0
    mean = sum;  /// total_elements;
261
0
    variance = ((total_elements * sq_sum) - (sum * sum));  // / (total_elements * (total_elements) )
262
263
    /* copy back the values to the output variables */
264
0
    *pi4_mean = mean;
265
0
    *pu4_variance = variance;
266
0
}
267
268
/**
269
*******************************************************************************
270
*
271
* @brief
272
*  This function calculates the variance of a chrominance plane for 420SP data
273
*
274
* @par Description:
275
*  This function is mainly used to find the variance of the block of pixel values.
276
*  The block can be rectangular also. Single pass variance calculation
277
*  implementation.
278
*
279
* @param[in] p_input
280
*  The input buffer to calculate the variance.
281
*
282
* @param[in] stride
283
*  Stride for the input buffer
284
*
285
* @param[out] pi4_mean
286
*  Pointer ot the mean of the datset
287
*
288
* @param[out] pi4_variance
289
*  Pointer tot he variabce of the data set
290
*
291
* @param[in] block_height
292
*  height of the pixel block
293
*
294
* @param[in] block_width
295
*  width of the pixel block
296
*
297
* @param[in] u1_is_hbd
298
*  1 if the data is in  high bit depth
299
*
300
* @param[in] e_chroma_plane
301
*  is U or V
302
*
303
* @remarks
304
*  None
305
*
306
*******************************************************************************
307
*/
308
void ihevce_calc_chroma_variance(
309
    void *pv_input,
310
    WORD32 i4_stride,
311
    WORD32 *pi4_mean,
312
    UWORD32 *pu4_variance,
313
    UWORD8 u1_block_height,
314
    UWORD8 u1_block_width,
315
    UWORD8 u1_is_hbd,
316
    CHROMA_PLANE_ID_T e_chroma_plane)
317
0
{
318
0
    UWORD8 *pui1_buffer;  // pointer for 8 bit usecase
319
0
    WORD32 i, j;
320
0
    WORD32 total_elements;
321
322
0
    LWORD64 mean;
323
0
    ULWORD64 variance;
324
0
    LWORD64 sum;
325
0
    LWORD64 sq_sum;
326
327
    /* intialisation */
328
0
    total_elements = u1_block_height * u1_block_width;
329
0
    mean = 0;
330
0
    variance = 0;
331
0
    sum = 0;
332
0
    sq_sum = 0;
333
334
    /* handle the case of 8/10 bit depth separately */
335
0
    if(!u1_is_hbd)
336
0
    {
337
0
        pui1_buffer = (UWORD8 *)pv_input;
338
339
0
        pui1_buffer += e_chroma_plane;
340
341
        /* loop over all the values in the block */
342
0
        for(i = 0; i < u1_block_height; i++)
343
0
        {
344
            /* loop over a row in the block */
345
0
            for(j = 0; j < u1_block_width; j++)
346
0
            {
347
0
                sum += pui1_buffer[i * i4_stride + j * 2];
348
0
                sq_sum += (pui1_buffer[i * i4_stride + j * 2] * pui1_buffer[i * i4_stride + j * 2]);
349
0
            }
350
0
        }
351
352
0
        mean = sum / total_elements;
353
0
        variance = ((total_elements * sq_sum) - (sum * sum)) / (total_elements * (total_elements));
354
0
    }
355
356
    /* copy back the values to the output variables */
357
0
    *pi4_mean = mean;
358
0
    *pu4_variance = variance;
359
0
}
360
361
LWORD64 ihevce_inject_stim_into_distortion(
362
    void *pv_src,
363
    WORD32 i4_src_stride,
364
    void *pv_pred,
365
    WORD32 i4_pred_stride,
366
    LWORD64 i8_distortion,
367
    WORD32 i4_alpha_stim_multiplier,
368
    UWORD8 u1_blk_size,
369
    UWORD8 u1_is_hbd,
370
    UWORD8 u1_enable_psyRDOPT,
371
    CHROMA_PLANE_ID_T e_chroma_plane)
372
0
{
373
0
    if(!u1_enable_psyRDOPT)
374
0
    {
375
0
        UWORD32 u4_src_variance;
376
0
        UWORD32 u4_pred_variance;
377
0
        WORD32 i4_mean;
378
0
        WORD32 i4_noise_term;
379
380
0
        if(NULL_PLANE == e_chroma_plane)
381
0
        {
382
0
            ihevce_calc_variance(
383
0
                pv_src,
384
0
                i4_src_stride,
385
0
                &i4_mean,
386
0
                &u4_src_variance,
387
0
                u1_blk_size,
388
0
                u1_blk_size,
389
0
                u1_is_hbd,
390
0
                0);
391
392
0
            ihevce_calc_variance(
393
0
                pv_pred,
394
0
                i4_pred_stride,
395
0
                &i4_mean,
396
0
                &u4_pred_variance,
397
0
                u1_blk_size,
398
0
                u1_blk_size,
399
0
                u1_is_hbd,
400
0
                0);
401
0
        }
402
0
        else
403
0
        {
404
0
            ihevce_calc_chroma_variance(
405
0
                pv_src,
406
0
                i4_src_stride,
407
0
                &i4_mean,
408
0
                &u4_src_variance,
409
0
                u1_blk_size,
410
0
                u1_blk_size,
411
0
                u1_is_hbd,
412
0
                e_chroma_plane);
413
414
0
            ihevce_calc_chroma_variance(
415
0
                pv_pred,
416
0
                i4_pred_stride,
417
0
                &i4_mean,
418
0
                &u4_pred_variance,
419
0
                u1_blk_size,
420
0
                u1_blk_size,
421
0
                u1_is_hbd,
422
0
                e_chroma_plane);
423
0
        }
424
425
0
        i4_noise_term =
426
0
            ihevce_compute_noise_term(i4_alpha_stim_multiplier, u4_src_variance, u4_pred_variance);
427
428
0
        MULTIPLY_STIM_WITH_DISTORTION(i8_distortion, i4_noise_term, STIM_Q_FORMAT, ALPHA_Q_FORMAT);
429
430
0
        return i8_distortion;
431
0
    }
432
0
    else
433
0
    {
434
0
        return i8_distortion;
435
0
    }
436
0
}
437
438
/**
*******************************************************************************
*
* @brief
*  Decides whether a CU is noisy from the per-8x8-block noise flags.
*
* @par Description:
*  A CU of size 8 (or smaller) returns the flag stored for its block. A
*  larger CU is split into four quadrants of half the size, each quadrant is
*  evaluated recursively, and the CU is reported noisy when the results of the
*  quadrants add up to 2 or more.
*
* @param[in] pu1_is_8x8Blk_noisy
*  Array of per-block flags, indexed with (u1_cu_x_pos / 8) + u1_cu_y_pos
*  (NOTE(review): this matches a layout of 8 flags per row only when
*  u1_cu_y_pos is a multiple of 8 - confirm against the caller)
*
* @param[in] u1_cu_x_pos
*  Horizontal position of the CU
*
* @param[in] u1_cu_y_pos
*  Vertical position of the CU
*
* @param[in] u1_cu_size
*  Size of the CU
*
* @return
*  The stored flag for a leaf block, otherwise 1 if noisy and 0 if not
*
*******************************************************************************
*/
UWORD8 ihevce_determine_cu_noise_based_on_8x8Blk_data(
    UWORD8 *pu1_is_8x8Blk_noisy, UWORD8 u1_cu_x_pos, UWORD8 u1_cu_y_pos, UWORD8 u1_cu_size)
{
    UWORD8 u1_num_noisy_children = 0;
    UWORD8 u1_half_size;
    WORD32 i4_quadrant;

    /* Leaf of the recursion. "<=" rather than "==" so that a size which   */
    /* never halves to exactly 8 (e.g. 4 or 12) cannot shrink to 0 and     */
    /* recurse without end.                                                */
    if(u1_cu_size <= 8)
    {
        return pu1_is_8x8Blk_noisy[(u1_cu_x_pos / 8) + u1_cu_y_pos];
    }

    u1_half_size = u1_cu_size / 2;

    /* quadrants are visited in the order: top-left, top-right, */
    /* bottom-left, bottom-right                                */
    for(i4_quadrant = 0; i4_quadrant < 4; i4_quadrant++)
    {
        const UWORD8 u1_x = u1_cu_x_pos + ((i4_quadrant & 1) ? u1_half_size : 0);
        const UWORD8 u1_y = u1_cu_y_pos + ((i4_quadrant & 2) ? u1_half_size : 0);

        u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data(
            pu1_is_8x8Blk_noisy, u1_x, u1_y, u1_half_size);
    }

    return (u1_num_noisy_children >= 2);
}
466
467
/*!
468
******************************************************************************
469
* \if Function name : ihevce_psy_rd_cost_croma \endif
470
*
471
* \brief
472
*    Calculates the psyco visual cost for RD opt. This is
473
*
474
* \param[in] pui4_source_satd
475
*   This is the pointer to the array of 8x8 satd of the corresponding source CTB. This is pre calculated.
476
* \param[in] *pui1_recon
477
*   This si the pointer to the pred data.
478
* \param[in] recon_stride
479
*   This si the pred stride
480
* \param[in] pic_type
481
*   Picture type.
482
* \param[in] layer_id
483
*   Indicates the temporal layer.
484
* \param[in] lambda
485
*   This is the weighting factor for the cost.
486
* \param[in] is_hbd
487
*   This is the high bit depth flag which indicates if the bit depth of the pixels is 10 bit or 8 bit.
488
* \param[in] sub_sampling_type
489
*   This is the chroma subsampling type. 11 - for 420 and 13 for 422
490
* \return
491
*    the cost for the psyRDopt
492
*
493
* \author
494
*  Ittiam
495
*
496
*****************************************************************************
497
*/
498
LWORD64 ihevce_psy_rd_cost_croma(
499
    LWORD64 *pui4_source_satd,
500
    void *p_recon,
501
    WORD32 recon_stride_vert,
502
    WORD32 recond_stride_horz,
503
    WORD32 cu_size_luma,
504
    WORD32 pic_type,
505
    WORD32 layer_id,
506
    WORD32 lambda,
507
    WORD32 start_index,
508
    WORD32 is_hbd,
509
    WORD32 sub_sampling_type,
510
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
511
0
{
512
    /* declare local variables to store the SATD values for the pred  for the current block. */
513
0
    LWORD64 psy_rd_cost;
514
0
    UWORD32 lambda_mod;
515
0
    WORD32 psy_factor;
516
517
    /* declare local variables */
518
0
    WORD32 i;
519
0
    WORD32 cu_total_size;
520
0
    WORD32 num_comp_had_blocks;
521
522
0
    UWORD8 *pu1_l0_block;
523
0
    UWORD8 *pu1_l0_block_prev;
524
0
    UWORD8 *pu1_recon;
525
0
    WORD32 ht_offset;
526
0
    WORD32 wd_offset;
527
0
    WORD32 cu_ht;
528
0
    WORD32 cu_wd;
529
530
0
    WORD32 num_horz_blocks;
531
532
0
    WORD16 pi2_residue_had[64];
533
    /* this is used as a buffer with all values equal to 0. This is emulate the case with
534
       pred being zero in HAD fucntion */
535
0
    UWORD8 ai1_zeros_buffer[64];
536
537
0
    WORD32 had_block_size;
538
0
    LWORD64 source_satd;  // to hold source for current 8x8 block
539
0
    LWORD64 recon_satd;  // holds the current recon 8x8 satd
540
541
0
    WORD32 index_for_src_satd;
542
543
0
    (void)recond_stride_horz;
544
0
    (void)pic_type;
545
0
    (void)layer_id;
546
0
    if(!is_hbd)
547
0
    {
548
0
        pu1_recon = (UWORD8 *)p_recon;
549
0
    }
550
551
    /**** initialize the variables ****/
552
0
    had_block_size = 4;
553
554
0
    if(sub_sampling_type == 1)  // 420
555
0
    {
556
0
        cu_ht = cu_size_luma / 2;
557
0
        cu_wd = cu_size_luma / 2;
558
0
    }
559
0
    else
560
0
    {
561
0
        cu_ht = cu_size_luma;
562
0
        cu_wd = cu_size_luma / 2;
563
0
    }
564
565
0
    num_horz_blocks = 2 * cu_wd / had_block_size;  //ctb_width / had_block_size;
566
0
    ht_offset = -had_block_size;
567
0
    wd_offset = 0;  //-had_block_size;
568
569
0
    cu_total_size = cu_ht * cu_wd;
570
0
    num_comp_had_blocks = 2 * cu_total_size / (had_block_size * had_block_size);
571
572
0
    index_for_src_satd = start_index;
573
574
0
    for(i = 0; i < 64; i++)
575
0
    {
576
0
        ai1_zeros_buffer[i] = 0;
577
0
    }
578
579
0
    psy_factor = PSY_STRENGTH_CHROMA;
580
0
    psy_rd_cost = 0;
581
0
    lambda_mod = lambda * psy_factor;
582
583
    /************************************************************/
584
    /* loop over for every 4x4 blocks in the CU for Cb */
585
0
    for(i = 0; i < num_comp_had_blocks; i++)
586
0
    {
587
0
        if(i % num_horz_blocks == 0)
588
0
        {
589
0
            wd_offset = -had_block_size;
590
0
            ht_offset += had_block_size;
591
0
        }
592
0
        wd_offset += had_block_size;
593
594
        /* source satd for the current 8x8 block */
595
0
        source_satd = pui4_source_satd[index_for_src_satd];
596
597
0
        if(i % 2 != 0)
598
0
        {
599
0
            if(!is_hbd)
600
0
            {
601
0
                pu1_l0_block = pu1_l0_block_prev + 1;
602
0
            }
603
0
        }
604
0
        else
605
0
        {
606
0
            if(!is_hbd)
607
0
            {
608
                /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
609
0
                pu1_l0_block = pu1_recon + recon_stride_vert * ht_offset + wd_offset;
610
0
                pu1_l0_block_prev = pu1_l0_block;
611
0
            }
612
0
        }
613
614
0
        if(had_block_size == 4)
615
0
        {
616
0
            if(!is_hbd)
617
0
            {
618
0
                recon_satd = ps_cmn_utils_optimised_function_list->pf_chroma_AC_HAD_4x4_8bit(
619
0
                    pu1_l0_block,
620
0
                    recon_stride_vert,
621
0
                    ai1_zeros_buffer,
622
0
                    had_block_size,
623
0
                    pi2_residue_had,
624
0
                    had_block_size);
625
0
            }
626
627
            /* get the additional cost function based on the absolute SATD diff of source and recon. */
628
0
            psy_rd_cost += (lambda_mod * llabs(source_satd - recon_satd));
629
630
0
            index_for_src_satd++;
631
632
0
            if((i % num_horz_blocks) == (num_horz_blocks - 1))
633
0
            {
634
0
                index_for_src_satd -= num_horz_blocks;
635
0
                index_for_src_satd +=
636
0
                    (MAX_CU_SIZE / 8); /* Assuming CTB size = 64 and blocksize = 8 */
637
0
            }
638
639
0
        }  // if had block size ==4
640
0
    }  // for loop for all 4x4 block in the cu
641
642
0
    psy_rd_cost = psy_rd_cost >> (Q_PSY_STRENGTH_CHROMA + LAMBDA_Q_SHIFT);
643
    /* reutrn the additional cost for the psy RD opt */
644
0
    return (psy_rd_cost);
645
0
}
646
647
/*!
648
******************************************************************************
649
* \if Function name : ihevce_psy_rd_cost \endif
650
*
651
* \brief
652
*    Calculates the psyco visual cost for RD opt. This is
653
*
654
* \param[in] pui4_source_satd
655
*   This is the pointer to the array of 8x8 satd of the corresponding source CTB. This is pre calculated.
656
* \param[in] *pui1_recon
657
*   This si the pointer to the pred data.
658
* \param[in] recon_stride
659
*   This si the pred stride
660
* \param[in] pic_type
661
*   Picture type.
662
* \param[in] layer_id
663
*   Indicates the temporal layer.
664
* \param[in] lambda
665
*   This is the weighting factor for the cost.
666
*
667
* \return
668
*    the cost for the psyRDopt
669
*
670
* \author
671
*  Ittiam
672
*
673
*****************************************************************************
674
*/
675
LWORD64 ihevce_psy_rd_cost(
676
    LWORD64 *pui4_source_satd,
677
    void *pv_recon,
678
    WORD32 recon_stride_vert,
679
    WORD32 recond_stride_horz,
680
    WORD32 cu_size,
681
    WORD32 pic_type,
682
    WORD32 layer_id,
683
    WORD32 lambda,
684
    WORD32 start_index,
685
    WORD32 is_hbd,
686
    UWORD32 u4_psy_strength,
687
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
688
0
{
689
    /* declare local variables to store the SATD values for the pred  for the current block. */
690
0
    LWORD64 psy_rd_cost;  // TODO : check if overflow is there.
691
0
    UWORD32 lambda_mod;
692
0
    WORD32 psy_factor;
693
694
    /* declare local variables */
695
0
    WORD32 i;
696
0
    WORD32 cu_total_size;
697
0
    WORD32 num_comp_had_blocks;
698
699
0
    UWORD8 *pu1_l0_block;
700
0
    UWORD8 *pu1_recon;
701
702
0
    WORD32 ht_offset;
703
0
    WORD32 wd_offset;
704
0
    WORD32 cu_ht;
705
0
    WORD32 cu_wd;
706
707
0
    WORD32 num_horz_blocks;
708
709
    //WORD16 pi2_residue_had[64];
710
0
    WORD16 pi2_residue_had_zscan[64];
711
    //WORD16 pi2_residue[64];
712
    /* this is used as a buffer with all values equal to 0. This is emulate the case with
713
       pred being zero in HAD fucntion */
714
0
    UWORD8 ai1_zeros_buffer[64];
715
716
0
    WORD32 had_block_size;
717
0
    LWORD64 source_satd;  // to hold source for current 8x8 block
718
0
    LWORD64 recon_satd;  // holds the current recon 8x8 satd
719
720
0
    WORD32 index_for_src_satd;
721
722
0
    (void)recond_stride_horz;
723
0
    (void)pic_type;
724
0
    (void)layer_id;
725
    /***** initialize the variables ****/
726
0
    had_block_size = 8;
727
0
    cu_ht = cu_size;
728
0
    cu_wd = cu_size;
729
730
0
    num_horz_blocks = cu_wd / had_block_size;  //ctb_width / had_block_size;
731
732
0
    ht_offset = -had_block_size;
733
0
    wd_offset = 0 - had_block_size;
734
735
0
    cu_total_size = cu_ht * cu_wd;
736
0
    num_comp_had_blocks = cu_total_size / (had_block_size * had_block_size);
737
738
0
    index_for_src_satd = start_index;
739
740
0
    for(i = 0; i < 64; i++)
741
0
    {
742
0
        ai1_zeros_buffer[i] = 0;
743
0
    }
744
0
    psy_factor = u4_psy_strength;  //PSY_STRENGTH;
745
0
    psy_rd_cost = 0;
746
0
    lambda_mod = lambda * psy_factor;
747
748
0
    if(!is_hbd)
749
0
    {
750
0
        pu1_recon = (UWORD8 *)pv_recon;
751
0
    }
752
753
    /**************************************************************/
754
    /* loop over for every 8x8 blocks in the CU */
755
0
    for(i = 0; i < num_comp_had_blocks; i++)
756
0
    {
757
0
        if(i % num_horz_blocks == 0)
758
0
        {
759
0
            wd_offset = -had_block_size;
760
0
            ht_offset += had_block_size;
761
0
        }
762
0
        wd_offset += had_block_size;
763
764
        /* source satd for the current 8x8 block */
765
0
        source_satd = pui4_source_satd[index_for_src_satd];
766
767
0
        if(had_block_size == 8)
768
0
        {
769
            //WORD32 index;
770
            //WORD32 u4_satd;
771
            //WORD32 dst_strd = 8;
772
            //WORD32 i4_frm_qstep = 0;
773
            //WORD32 early_cbf;
774
0
            if(!is_hbd)
775
0
            {
776
                /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
777
0
                pu1_l0_block = pu1_recon + recon_stride_vert * ht_offset + wd_offset;
778
779
0
                recon_satd = ps_cmn_utils_optimised_function_list->pf_AC_HAD_8x8_8bit(
780
0
                    pu1_l0_block,
781
0
                    recon_stride_vert,
782
0
                    ai1_zeros_buffer,
783
0
                    had_block_size,
784
0
                    pi2_residue_had_zscan,
785
0
                    had_block_size);
786
0
            }
787
788
            /* get the additional cost function based on the absolute SATD diff of source and recon. */
789
0
            psy_rd_cost += (lambda_mod * llabs(source_satd - recon_satd));
790
791
0
            index_for_src_satd++;
792
0
            if((i % num_horz_blocks) == (num_horz_blocks - 1))
793
0
            {
794
0
                index_for_src_satd -= num_horz_blocks;
795
0
                index_for_src_satd +=
796
0
                    (MAX_CU_SIZE / 8); /* Assuming CTB size = 64 and blocksize = 8 */
797
0
            }
798
0
        }  // if
799
0
    }  // for loop
800
0
    psy_rd_cost = psy_rd_cost >> (Q_PSY_STRENGTH + LAMBDA_Q_SHIFT);
801
802
    /* reutrn the additional cost for the psy RD opt */
803
0
    return (psy_rd_cost);
804
0
}
805
806
unsigned long ihevce_calc_stim_injected_variance(
807
    ULWORD64 *pu8_sigmaX,
808
    ULWORD64 *pu8_sigmaXSquared,
809
    ULWORD64 *u8_var,
810
    WORD32 i4_inv_wpred_wt,
811
    WORD32 i4_inv_wt_shift_val,
812
    WORD32 i4_wpred_log_wdc,
813
    WORD32 i4_part_id)
814
0
{
815
0
    ULWORD64 u8_X_Square, u8_temp_var;
816
0
    WORD32 i4_bits_req;
817
818
0
    const WORD32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
819
820
0
    u8_X_Square = (pu8_sigmaX[i4_part_id] * pu8_sigmaX[i4_part_id]);
821
0
    u8_temp_var = pu8_sigmaXSquared[i4_part_id] - u8_X_Square;
822
823
0
    if(i4_inv_wpred_wt != i4_default_src_wt)
824
0
    {
825
0
        i4_inv_wpred_wt = i4_inv_wpred_wt >> i4_inv_wt_shift_val;
826
827
0
        u8_temp_var = SHR_NEG(
828
0
            (u8_temp_var * i4_inv_wpred_wt * i4_inv_wpred_wt),
829
0
            (30 - (2 * i4_inv_wt_shift_val) - i4_wpred_log_wdc * 2));
830
0
    }
831
832
0
    GETRANGE64(i4_bits_req, u8_temp_var);
833
834
0
    if(i4_bits_req > 27)
835
0
    {
836
0
        *u8_var = u8_temp_var >> (i4_bits_req - 27);
837
0
        return (i4_bits_req - 27);
838
0
    }
839
0
    else
840
0
    {
841
0
        *u8_var = u8_temp_var;
842
0
        return 0;
843
0
    }
844
0
}
845
846
unsigned long ihevce_calc_variance_for_diff_weights(
847
    ULWORD64 *pu8_sigmaX,
848
    ULWORD64 *pu8_sigmaXSquared,
849
    ULWORD64 *u8_var,
850
    WORD32 *pi4_inv_wt,
851
    WORD32 *pi4_inv_wt_shift_val,
852
    pu_result_t *ps_result,
853
    WORD32 i4_wpred_log_wdc,
854
    PART_ID_T *pe_part_id,
855
    UWORD8 u1_cu_size,
856
    UWORD8 u1_num_parts,
857
    UWORD8 u1_is_for_src)
858
0
{
859
0
    WORD32 i4_k;
860
0
    UWORD32 u4_wd, u4_ht;
861
0
    UWORD8 u1_num_base_blks;
862
0
    UWORD32 u4_num_pixels_in_part;
863
0
    UWORD8 u1_index;
864
0
    WORD32 i4_bits_req;
865
866
0
    UWORD8 u1_base_blk_size = 4;
867
0
    UWORD32 u4_tot_num_pixels = u1_cu_size * u1_cu_size;
868
0
    ULWORD64 u8_temp_sigmaX[MAX_NUM_INTER_PARTS] = { 0, 0 };
869
0
    ULWORD64 u8_temp_sigmaXsquared[MAX_NUM_INTER_PARTS] = { 0, 0 };
870
0
    ULWORD64 u8_z;
871
872
0
    const WORD32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
873
874
0
    for(i4_k = 0; i4_k < u1_num_parts; i4_k++)
875
0
    {
876
0
        u4_wd = ps_result[i4_k].pu.b4_wd + 1;
877
0
        u4_ht = ps_result[i4_k].pu.b4_ht + 1;
878
0
        u1_num_base_blks = u4_wd * u4_ht;
879
0
        u4_num_pixels_in_part = u1_num_base_blks * u1_base_blk_size * u1_base_blk_size;
880
881
0
        if(u1_is_for_src)
882
0
        {
883
0
            u1_index = pe_part_id[i4_k];
884
0
        }
885
0
        else
886
0
        {
887
0
            u1_index = i4_k;
888
0
        }
889
890
0
        u8_temp_sigmaXsquared[i4_k] = pu8_sigmaXSquared[u1_index] / u4_num_pixels_in_part;
891
0
        u8_temp_sigmaX[i4_k] = pu8_sigmaX[u1_index];
892
893
0
        if(u1_is_for_src)
894
0
        {
895
0
            if(pi4_inv_wt[i4_k] != i4_default_src_wt)
896
0
            {
897
0
                pi4_inv_wt[i4_k] = pi4_inv_wt[i4_k] >> pi4_inv_wt_shift_val[i4_k];
898
0
                u8_temp_sigmaX[i4_k] = SHR_NEG(
899
0
                    (u8_temp_sigmaX[i4_k] * pi4_inv_wt[i4_k]),
900
0
                    (15 - pi4_inv_wt_shift_val[i4_k] - i4_wpred_log_wdc));
901
0
                u8_temp_sigmaXsquared[i4_k] = SHR_NEG(
902
0
                    (u8_temp_sigmaXsquared[i4_k] * pi4_inv_wt[i4_k] * pi4_inv_wt[i4_k]),
903
0
                    (30 - (2 * pi4_inv_wt_shift_val[i4_k]) - i4_wpred_log_wdc * 2));
904
0
            }
905
0
        }
906
0
    }
907
908
0
    u8_z = (u4_tot_num_pixels * (u8_temp_sigmaXsquared[0] + u8_temp_sigmaXsquared[1])) -
909
0
           ((u8_temp_sigmaX[0] + u8_temp_sigmaX[1]) * (u8_temp_sigmaX[0] + u8_temp_sigmaX[1]));
910
911
0
    GETRANGE64(i4_bits_req, u8_z);
912
913
0
    if(i4_bits_req > 27)
914
0
    {
915
0
        *u8_var = u8_z >> (i4_bits_req - 27);
916
0
        return (i4_bits_req - 27);
917
0
    }
918
0
    else
919
0
    {
920
0
        *u8_var = u8_z;
921
0
        return 0;
922
0
    }
923
0
}