Coverage Report

Created: 2025-07-11 06:43

/src/libhevc/encoder/ihevce_common_utils.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/*!
21
******************************************************************************
22
* \file ihevce_common_utils.c
23
*
24
* \brief
25
*    Contains definitions of common utility functions used across encoder
26
*
27
* \date
28
*    18/09/2012
29
*
30
* \author
31
*    ittiam
32
*
33
* List of Functions
34
*  ihevce_copy_2d()
35
*  ihevce_hbd_copy_2d()
36
*  ihevce_2d_square_copy_luma()
37
*  ihevce_wt_avg_2d()
38
*  ihevce_itrans_recon_dc_compute()
39
*  ihevce_itrans_recon_dc()
40
*  ihevce_hbd_itrans_recon_dc()
41
*  ihevce_truncate_16bit_data_to_8bit()
42
*  ihevce_convert_16bit_recon_to_8bit()
43
*  ihevce_convert_16bit_input_to_8bit()
44
*  ihevce_find_num_clusters_of_identical_points_1D()
45
*  ihevce_hbd_compute_ssd()
46
*  ihevce_compare_pu_mv_t()
47
*  ihevce_set_pred_buf_as_free()
48
*  ihevce_get_free_pred_buf_indices()
49
*  ihevce_scale_mv()
50
*  ihevce_osal_alloc()
51
*  ihevce_osal_free()
52
*  ihevce_osal_init()
53
*  ihevce_osal_delete()
54
*  ihevce_sum_abs_seq()
55
*  ihevce_ssd_calculator()
56
*  ihevce_chroma_interleave_ssd_calculator()
57
*  ihevce_ssd_and_sad_calculator()
58
*  ihevce_chroma_interleave_2d_copy()
59
*  ihevce_hbd_chroma_interleave_2d_copy()
60
*  ihevce_hbd_chroma_interleave_ssd_calculator()
61
*  ihevce_get_chroma_eo_sao_params()
62
*  ihevce_get_chroma_eo_sao_params_hbd()
63
*  ihevce_compute_area_of_valid_cus_in_ctb()
64
*  ihevce_create_cuNode_children()
65
*  ihevce_cu_tree_init()
66
*
67
******************************************************************************
68
*/
69
70
/*****************************************************************************/
71
/* File Includes                                                             */
72
/*****************************************************************************/
73
74
/* System include files */
75
#include <stdio.h>
76
#include <stdlib.h>
77
#include <assert.h>
78
#include <string.h>
79
80
/* User include files */
81
#include "ihevc_typedefs.h"
82
#include "itt_video_api.h"
83
#include "ihevce_api.h"
84
85
#include "rc_cntrl_param.h"
86
#include "rc_frame_info_collector.h"
87
#include "rc_look_ahead_params.h"
88
89
#include "ihevc_defs.h"
90
#include "ihevc_debug.h"
91
#include "ihevc_structs.h"
92
#include "ihevc_platform_macros.h"
93
#include "ihevc_deblk.h"
94
#include "ihevc_itrans_recon.h"
95
#include "ihevc_chroma_itrans_recon.h"
96
#include "ihevc_chroma_intra_pred.h"
97
#include "ihevc_intra_pred.h"
98
#include "ihevc_inter_pred.h"
99
#include "ihevc_mem_fns.h"
100
#include "ihevc_padding.h"
101
#include "ihevc_weighted_pred.h"
102
#include "ihevc_sao.h"
103
#include "ihevc_resi_trans.h"
104
#include "ihevc_quant_iquant_ssd.h"
105
#include "ihevc_cabac_tables.h"
106
107
#include "ihevce_defs.h"
108
#include "ihevce_hle_interface.h"
109
#include "ihevce_lap_enc_structs.h"
110
#include "ihevce_multi_thrd_structs.h"
111
#include "ihevce_me_common_defs.h"
112
#include "ihevce_had_satd.h"
113
#include "ihevce_error_codes.h"
114
#include "ihevce_bitstream.h"
115
#include "ihevce_cabac.h"
116
#include "ihevce_rdoq_macros.h"
117
#include "ihevce_function_selector.h"
118
#include "ihevce_enc_structs.h"
119
#include "ihevce_entropy_structs.h"
120
#include "ihevce_cmn_utils_instr_set_router.h"
121
#include "ihevce_enc_loop_structs.h"
122
#include "ihevce_common_utils.h"
123
#include "ihevce_global_tables.h"
124
125
#include "cast_types.h"
126
#include "osal.h"
127
#include "osal_defaults.h"
128
129
/*****************************************************************************/
130
/* Function Definitions                                                      */
131
/*****************************************************************************/
132
133
/**
134
******************************************************************************
135
*
136
*  @brief Performs the 2D copy
137
*
138
*  @par   Description
139
*  This routine Performs the 2D copy
140
*
141
*  @param[inout]   pu1_dst
142
*  pointer to the destination buffer
143
*
144
*  @param[in]   dst_strd
145
*  destination stride in terms of the size of input/output unit
146
*
147
*  @param[inout]   pu1_src
148
*  pointer to the source buffer
149
*
150
*  @param[in]   src_strd
151
*  source stride in terms of the size of input/output unit
152
*
153
*  @param[in]   blk_wd
154
*  number of samples to copy in a row
155
*
156
*  @param[in]   blk_ht
157
*  number of rows to copy
158
*
159
******************************************************************************
160
*/
161
void ihevce_copy_2d(
162
    UWORD8 *pu1_dst,
163
    WORD32 dst_stride,
164
    UWORD8 *pu1_src,
165
    WORD32 src_stride,
166
    WORD32 blk_wd,
167
    WORD32 blk_ht)
168
31.2M
{
169
31.2M
    WORD32 i;
170
171
418M
    for(i = 0; i < blk_ht; i++)
172
386M
    {
173
386M
        memcpy(pu1_dst, pu1_src, blk_wd);
174
386M
        pu1_dst += dst_stride;
175
386M
        pu1_src += src_stride;
176
386M
    }
177
31.2M
}
178
179
/**
180
******************************************************************************
181
*
182
*  @brief Performs the 2D copy of luma data
183
*
184
*  @par   Description
185
*  This routine performs the 2D square copy of luma data
186
*
187
*  @param[inout]   p_dst
188
*  pointer to the destination buffer
189
*
190
*  @param[in]   dst_strd
191
*  destination stride in terms of the size of input/output unit
192
*
193
*  @param[inout]   p_src
194
*  pointer to the source buffer
195
*
196
*  @param[in]   src_strd
197
*  source stride in terms of the size of input/output unit
198
*
199
*  @param[in]   num_cols_to_copy
200
*  number of units in a line to copy from src to dst buffer
201
*  Assumption : num_cols_to_copy <= min (dst_strd, src_strd)
202
*
203
*  @param[in]   unit_size
204
*  size of the unit in bytes
205
*
206
*  @return      none
207
*
208
*  Assumptions : num_cols_to_copy = num_lines_to_copy,
209
*  num_lines_to_copy can have {4, 16, 32, 64}
210
*
211
******************************************************************************
212
*/
213
void ihevce_2d_square_copy_luma(
214
    void *p_dst,
215
    WORD32 dst_strd,
216
    void *p_src,
217
    WORD32 src_strd,
218
    WORD32 num_cols_to_copy,
219
    WORD32 unit_size)
220
18.3M
{
221
18.3M
    UWORD8 *pu1_dst = (UWORD8 *)p_dst;
222
18.3M
    UWORD8 *pu1_src = (UWORD8 *)p_src;
223
18.3M
    WORD32 i;
224
225
240M
    for(i = 0; i < num_cols_to_copy; i++)
226
222M
    {
227
222M
        memcpy(pu1_dst, pu1_src, (num_cols_to_copy * unit_size));
228
222M
        pu1_dst += (dst_strd * unit_size);
229
222M
        pu1_src += (src_strd * unit_size);
230
222M
    }
231
18.3M
}
232
233
/**
234
********************************************************************************
235
*
236
*  @brief  Weighted pred of 2 predictor buffers as per spec
237
*
238
*  @param[in] pu1_pred0 : Pred0 buffer
239
*
240
*  @param[in] pu1_pred1 : Pred1 buffer
241
*
242
*  @param[in] pred0_strd : Stride of pred0 buffer
243
*
244
*  @param[in] pred1_strd : Stride of pred1 buffer
245
*
246
*  @param[in] wd : Width of pred block
247
*
248
*  @param[in] ht : Height of pred block
249
*
250
*  @param[out] pu1_dst : Destination buffer that will hold result
251
*
252
*  @param[in] dst_strd : Stride of dest buffer
253
*
254
*  @param[in] w0 : Weighting factor of Pred0
255
*
256
*  @param[in] w1 : weighting factor of pred1
257
*
258
*  @param[in] o0 : offset for pred0
259
*
260
*  @param[in] o1 : offset for pred1
261
*
262
*  @param[in] log_wdc : shift factor as per spec
263
*
264
*  @return none
265
*
266
********************************************************************************
267
*/
268
void ihevce_wt_avg_2d(
269
    UWORD8 *pu1_pred0,
270
    UWORD8 *pu1_pred1,
271
    WORD32 pred0_strd,
272
    WORD32 pred1_strd,
273
    WORD32 wd,
274
    WORD32 ht,
275
    UWORD8 *pu1_dst,
276
    WORD32 dst_strd,
277
    WORD32 w0,
278
    WORD32 w1,
279
    WORD32 o0,
280
    WORD32 o1,
281
    WORD32 log_wdc)
282
3.28M
{
283
    /* Total Rounding term to be added, including offset */
284
3.28M
    WORD32 rnd = (o0 + o1 + 1) >> 1;  // << log_wdc;
285
    /* Downshift */
286
3.28M
    WORD32 shift = log_wdc + 1;
287
    /* loop counters */
288
3.28M
    WORD32 i, j;
289
290
    /* Dst = ((w0*p0 + w1*p1) + ((o0 + o1 + 1) << logWDc)) >> (logWDc + 1) */
291
    /* In above formula, the additive term is constant and is evaluated    */
292
    /* outside loop and stored as "rnd".                                   */
293
39.1M
    for(i = 0; i < ht; i++)
294
35.8M
    {
295
634M
        for(j = 0; j < wd; j++)
296
598M
        {
297
598M
            WORD32 tmp;
298
598M
            tmp = IHEVCE_WT_PRED(pu1_pred0[j], pu1_pred1[j], w0, w1, rnd, shift);
299
598M
            pu1_dst[j] = (UWORD8)(CLIP3(tmp, 0, 255));
300
598M
        }
301
35.8M
        pu1_pred0 += pred0_strd;
302
35.8M
        pu1_pred1 += pred1_strd;
303
35.8M
        pu1_dst += dst_strd;
304
35.8M
    }
305
3.28M
}
306
/**
307
******************************************************************************
308
*
309
*  @brief Performs the Recon for DC only coefficient case
310
*
311
*  @par   Description
312
*  This routine performs the Recon for DC only coefficient case
313
*
314
*  @param[inout]   pu1_dst
315
*  pointer to the destination buffer
316
*
317
*  @param[in]   pu1_pred
318
*  pointer to the pred buffer
319
*
320
*  @param[in]   dst_strd
321
*  destination stride
322
*
323
*  @param[in]   pred_strd
324
*  pred buffer stride
325
*
326
*  @param[in]   trans_size
327
*  transform size
328
*
329
* @param[in] col_mult
330
*  chroma multiplier
331
*
332
*  @param[in]   dc_value
333
*  residue value
334
*
335
*  @return      none
336
*
337
******************************************************************************
338
*/
339
static INLINE void ihevce_itrans_recon_dc_compute(
340
    UWORD8 *pu1_dst,
341
    UWORD8 *pu1_pred,
342
    WORD32 dst_strd,
343
    WORD32 pred_strd,
344
    WORD32 trans_size,
345
    WORD32 col_mult,
346
    WORD32 dc_value)
347
512k
{
348
512k
    WORD32 row, col;
349
350
5.52M
    for(row = 0; row < trans_size; row++)
351
5.01M
    {
352
87.7M
        for(col = 0; col < trans_size; col++)
353
82.7M
        {
354
82.7M
            pu1_dst[row * dst_strd + col * col_mult] =
355
82.7M
                CLIP_U8(pu1_pred[row * pred_strd + col * col_mult] + dc_value);
356
82.7M
        }
357
5.01M
    }
358
512k
}
359
360
/**
361
******************************************************************************
362
*
363
*  @brief Performs the IQ+IT+Recon for DC only coefficient case
364
*
365
*  @par   Description
366
*  This routine performs the IQ+IT+Recon for DC only coefficient case
367
*
368
*  @param[in]   pu1_pred
369
*  pointer to the pred buffer
370
*
371
*  @param[in]   pred_strd
372
*  pred buffer stride
373
*
374
*  @param[inout]   pu1_dst
375
*  pointer to the destination buffer
376
*
377
*  @param[in]   dst_strd
378
*  destination stride
379
*
380
*  @param[in]   trans_size
381
*  transform size
382
*
383
* @param[in] i2_deq_value
384
*  Dequant Coeffs
385
*
386
*  @param[in] chroma plane
387
*  -1 : luma, 0 : chroma U, 1 : chroma V
388
*
389
*  @return      none
390
*
391
******************************************************************************
392
*/
393
void ihevce_itrans_recon_dc(
394
    UWORD8 *pu1_pred,
395
    WORD32 pred_strd,
396
    UWORD8 *pu1_dst,
397
    WORD32 dst_strd,
398
    WORD32 trans_size,
399
    WORD16 i2_deq_value,
400
    CHROMA_PLANE_ID_T e_chroma_plane)
401
512k
{
402
512k
    WORD32 add, shift;
403
512k
    WORD32 dc_value;
404
512k
    UWORD8 *pu1_pred_tmp, *pu1_dst_tmp;
405
512k
    WORD32 col_mult;
406
407
512k
    assert(e_chroma_plane == NULL_PLANE || e_chroma_plane == U_PLANE || e_chroma_plane == V_PLANE);
408
512k
    if(e_chroma_plane == NULL_PLANE)
409
197k
    {
410
197k
        pu1_pred_tmp = pu1_pred;
411
197k
        pu1_dst_tmp = pu1_dst;
412
197k
        col_mult = 1;
413
197k
    }
414
315k
    else
415
315k
    {
416
315k
        col_mult = 2;
417
315k
        pu1_pred_tmp = pu1_pred + e_chroma_plane;
418
315k
        pu1_dst_tmp = pu1_dst + e_chroma_plane;
419
315k
    }
420
421
512k
    shift = IT_SHIFT_STAGE_1;
422
512k
    add = 1 << (shift - 1);
423
512k
    dc_value = CLIP_S16((i2_deq_value * 64 + add) >> shift);
424
512k
    shift = IT_SHIFT_STAGE_2;
425
512k
    add = 1 << (shift - 1);
426
512k
    dc_value = CLIP_S16((dc_value * 64 + add) >> shift);
427
512k
    ihevce_itrans_recon_dc_compute(
428
512k
        pu1_dst_tmp, pu1_pred_tmp, dst_strd, pred_strd, trans_size, col_mult, dc_value);
429
512k
}
430
431
/*!
432
******************************************************************************
433
* \if Function name : ihevce_find_num_clusters_of_identical_points_1D \endif
434
*
435
* \brief
436
*
437
*
438
*****************************************************************************
439
*/
440
WORD32 ihevce_find_num_clusters_of_identical_points_1D(
441
    UWORD8 *pu1_inp_array,
442
    UWORD8 *pu1_out_array,
443
    UWORD8 *pu1_freq_of_out_data_in_inp,
444
    WORD32 i4_num_inp_array_elements)
445
1.52M
{
446
1.52M
    WORD32 i;
447
1.52M
    UWORD8 u1_value = pu1_inp_array[0];
448
1.52M
    WORD32 i4_num_clusters = i4_num_inp_array_elements;
449
1.52M
    WORD32 i4_output_array_idx = 1;
450
451
1.52M
    pu1_freq_of_out_data_in_inp[0] = 1;
452
1.52M
    pu1_out_array[0] = u1_value;
453
454
1.52M
    if(1 == i4_num_inp_array_elements)
455
281k
    {
456
281k
        return 1;
457
281k
    }
458
459
4.02M
    for(i = 1; i < i4_num_inp_array_elements; i++)
460
2.77M
    {
461
2.77M
        if(pu1_inp_array[i] == u1_value)
462
1.10M
        {
463
1.10M
            pu1_freq_of_out_data_in_inp[0]++;
464
1.10M
            i4_num_clusters--;
465
1.10M
        }
466
1.67M
        else
467
1.67M
        {
468
1.67M
            pu1_out_array[i4_output_array_idx] = pu1_inp_array[i];
469
470
1.67M
            i4_output_array_idx++;
471
1.67M
        }
472
2.77M
    }
473
474
1.24M
    if(i4_num_clusters > 1)
475
868k
    {
476
868k
        WORD32 i4_num_sub_clusters;
477
478
868k
        i4_num_sub_clusters = ihevce_find_num_clusters_of_identical_points_1D(
479
868k
            &pu1_out_array[1],
480
868k
            &pu1_out_array[1],
481
868k
            &pu1_freq_of_out_data_in_inp[1],
482
868k
            i4_num_clusters - 1);
483
484
868k
        i4_num_clusters = 1 + i4_num_sub_clusters;
485
868k
    }
486
487
1.24M
    return i4_num_clusters;
488
1.52M
}
489
490
/**
491
*******************************************************************************
492
*
493
* @brief Compare Motion vectors function
494
*
495
* @par Description:
496
*   Checks if MVs and Reference idx are excatly matching.
497
*
498
* @param[inout] ps_1
499
*   motion vector 1 to be compared
500
*
501
* @param[in] ps_2
502
*   motion vector 2 to be compared
503
*
504
* @returns
505
*  0 : if not matching 1 : if matching
506
*
507
* @remarks
508
*
509
*******************************************************************************
510
*/
511
WORD32 ihevce_compare_pu_mv_t(
512
    pu_mv_t *ps_pu_mv_1, pu_mv_t *ps_pu_mv_2, WORD32 i4_pred_mode_1, WORD32 i4_pred_mode_2)
513
18.0M
{
514
18.0M
    WORD32 i4_l0_match, i4_l1_match;
515
18.0M
    WORD32 i4_pred_l0, i4_pred_l1;
516
517
18.0M
    i4_pred_l0 = (i4_pred_mode_1 != PRED_L1);
518
18.0M
    i4_pred_l1 = (i4_pred_mode_1 != PRED_L0);
519
520
18.0M
    if(i4_pred_mode_1 != i4_pred_mode_2)
521
3.24M
        return 0;
522
523
14.8M
    i4_l0_match = 0;
524
14.8M
    i4_l1_match = 0;
525
526
14.8M
    if(i4_pred_l0)
527
14.4M
    {
528
14.4M
        if(ps_pu_mv_1->i1_l0_ref_idx == ps_pu_mv_2->i1_l0_ref_idx)
529
11.2M
        {
530
11.2M
            if(0 == memcmp(&ps_pu_mv_1->s_l0_mv, &ps_pu_mv_2->s_l0_mv, sizeof(mv_t)))
531
7.81M
                i4_l0_match = 1;
532
11.2M
        }
533
14.4M
    }
534
14.8M
    if(i4_pred_l1)
535
1.18M
    {
536
1.18M
        if(ps_pu_mv_1->i1_l1_ref_idx == ps_pu_mv_2->i1_l1_ref_idx)
537
1.15M
        {
538
1.15M
            if(0 == memcmp(&ps_pu_mv_1->s_l1_mv, &ps_pu_mv_2->s_l1_mv, sizeof(mv_t)))
539
1.00M
                i4_l1_match = 1;
540
1.15M
        }
541
1.18M
    }
542
543
14.8M
    if(i4_pred_l0 && i4_pred_l1)
544
838k
        return (i4_l0_match & i4_l1_match);
545
13.9M
    else if(i4_pred_l0)
546
13.6M
        return i4_l0_match;
547
345k
    else
548
345k
        return i4_l1_match;
549
550
14.8M
} /* End of ihevce_compare_pu_mv_t */
551
552
/*!
553
******************************************************************************
554
* \if Function name : ihevce_set_pred_buf_as_free \endif
555
*
556
* \brief
557
*    Mark buffer as free
558
*
559
*****************************************************************************
560
*/
561
void ihevce_set_pred_buf_as_free(UWORD32 *pu4_idx_array, UWORD8 u1_buf_id)
562
62.8M
{
563
62.8M
    (*pu4_idx_array) &= ~(1 << u1_buf_id);
564
62.8M
}
565
566
/*!
567
******************************************************************************
568
* \if Function name : ihevce_get_free_pred_buf_indices \endif
569
*
570
* \brief
571
*    get free buffer indices
572
*
573
*****************************************************************************
574
*/
575
UWORD8 ihevce_get_free_pred_buf_indices(
576
    UWORD8 *pu1_idx_array, UWORD32 *pu4_bitfield, UWORD8 u1_num_bufs_requested)
577
34.1M
{
578
34.1M
    UWORD8 i;
579
580
34.1M
    UWORD8 u1_num_free_bufs_found = 0;
581
34.1M
    UWORD32 u4_local_bitfield = *pu4_bitfield;
582
583
34.1M
    ASSERT(u1_num_bufs_requested <= (32 - ihevce_num_ones_generic(u4_local_bitfield)));
584
585
115M
    for(i = 0; u1_num_free_bufs_found < u1_num_bufs_requested; i++)
586
81.4M
    {
587
81.4M
        if(!(u4_local_bitfield & (1 << i)))
588
43.7M
        {
589
43.7M
            pu1_idx_array[u1_num_free_bufs_found++] = i;
590
43.7M
            u4_local_bitfield |= (1 << i);
591
43.7M
        }
592
81.4M
    }
593
594
34.1M
    (*pu4_bitfield) = u4_local_bitfield;
595
596
34.1M
    return u1_num_free_bufs_found;
597
34.1M
}
598
599
/*!
600
******************************************************************************
601
* \if Function name : ihevce_scale_mv \endif
602
*
603
* \brief
604
*    Scale mv basing on displacement of POC
605
*
606
*****************************************************************************
607
*/
608
void ihevce_scale_mv(mv_t *ps_mv, WORD32 i4_poc_to, WORD32 i4_poc_from, WORD32 i4_curr_poc)
609
573k
{
610
573k
    WORD32 td, tb, tx;
611
573k
    WORD32 dist_scale_factor;
612
573k
    WORD32 mvx, mvy;
613
614
573k
    td = CLIP_S8(i4_curr_poc - i4_poc_from);
615
573k
    tb = CLIP_S8(i4_curr_poc - i4_poc_to);
616
617
573k
    tx = (16384 + (abs(td) >> 1)) / td;
618
619
573k
    dist_scale_factor = (tb * tx + 32) >> 6;
620
573k
    dist_scale_factor = CLIP3(dist_scale_factor, -4096, 4095);
621
622
573k
    mvx = ps_mv->i2_mvx;
623
573k
    mvy = ps_mv->i2_mvy;
624
625
573k
    mvx = SIGN(dist_scale_factor * mvx) * ((abs(dist_scale_factor * mvx) + 127) >> 8);
626
573k
    mvy = SIGN(dist_scale_factor * mvy) * ((abs(dist_scale_factor * mvy) + 127) >> 8);
627
628
573k
    ps_mv->i2_mvx = CLIP_S16(mvx);
629
573k
    ps_mv->i2_mvy = CLIP_S16(mvy);
630
573k
}
631
632
/*!
633
******************************************************************************
634
* \if Function name : ihevce_osal_alloc \endif
635
*
636
* \brief
637
*    Memory allocate call back function passed to OSAL
638
*
639
* \param[in] pv_handle : handle to hle ctxt
640
* \param[in] u4_size : size of memory required
641
*
642
* \return
643
*    Memory pointer
644
*
645
* \author
646
*  Ittiam
647
*
648
*****************************************************************************
649
*/
650
void *ihevce_osal_alloc(void *pv_handle, UWORD32 u4_size)
651
312k
{
652
312k
    ihevce_hle_ctxt_t *ps_hle_ctxt = (ihevce_hle_ctxt_t *)pv_handle;
653
312k
    iv_mem_rec_t s_mem_tab;
654
655
    /* def init of memtab */
656
312k
    s_mem_tab.i4_size = sizeof(iv_mem_rec_t);
657
312k
    s_mem_tab.i4_mem_alignment = 8;
658
312k
    s_mem_tab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
659
660
    /* allocate memory for required size */
661
312k
    s_mem_tab.i4_mem_size = u4_size;
662
663
312k
    ps_hle_ctxt->ihevce_mem_alloc(
664
312k
        ps_hle_ctxt->pv_mem_mgr_hdl, &ps_hle_ctxt->ps_static_cfg_prms->s_sys_api, &s_mem_tab);
665
666
312k
    return (s_mem_tab.pv_base);
667
312k
}
668
669
/*!
670
******************************************************************************
671
* \if Function name : ihevce_osal_free \endif
672
*
673
* \brief
674
*    Memory free call back function passed to OSAL
675
*
676
* \param[in] pv_handle : handle to hle ctxt
677
* \param[in] pv_mem : memory to be freed
678
*
679
* \return
680
*    none
681
*
682
* \author
683
*  Ittiam
684
*
685
*****************************************************************************
686
*/
687
void ihevce_osal_free(void *pv_handle, void *pv_mem)
688
312k
{
689
312k
    ihevce_hle_ctxt_t *ps_hle_ctxt = (ihevce_hle_ctxt_t *)pv_handle;
690
312k
    iv_mem_rec_t s_mem_tab;
691
692
    /* def init of memtab */
693
312k
    s_mem_tab.i4_size = sizeof(iv_mem_rec_t);
694
312k
    s_mem_tab.i4_mem_alignment = 8;
695
312k
    s_mem_tab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
696
697
    /* free memory */
698
312k
    s_mem_tab.pv_base = pv_mem;
699
700
312k
    ps_hle_ctxt->ihevce_mem_free(ps_hle_ctxt->pv_mem_mgr_hdl, &s_mem_tab);
701
702
312k
    return;
703
312k
}
704
705
/*!
706
******************************************************************************
707
* \if Function name : ihevce_osal_init \endif
708
*
709
* \brief
710
*    Function to initialise OSAL handle
711
*
712
* \return
713
*    None
714
*
715
* \author
716
*  Ittiam
717
*
718
*****************************************************************************
719
*/
720
WORD32 ihevce_osal_init(void *pv_hle_ctxt)
721
8.96k
{
722
    /* local variables */
723
8.96k
    ihevce_hle_ctxt_t *ps_hle_ctxt;
724
8.96k
    osal_cb_funcs_t s_cb_funcs;
725
8.96k
    WORD32 status = 0;
726
8.96k
    void *pv_osal_handle;
727
8.96k
    iv_mem_rec_t s_mem_tab;
728
729
8.96k
    ps_hle_ctxt = (ihevce_hle_ctxt_t *)pv_hle_ctxt;
730
731
    /* def init of memtab */
732
8.96k
    s_mem_tab.i4_size = sizeof(iv_mem_rec_t);
733
8.96k
    s_mem_tab.i4_mem_alignment = 8;
734
8.96k
    s_mem_tab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
735
736
    /* --------------------------------------------------------------------- */
737
    /*                      OSAL Hanndle create                              */
738
    /* --------------------------------------------------------------------- */
739
740
    /* Allocate memory for the handle */
741
8.96k
    s_mem_tab.i4_mem_size = OSAL_HANDLE_SIZE;
742
743
8.96k
    ps_hle_ctxt->ihevce_mem_alloc(
744
8.96k
        ps_hle_ctxt->pv_mem_mgr_hdl, &ps_hle_ctxt->ps_static_cfg_prms->s_sys_api, &s_mem_tab);
745
8.96k
    if(NULL == s_mem_tab.pv_base)
746
0
    {
747
0
        ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.ihevce_printf(
748
0
            ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.pv_cb_handle,
749
0
            "IHEVCE ERROR: Error in OSAL initialization\n");
750
0
        return (-1);
751
0
    }
752
753
8.96k
    pv_osal_handle = s_mem_tab.pv_base;
754
755
    /* Initialize OSAL call back functions */
756
8.96k
    s_cb_funcs.mmr_handle = (void *)ps_hle_ctxt;
757
8.96k
    s_cb_funcs.osal_alloc = &ihevce_osal_alloc;
758
8.96k
    s_cb_funcs.osal_free = &ihevce_osal_free;
759
760
8.96k
    status = osal_init(pv_osal_handle);
761
8.96k
    if(OSAL_SUCCESS != status)
762
0
    {
763
0
        ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.ihevce_printf(
764
0
            ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.pv_cb_handle,
765
0
            "IHEVCE ERROR: Error in OSAL initialization\n");
766
0
        return (-1);
767
0
    }
768
769
8.96k
    status = osal_register_callbacks(pv_osal_handle, &s_cb_funcs);
770
8.96k
    if(OSAL_SUCCESS != status)
771
0
    {
772
0
        ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.ihevce_printf(
773
0
            ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.pv_cb_handle,
774
0
            "IHEVCE ERROR: Error in OSAL initialization\n");
775
0
        return (-1);
776
0
    }
777
8.96k
    ps_hle_ctxt->pv_osal_handle = pv_osal_handle;
778
779
8.96k
    return (0);
780
8.96k
}
781
782
/*!
783
******************************************************************************
784
* \if Function name : ihevce_osal_delete \endif
785
*
786
* \brief
787
*    Function to delete OSAL handle
788
*
789
* \return
790
*    None
791
*
792
* \author
793
*  Ittiam
794
*
795
*****************************************************************************
796
*/
797
WORD32 ihevce_osal_delete(void *pv_hle_ctxt)
798
8.96k
{
799
    /* local variables */
800
8.96k
    ihevce_hle_ctxt_t *ps_hle_ctxt;
801
8.96k
    void *pv_osal_handle;
802
8.96k
    iv_mem_rec_t s_mem_tab;
803
804
8.96k
    ps_hle_ctxt = (ihevce_hle_ctxt_t *)pv_hle_ctxt;
805
8.96k
    pv_osal_handle = ps_hle_ctxt->pv_osal_handle;
806
807
    /* def init of memtab */
808
8.96k
    s_mem_tab.i4_size = sizeof(iv_mem_rec_t);
809
8.96k
    s_mem_tab.i4_mem_alignment = 8;
810
8.96k
    s_mem_tab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
811
812
8.96k
    if(0 != osal_close(pv_osal_handle))
813
0
    {
814
0
        ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.ihevce_printf(
815
0
            ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.pv_cb_handle,
816
0
            "IHEVCE ERROR>> Unable to close OSAL\n");
817
0
        return (-1);
818
0
    }
819
820
    /* free osal handle */
821
8.96k
    s_mem_tab.pv_base = pv_osal_handle;
822
823
8.96k
    ps_hle_ctxt->ihevce_mem_free(ps_hle_ctxt->pv_mem_mgr_hdl, &s_mem_tab);
824
825
8.96k
    return (0);
826
8.96k
}
827
828
/**
829
*******************************************************************************
830
*
831
* @brief
832
*  Compute SSD between two blocks (8 bit input)
833
*
834
* @par Description:
835
*
836
* @param[in] pu1_inp
837
*  UWORD8 pointer to the src block
838
*
839
* @param[in] pu1_ref
840
*  UWORD8 pointer to the ref block
841
*
842
* @param[in] inp_stride
843
*  UWORD32 Source stride
844
*
845
* @param[in] ref_stride
846
*  UWORD32 ref stride
847
*
848
* @param[in] wd
849
*  UWORD32 width of the block
850
*
851
* @param[in] ht
852
*  UWORD32 height of the block
853
*
854
* @returns SSD
855
*
856
* @remarks none
857
*
858
*******************************************************************************
859
*/
860
LWORD64 ihevce_ssd_calculator(
861
    UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd,
862
    UWORD32 ht, CHROMA_PLANE_ID_T chroma_plane)
863
17.9M
{
864
17.9M
    UWORD32 i, j;
865
17.9M
    LWORD64 ssd = 0;
866
17.9M
    UNUSED(chroma_plane);
867
273M
    for(i = 0; i < ht; i++)
868
255M
    {
869
8.18G
        for(j = 0; j < wd; j++)
870
7.93G
        {
871
7.93G
            ssd += (pu1_inp[j] - pu1_ref[j]) * (pu1_inp[j] - pu1_ref[j]);
872
7.93G
        }
873
874
255M
        pu1_inp += inp_stride;
875
255M
        pu1_ref += ref_stride;
876
255M
    }
877
878
17.9M
    return ssd;
879
17.9M
}
880
881
/**
882
*******************************************************************************
883
*
884
* @brief
885
*  Compute SSD between two blocks (8 bit input, chroma interleaved input)
886
*
887
* @par Description:
888
*
889
* @param[in] pu1_inp
890
*  UWORD8 pointer to the src block
891
*
892
* @param[in] pu1_ref
893
*  UWORD8 pointer to the ref block
894
*
895
* @param[in] inp_stride
896
*  UWORD32 Source stride
897
*
898
* @param[in] ref_stride
899
*  UWORD32 ref stride
900
*
901
* @param[in] wd
902
*  UWORD32 width of the block
903
*
904
* @param[in] ht
905
*  UWORD32 height of the block
906
*
907
* @returns SSD
908
*
909
* @remarks none
910
*
911
*******************************************************************************
912
*/
913
LWORD64 ihevce_chroma_interleave_ssd_calculator(
914
    UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd,
915
    UWORD32 ht, CHROMA_PLANE_ID_T chroma_plane)
916
22.6M
{
917
22.6M
    UWORD32 i, j;
918
22.6M
    LWORD64 ssd = 0;
919
22.6M
    pu1_inp += chroma_plane;
920
22.6M
    pu1_ref += chroma_plane;
921
922
    /* run a loop and find the ssd by doing diff followed by square */
923
185M
    for(i = 0; i < ht; i++)
924
162M
    {
925
1.68G
        for(j = 0; j < wd; j++)
926
1.52G
        {
927
1.52G
            WORD32 val;
928
929
            /* note that chroma is interleaved */
930
1.52G
            val = pu1_inp[j * 2] - pu1_ref[j * 2];
931
1.52G
            ssd += val * val;
932
1.52G
        }
933
        /* row level update */
934
162M
        pu1_inp += inp_stride;
935
162M
        pu1_ref += ref_stride;
936
162M
    }
937
938
22.6M
    return (ssd);
939
22.6M
}
940
941
/**
942
*******************************************************************************
943
*
944
* @brief
945
*  Compute SSD & SAD between two blocks (8 bit input)
946
*
947
* @par Description:
948
*
949
* @param[in] pu1_recon
950
*  UWORD8 pointer to the block 1
951
*
952
* @param[in] recon_strd
953
*  UWORD32 stride of block 1
954
*
955
* @param[in] pu1_src
956
*  UWORD8 pointer to the block 2
957
*
958
* @param[in] src_strd
959
*  UWORD32 stride of block 2
960
*
961
* @param[in] trans_size
962
*  UWORD32 block wd/ht
963
*
964
* @param[out] *pu4_blk_sad
965
*  UWORD32 block SAD
966
*
967
* @returns SSD
968
*
969
* @remarks none
970
*
971
*******************************************************************************
972
*/
973
LWORD64 ihevce_ssd_and_sad_calculator(
974
    UWORD8 *pu1_recon,
975
    WORD32 recon_strd,
976
    UWORD8 *pu1_src,
977
    WORD32 src_strd,
978
    WORD32 trans_size,
979
    UWORD32 *pu4_blk_sad)
980
2.08M
{
981
2.08M
    WORD32 i, j, sad = 0;
982
2.08M
    LWORD64 ssd = 0;
983
984
    /* run a loop and find the ssd by doing diff followed by square */
985
40.0M
    for(i = 0; i < trans_size; i++)
986
37.9M
    {
987
928M
        for(j = 0; j < trans_size; j++)
988
890M
        {
989
890M
            WORD32 val;
990
991
890M
            val = *pu1_src++ - *pu1_recon++;
992
890M
            ssd += val * val;
993
890M
            sad += abs(val);
994
890M
        }
995
        /* row level update */
996
37.9M
        pu1_src += src_strd - trans_size;
997
37.9M
        pu1_recon += recon_strd - trans_size;
998
37.9M
    }
999
2.08M
    *pu4_blk_sad = sad;
1000
1001
    /* The return value is of type WORD32 */
1002
2.08M
    ssd = CLIP3(ssd, 0, 0x7fffffff);
1003
1004
2.08M
    return (ssd);
1005
2.08M
}
1006
1007
/*!
1008
******************************************************************************
1009
* \if Function name : ihevce_chroma_interleave_2d_copy \endif
1010
*
1011
* \brief
1012
*    This function copies one plane (u/v) of interleaved chroma buffer from
1013
*    source to destination
1014
******************************************************************************
1015
*/
1016
void ihevce_chroma_interleave_2d_copy(
1017
    UWORD8 *pu1_uv_src_bp,
1018
    WORD32 src_strd,
1019
    UWORD8 *pu1_uv_dst_bp,
1020
    WORD32 dst_strd,
1021
    WORD32 w,
1022
    WORD32 h,
1023
    CHROMA_PLANE_ID_T e_chroma_plane)
1024
29.3M
{
1025
29.3M
    WORD32 i, j;
1026
1027
29.3M
    UWORD8 *pu1_src = (U_PLANE == e_chroma_plane) ? pu1_uv_src_bp : pu1_uv_src_bp + 1;
1028
29.3M
    UWORD8 *pu1_dst = (U_PLANE == e_chroma_plane) ? pu1_uv_dst_bp : pu1_uv_dst_bp + 1;
1029
1030
253M
    for(i = 0; i < h; i++)
1031
224M
    {
1032
2.46G
        for(j = 0; j < w; j++)
1033
2.23G
        {
1034
            /* note that chroma is interleaved */
1035
2.23G
            pu1_dst[j * 2] = pu1_src[j * 2];
1036
2.23G
        }
1037
1038
        /* row level update */
1039
224M
        pu1_src += src_strd;
1040
224M
        pu1_dst += dst_strd;
1041
224M
    }
1042
29.3M
}
1043
1044
/**
1045
*******************************************************************************
1046
*
1047
* @brief
1048
*     Gets edge offset params
1049
*
1050
* @par Description:
1051
*     Given the ctb and sao angle this function will calculate accumulated
1052
*     error between source and recon and the corresponding count for 4 edge
1053
*     indexes one each for peak,valley, half peak and half valley.
1054
*
1055
* @param[in]
1056
*   ps_sao_ctxt:   Pointer to SAO context
1057
*   eo_sao_class: specifies edge offset class
1058
*   pi4_acc_error_category: pointer to an array to store accumulated error between source and recon
1059
*   pi4_category_count    : pointer to an array to store number of peaks,valleys,half peaks and half valleys.
1060
* @returns
1061
*
1062
* @remarks
1063
*  None
1064
*
1065
*******************************************************************************/
1066
void ihevce_get_chroma_eo_sao_params(
1067
    void *pv_sao_ctxt,
1068
    WORD32 eo_sao_class,
1069
    WORD32 *pi4_acc_error_category,
1070
    WORD32 *pi4_category_count)
1071
158k
{
1072
158k
    WORD32 row_start, row_end, col_start, col_end, row, col;
1073
158k
    WORD32 row_offset = 0, col_offset = 0;
1074
158k
    WORD32 a, b, c, pel_error, edgeidx;
1075
158k
    sao_ctxt_t *ps_sao_ctxt = (sao_ctxt_t *)pv_sao_ctxt;
1076
1077
158k
    row_start = 0;
1078
158k
    row_end = ps_sao_ctxt->i4_sao_blk_ht >> 1;
1079
158k
    col_start = 0;
1080
158k
    col_end = ps_sao_ctxt->i4_sao_blk_wd;
1081
1082
158k
    if((ps_sao_ctxt->i4_ctb_x == 0) && (eo_sao_class != SAO_EDGE_90_DEG))
1083
71.1k
    {
1084
71.1k
        col_start = 2;
1085
71.1k
    }
1086
1087
158k
    if(((ps_sao_ctxt->i4_ctb_x + 1) == ps_sao_ctxt->ps_sps->i2_pic_wd_in_ctb) &&
1088
158k
       (eo_sao_class != SAO_EDGE_90_DEG))
1089
71.4k
    {
1090
71.4k
        col_end = ps_sao_ctxt->i4_sao_blk_wd - 2;
1091
71.4k
    }
1092
1093
158k
    if((ps_sao_ctxt->i4_ctb_y == 0) && (eo_sao_class != SAO_EDGE_0_DEG))
1094
42.7k
    {
1095
42.7k
        row_start = 1;
1096
42.7k
    }
1097
1098
158k
    if(((ps_sao_ctxt->i4_ctb_y + 1) == ps_sao_ctxt->ps_sps->i2_pic_ht_in_ctb) &&
1099
158k
       (eo_sao_class != SAO_EDGE_0_DEG))
1100
41.2k
    {
1101
41.2k
        row_end = row_end - 1;  //ps_sao_ctxt->i4_sao_blk_ht - 1;
1102
41.2k
    }
1103
1104
158k
    if(eo_sao_class == SAO_EDGE_0_DEG)
1105
109k
    {
1106
109k
        row_offset = 0;
1107
109k
        col_offset = 2;
1108
109k
    }
1109
48.8k
    else if(eo_sao_class == SAO_EDGE_90_DEG)
1110
23.5k
    {
1111
23.5k
        row_offset = 1;
1112
23.5k
        col_offset = 0;
1113
23.5k
    }
1114
25.2k
    else if(eo_sao_class == SAO_EDGE_135_DEG)
1115
13.0k
    {
1116
13.0k
        row_offset = 1;
1117
13.0k
        col_offset = 2;
1118
13.0k
    }
1119
12.2k
    else if(eo_sao_class == SAO_EDGE_45_DEG)
1120
12.2k
    {
1121
12.2k
        row_offset = 1;
1122
12.2k
        col_offset = -2;
1123
12.2k
    }
1124
1125
5.00M
    for(row = row_start; row < row_end; row++)
1126
4.84M
    {
1127
297M
        for(col = col_start; col < col_end; col++)
1128
292M
        {
1129
292M
            c = ps_sao_ctxt
1130
292M
                    ->pu1_cur_chroma_recon_buf[col + row * ps_sao_ctxt->i4_cur_chroma_recon_stride];
1131
292M
            a = ps_sao_ctxt->pu1_cur_chroma_recon_buf
1132
292M
                    [(col - col_offset) +
1133
292M
                     (row - row_offset) * ps_sao_ctxt->i4_cur_chroma_recon_stride];
1134
292M
            b = ps_sao_ctxt->pu1_cur_chroma_recon_buf
1135
292M
                    [(col + col_offset) +
1136
292M
                     (row + row_offset) * ps_sao_ctxt->i4_cur_chroma_recon_stride];
1137
292M
            pel_error =
1138
292M
                ps_sao_ctxt
1139
292M
                    ->pu1_cur_chroma_src_buf[col + row * ps_sao_ctxt->i4_cur_chroma_src_stride] -
1140
292M
                ps_sao_ctxt
1141
292M
                    ->pu1_cur_chroma_recon_buf[col + row * ps_sao_ctxt->i4_cur_chroma_recon_stride];
1142
292M
            edgeidx = 2 + SIGN(c - a) + SIGN(c - b);
1143
1144
292M
            if(pel_error != 0)
1145
41.2M
            {
1146
41.2M
                pi4_acc_error_category[edgeidx] += pel_error;
1147
41.2M
                pi4_category_count[edgeidx]++;
1148
41.2M
            }
1149
292M
        }
1150
4.84M
    }
1151
158k
}
1152
1153
/**
1154
*******************************************************************************
1155
*
1156
* @brief
1157
*     Gets edge offset params
1158
*
1159
* @par Description:
1160
*     Given the ctb and sao angle this function will calculate accumulated
1161
*     error between source and recon and the coresponding count for 4 edge
1162
*     indexes one each for peak,valley, half peak and half valley.
1163
*
1164
* @param[in]
1165
*   ps_sao_ctxt:   Pointer to SAO context
1166
*   eo_sao_class: specifies edge offset class
1167
*   pi4_acc_error_category: pointer to an array to store accumulated error between source and recon
1168
*   pi4_category_count    : pointer to an array to store number of peaks,valleys,half peaks and half valleys.
1169
* @returns
1170
*
1171
* @remarks
1172
*  None
1173
*
1174
*******************************************************************************/
1175
void ihevce_get_luma_eo_sao_params(
1176
    void *pv_sao_ctxt,
1177
    WORD32 eo_sao_class,
1178
    WORD32 *pi4_acc_error_category,
1179
    WORD32 *pi4_category_count)
1180
634k
{
1181
634k
    WORD32 row_start, row_end, col_start, col_end, row, col;
1182
634k
    WORD32 row_offset = 0, col_offset = 0;
1183
634k
    WORD32 a, b, c, pel_error, edgeidx;
1184
634k
    sao_ctxt_t *ps_sao_ctxt = (sao_ctxt_t *)pv_sao_ctxt;
1185
1186
634k
    row_start = 0;
1187
634k
    row_end = ps_sao_ctxt->i4_sao_blk_ht;
1188
634k
    col_start = 0;
1189
634k
    col_end = ps_sao_ctxt->i4_sao_blk_wd;
1190
1191
634k
    if((ps_sao_ctxt->i4_ctb_x == 0) && (eo_sao_class != SAO_EDGE_90_DEG))
1192
268k
    {
1193
268k
        col_start = 1;
1194
268k
    }
1195
1196
634k
    if(((ps_sao_ctxt->i4_ctb_x + 1) == ps_sao_ctxt->ps_sps->i2_pic_wd_in_ctb) &&
1197
634k
       (eo_sao_class != SAO_EDGE_90_DEG))
1198
268k
    {
1199
268k
        col_end = ps_sao_ctxt->i4_sao_blk_wd - 1;
1200
268k
    }
1201
1202
634k
    if((ps_sao_ctxt->i4_ctb_y == 0) && (eo_sao_class != SAO_EDGE_0_DEG))
1203
302k
    {
1204
302k
        row_start = 1;
1205
302k
    }
1206
1207
634k
    if(((ps_sao_ctxt->i4_ctb_y + 1) == ps_sao_ctxt->ps_sps->i2_pic_ht_in_ctb) &&
1208
634k
       (eo_sao_class != SAO_EDGE_0_DEG))
1209
302k
    {
1210
302k
        row_end = ps_sao_ctxt->i4_sao_blk_ht - 1;
1211
302k
    }
1212
1213
634k
    if(eo_sao_class == SAO_EDGE_0_DEG)
1214
158k
    {
1215
158k
        row_offset = 0;
1216
158k
        col_offset = 1;
1217
158k
    }
1218
476k
    else if(eo_sao_class == SAO_EDGE_90_DEG)
1219
158k
    {
1220
158k
        row_offset = 1;
1221
158k
        col_offset = 0;
1222
158k
    }
1223
317k
    else if(eo_sao_class == SAO_EDGE_135_DEG)
1224
158k
    {
1225
158k
        row_offset = 1;
1226
158k
        col_offset = 1;
1227
158k
    }
1228
158k
    else if(eo_sao_class == SAO_EDGE_45_DEG)
1229
158k
    {
1230
158k
        row_offset = 1;
1231
158k
        col_offset = -1;
1232
158k
    }
1233
1234
39.4M
    for(row = row_start; row < row_end; row++)
1235
38.8M
    {
1236
2.42G
        for(col = col_start; col < col_end; col++)
1237
2.38G
        {
1238
2.38G
            c = ps_sao_ctxt
1239
2.38G
                    ->pu1_cur_luma_recon_buf[col + row * ps_sao_ctxt->i4_cur_luma_recon_stride];
1240
2.38G
            a = ps_sao_ctxt->pu1_cur_luma_recon_buf
1241
2.38G
                    [(col - col_offset) +
1242
2.38G
                     (row - row_offset) * ps_sao_ctxt->i4_cur_luma_recon_stride];
1243
2.38G
            b = ps_sao_ctxt->pu1_cur_luma_recon_buf
1244
2.38G
                    [(col + col_offset) +
1245
2.38G
                     (row + row_offset) * ps_sao_ctxt->i4_cur_luma_recon_stride];
1246
2.38G
            pel_error =
1247
2.38G
                ps_sao_ctxt->pu1_cur_luma_src_buf[col + row * ps_sao_ctxt->i4_cur_luma_src_stride] -
1248
2.38G
                ps_sao_ctxt
1249
2.38G
                    ->pu1_cur_luma_recon_buf[col + row * ps_sao_ctxt->i4_cur_luma_recon_stride];
1250
2.38G
            edgeidx = 2 + SIGN(c - a) + SIGN(c - b);
1251
1252
2.38G
            if(pel_error != 0)
1253
276M
            {
1254
276M
                pi4_acc_error_category[edgeidx] += pel_error;
1255
276M
                pi4_category_count[edgeidx]++;
1256
276M
            }
1257
2.38G
        }
1258
38.8M
    }
1259
634k
}
1260
1261
/*!
1262
******************************************************************************
1263
* \if Function name : ihevce_compute_area_of_valid_cus_in_ctb \endif
1264
*
1265
* \brief
1266
*
1267
*
1268
*****************************************************************************
1269
*/
1270
WORD32 ihevce_compute_area_of_valid_cus_in_ctb(cur_ctb_cu_tree_t *ps_cu_tree)
1271
3.37M
{
1272
3.37M
    WORD32 i4_area;
1273
1274
3.37M
    if(NULL == ps_cu_tree)
1275
1.47M
    {
1276
1.47M
        return 0;
1277
1.47M
    }
1278
1279
1.89M
    if(ps_cu_tree->is_node_valid)
1280
1.09M
    {
1281
1.09M
        i4_area = ps_cu_tree->u1_cu_size * ps_cu_tree->u1_cu_size;
1282
1.09M
    }
1283
807k
    else
1284
807k
    {
1285
807k
        i4_area = ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree->ps_child_node_tl) +
1286
807k
                  ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree->ps_child_node_tr) +
1287
807k
                  ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree->ps_child_node_bl) +
1288
807k
                  ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree->ps_child_node_br);
1289
807k
    }
1290
1291
1.89M
    return i4_area;
1292
3.37M
}
1293
1294
/*!
1295
******************************************************************************
1296
* \if Function name : ihevce_create_cuNode_children \endif
1297
*
1298
* \brief
1299
*
1300
*
1301
*****************************************************************************
1302
*/
1303
static WORD32 ihevce_create_cuNode_children(
1304
    cur_ctb_cu_tree_t *ps_cu_tree_root,
1305
    cur_ctb_cu_tree_t *ps_cu_tree_cur_node,
1306
    WORD32 nodes_already_created)
1307
3.08M
{
1308
3.08M
    cur_ctb_cu_tree_t *ps_tl;
1309
3.08M
    cur_ctb_cu_tree_t *ps_tr;
1310
3.08M
    cur_ctb_cu_tree_t *ps_bl;
1311
3.08M
    cur_ctb_cu_tree_t *ps_br;
1312
1313
3.08M
    ps_tl = ps_cu_tree_root + nodes_already_created;
1314
3.08M
    ps_tr = ps_tl + 1;
1315
3.08M
    ps_bl = ps_tr + 1;
1316
3.08M
    ps_br = ps_bl + 1;
1317
    /*
1318
    ps_tl = (ai4_child_node_enable[0]) ? ps_tl : NULL;
1319
    ps_tr = (ai4_child_node_enable[1]) ? ps_tr : NULL;
1320
    ps_bl = (ai4_child_node_enable[2]) ? ps_bl : NULL;
1321
    ps_br = (ai4_child_node_enable[3]) ? ps_br : NULL;
1322
    */
1323
3.08M
    ps_cu_tree_cur_node->ps_child_node_tl = ps_tl;
1324
3.08M
    ps_cu_tree_cur_node->ps_child_node_tr = ps_tr;
1325
3.08M
    ps_cu_tree_cur_node->ps_child_node_bl = ps_bl;
1326
3.08M
    ps_cu_tree_cur_node->ps_child_node_br = ps_br;
1327
1328
3.08M
    return 4;
1329
3.08M
}
1330
1331
/*!
1332
******************************************************************************
1333
* \if Function name : ihevce_cu_tree_init \endif
1334
*
1335
* \brief
1336
*
1337
*
1338
*****************************************************************************
1339
*/
1340
void ihevce_cu_tree_init(
1341
    cur_ctb_cu_tree_t *ps_cu_tree,
1342
    cur_ctb_cu_tree_t *ps_cu_tree_root,
1343
    WORD32 *pi4_nodes_created_in_cu_tree,
1344
    WORD32 tree_depth,
1345
    CU_POS_T e_grandparent_blk_pos,
1346
    CU_POS_T e_parent_blk_pos,
1347
    CU_POS_T e_cur_blk_pos)
1348
12.4M
{
1349
12.4M
    WORD32 cu_pos_x = 0;
1350
12.4M
    WORD32 cu_pos_y = 0;
1351
12.4M
    WORD32 cu_size = 0;
1352
1353
12.4M
    WORD32 children_nodes_required = 1;
1354
12.4M
    WORD32 node_validity = 0;
1355
1356
12.4M
    switch(tree_depth)
1357
12.4M
    {
1358
146k
    case 0:
1359
146k
    {
1360
        /* 64x64 block */
1361
146k
        cu_size = 64;
1362
146k
        cu_pos_x = 0;
1363
146k
        cu_pos_y = 0;
1364
1365
146k
        break;
1366
0
    }
1367
587k
    case 1:
1368
587k
    {
1369
        /* 32x32 block */
1370
587k
        cu_size = 32;
1371
1372
        /* Explanation for logic below - */
1373
        /* * pos_x and pos_y are in units of 8x8 CU's */
1374
        /* * pos_x = 0 for TL and BL children */
1375
        /* * pos_x = 4 for TR and BR children */
1376
        /* * pos_y = 0 for TL and TR children */
1377
        /* * pos_y = 4 for BL and BR children */
1378
587k
        cu_pos_x = (e_cur_blk_pos & 1) << 2;
1379
587k
        cu_pos_y = (e_cur_blk_pos & 2) << 1;
1380
1381
587k
        break;
1382
0
    }
1383
2.34M
    case 2:
1384
2.34M
    {
1385
        /* 16x16 block */
1386
2.34M
        WORD32 cu_pos_x_parent;
1387
2.34M
        WORD32 cu_pos_y_parent;
1388
1389
2.34M
        cu_size = 16;
1390
1391
        /* Explanation for logic below - */
1392
        /* See similar explanation above */
1393
2.34M
        cu_pos_x_parent = (e_parent_blk_pos & 1) << 2;
1394
2.34M
        cu_pos_y_parent = (e_parent_blk_pos & 2) << 1;
1395
2.34M
        cu_pos_x = cu_pos_x_parent + ((e_cur_blk_pos & 1) << 1);
1396
2.34M
        cu_pos_y = cu_pos_y_parent + (e_cur_blk_pos & 2);
1397
1398
2.34M
        break;
1399
0
    }
1400
9.39M
    case 3:
1401
9.39M
    {
1402
        /* 8x8 block */
1403
9.39M
        WORD32 cu_pos_x_grandparent;
1404
9.39M
        WORD32 cu_pos_y_grandparent;
1405
1406
9.39M
        WORD32 cu_pos_x_parent;
1407
9.39M
        WORD32 cu_pos_y_parent;
1408
1409
9.39M
        cu_size = 8;
1410
1411
9.39M
        cu_pos_x_grandparent = (e_grandparent_blk_pos & 1) << 2;
1412
9.39M
        cu_pos_y_grandparent = (e_grandparent_blk_pos & 2) << 1;
1413
9.39M
        cu_pos_x_parent = cu_pos_x_grandparent + ((e_parent_blk_pos & 1) << 1);
1414
9.39M
        cu_pos_y_parent = cu_pos_y_grandparent + (e_parent_blk_pos & 2);
1415
9.39M
        cu_pos_x = cu_pos_x_parent + (e_cur_blk_pos & 1);
1416
9.39M
        cu_pos_y = cu_pos_y_parent + ((e_cur_blk_pos & 2) >> 1);
1417
1418
9.39M
        children_nodes_required = 0;
1419
1420
9.39M
        break;
1421
0
    }
1422
12.4M
    }
1423
1424
    /* Fill the current cu_tree node */
1425
12.4M
    CU_TREE_NODE_FILL(ps_cu_tree, node_validity, cu_pos_x, cu_pos_y, cu_size, 1);
1426
1427
12.4M
    if(children_nodes_required)
1428
3.08M
    {
1429
3.08M
        tree_depth++;
1430
1431
3.08M
        (*pi4_nodes_created_in_cu_tree) += ihevce_create_cuNode_children(
1432
3.08M
            ps_cu_tree_root, ps_cu_tree, (*pi4_nodes_created_in_cu_tree));
1433
1434
3.08M
        ihevce_cu_tree_init(
1435
3.08M
            ps_cu_tree->ps_child_node_tl,
1436
3.08M
            ps_cu_tree_root,
1437
3.08M
            pi4_nodes_created_in_cu_tree,
1438
3.08M
            tree_depth,
1439
3.08M
            e_parent_blk_pos,
1440
3.08M
            e_cur_blk_pos,
1441
3.08M
            POS_TL);
1442
1443
3.08M
        ihevce_cu_tree_init(
1444
3.08M
            ps_cu_tree->ps_child_node_tr,
1445
3.08M
            ps_cu_tree_root,
1446
3.08M
            pi4_nodes_created_in_cu_tree,
1447
3.08M
            tree_depth,
1448
3.08M
            e_parent_blk_pos,
1449
3.08M
            e_cur_blk_pos,
1450
3.08M
            POS_TR);
1451
1452
3.08M
        ihevce_cu_tree_init(
1453
3.08M
            ps_cu_tree->ps_child_node_bl,
1454
3.08M
            ps_cu_tree_root,
1455
3.08M
            pi4_nodes_created_in_cu_tree,
1456
3.08M
            tree_depth,
1457
3.08M
            e_parent_blk_pos,
1458
3.08M
            e_cur_blk_pos,
1459
3.08M
            POS_BL);
1460
1461
3.08M
        ihevce_cu_tree_init(
1462
3.08M
            ps_cu_tree->ps_child_node_br,
1463
3.08M
            ps_cu_tree_root,
1464
3.08M
            pi4_nodes_created_in_cu_tree,
1465
3.08M
            tree_depth,
1466
3.08M
            e_parent_blk_pos,
1467
3.08M
            e_cur_blk_pos,
1468
3.08M
            POS_BR);
1469
3.08M
    }
1470
9.39M
    else
1471
9.39M
    {
1472
9.39M
        NULLIFY_THE_CHILDREN_NODES(ps_cu_tree);
1473
9.39M
    }
1474
12.4M
}