Coverage Report

Created: 2023-09-25 07:43

/src/libhevc/encoder/ihevce_decomp_pre_intra_pass.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/*!
22
******************************************************************************
23
* \file ihevce_decomp_pre_intra_pass.c
24
*
25
* \brief
26
*    This file contains definitions related to frame decomposition done during
27
*    pre intra processing
28
*
29
* \date
30
*    19/02/2013
31
*
32
* \author
33
*    Ittiam
34
*
35
* List of Functions
36
*    ihevce_intra_populate_mode_bits_cost()
37
*    ihevce_8x8_sad_computer()
38
*    ihevce_4x4_sad_computer()
39
*    ihevce_ed_4x4_find_best_modes()
40
*    ihevce_ed_calc_4x4_blk()
41
*    ihevce_ed_calc_8x8_blk()
42
*    ihevce_ed_calc_incomplete_ctb()
43
*    ihevce_cu_level_qp_mod()
44
*    ihevce_ed_calc_ctb()
45
*    ihevce_ed_frame_init()
46
*    ihevce_scale_by_2()
47
*    ihevce_decomp_pre_intra_process_row()
48
*    ihevce_decomp_pre_intra_process()
49
*    ihevce_decomp_pre_intra_get_num_mem_recs()
50
*    ihevce_decomp_pre_intra_get_mem_recs()
51
*    ihevce_decomp_pre_intra_init()
52
*    ihevce_decomp_pre_intra_frame_init()
53
*    ihevce_merge_sort()
54
*    ihevce_decomp_pre_intra_curr_frame_pre_intra_deinit()
55
*
56
******************************************************************************
57
*/
58
59
/*****************************************************************************/
60
/* File Includes                                                             */
61
/*****************************************************************************/
62
/* System include files */
63
#include <stdio.h>
64
#include <string.h>
65
#include <stdlib.h>
66
#include <assert.h>
67
#include <stdarg.h>
68
#include <stdint.h>
69
#include <math.h>
70
#include <limits.h>
71
72
/* User include files */
73
#include "ihevc_typedefs.h"
74
#include "itt_video_api.h"
75
#include "ihevce_api.h"
76
77
#include "rc_cntrl_param.h"
78
#include "rc_frame_info_collector.h"
79
#include "rc_look_ahead_params.h"
80
81
#include "ihevc_defs.h"
82
#include "ihevc_debug.h"
83
#include "ihevc_structs.h"
84
#include "ihevc_platform_macros.h"
85
#include "ihevc_deblk.h"
86
#include "ihevc_itrans_recon.h"
87
#include "ihevc_chroma_itrans_recon.h"
88
#include "ihevc_chroma_intra_pred.h"
89
#include "ihevc_intra_pred.h"
90
#include "ihevc_inter_pred.h"
91
#include "ihevc_mem_fns.h"
92
#include "ihevc_padding.h"
93
#include "ihevc_weighted_pred.h"
94
#include "ihevc_sao.h"
95
#include "ihevc_resi_trans.h"
96
#include "ihevc_quant_iquant_ssd.h"
97
#include "ihevc_cabac_tables.h"
98
99
#include "ihevce_defs.h"
100
#include "ihevce_hle_interface.h"
101
#include "ihevce_lap_enc_structs.h"
102
#include "ihevce_multi_thrd_structs.h"
103
#include "ihevce_multi_thrd_funcs.h"
104
#include "ihevce_me_common_defs.h"
105
#include "ihevce_had_satd.h"
106
#include "ihevce_error_codes.h"
107
#include "ihevce_bitstream.h"
108
#include "ihevce_cabac.h"
109
#include "ihevce_rdoq_macros.h"
110
#include "ihevce_function_selector.h"
111
#include "ihevce_enc_structs.h"
112
#include "ihevce_entropy_structs.h"
113
#include "ihevce_cmn_utils_instr_set_router.h"
114
#include "ihevce_ipe_instr_set_router.h"
115
#include "ihevce_decomp_pre_intra_structs.h"
116
#include "ihevce_decomp_pre_intra_pass.h"
117
#include "ihevce_enc_loop_structs.h"
118
#include "hme_datatype.h"
119
#include "hme_interface.h"
120
#include "hme_common_defs.h"
121
#include "ihevce_global_tables.h"
122
123
/*****************************************************************************/
124
/* Global variables                                                          */
125
/*****************************************************************************/
126
127
/**
128
*****************************************************************************
129
* @brief subset of intra modes to be evaluated during pre enc intra process
130
*****************************************************************************
131
*/
132
/* Layout note: entries [0] and [1] hold modes 0 and 1, which
 * ihevce_ed_4x4_find_best_modes() only visits when its low-resolution flag
 * is 1 (it starts at index 2 otherwise). Entries [2]..[10] are the
 * candidates that routine classifies as angular (mode >= 2). */
static const UWORD8 gau1_modes_to_eval[11] = { 0, 1, 26, 2, 6, 10, 14, 18, 22, 30, 34 };
133
134
/**
135
*****************************************************************************
136
* @brief  list of pointers to luma intra pred functions
137
*****************************************************************************
138
*/
139
/* Used in this file as g_apf_lum_ip[g_i4_ip_funcs[mode]](...) to generate a
 * luma prediction for a given intra mode. No initialiser is given here, so
 * the entries have to be filled in before any of those calls happen --
 * NOTE(review): the code that populates this table is not visible in this
 * part of the file; confirm where it lives. */
pf_intra_pred g_apf_lum_ip[NUM_IP_FUNCS];
140
141
/*****************************************************************************/
142
/* Function Definitions                                                      */
143
/*****************************************************************************/
144
145
/*!
146
******************************************************************************
147
* \if Function name : ihevce_intra_populate_mode_bits_cost \endif
148
*
149
* \brief: look-up table of cost of signalling an intra mode in the
150
*  bitstream
151
*
152
*****************************************************************************
153
*/
154
static void ihevce_intra_populate_mode_bits_cost(UWORD16 *mode_bits_cost, WORD32 lambda)
155
0
{
156
0
    WORD32 i;
157
    // 5.5 * lambda
158
0
    UWORD16 five_bits_cost = COMPUTE_RATE_COST_CLIP30(11, lambda, (LAMBDA_Q_SHIFT + 1));
159
160
0
    for(i = 0; i < NUM_MODES; i++)
161
0
    {
162
0
        mode_bits_cost[i] = five_bits_cost;
163
0
    }
164
0
}
165
166
/*!
167
******************************************************************************
168
* \if Function name : ihevce_8x8_sad_computer \endif
169
*
170
* \brief: compute sad between 2 8x8 blocks
171
*
172
*****************************************************************************
173
*/
174
UWORD16 ihevce_8x8_sad_computer(UWORD8 *src, UWORD8 *pred, WORD32 src_strd, WORD32 pred_strd)
175
0
{
176
0
    UWORD16 sad = 0;
177
0
    WORD32 i, j;
178
179
0
    for(i = 0; i < 8; i++)
180
0
    {
181
0
        for(j = 0; j < 8; j++)
182
0
        {
183
0
            sad += ABS(src[j] - pred[j]);
184
0
        }
185
0
        src += src_strd;
186
0
        pred += pred_strd;
187
0
    }
188
189
0
    return sad;
190
0
}
191
192
/*!
193
******************************************************************************
194
* \if Function name : ihevce_4x4_sad_computer \endif
195
*
196
* \brief: compute sad between 2 4x4 blocks
197
*
198
*****************************************************************************
199
*/
200
UWORD16 ihevce_4x4_sad_computer(UWORD8 *src, UWORD8 *pred, WORD32 src_strd, WORD32 pred_strd)
201
0
{
202
0
    UWORD16 sad = 0;
203
0
    WORD32 i, j;
204
205
0
    for(i = 0; i < 4; i++)
206
0
    {
207
0
        for(j = 0; j < 4; j++)
208
0
        {
209
0
            sad += ABS(src[j] - pred[j]);
210
0
        }
211
0
        src += src_strd;
212
0
        pred += pred_strd;
213
0
    }
214
215
0
    return sad;
216
0
}
217
218
/*!
219
******************************************************************************
220
* \if Function name : ihevce_ed_4x4_find_best_modes \endif
221
*
222
* \brief: evaluate input 4x4 block for pre-selected list intra modes and
223
* return best sad, cost
224
*
225
*****************************************************************************
226
*/
227
void ihevce_ed_4x4_find_best_modes(
228
    UWORD8 *pu1_src,
229
    WORD32 src_stride,
230
    UWORD8 *ref,
231
    UWORD16 *mode_bits_cost,
232
    UWORD8 *pu1_best_modes,
233
    WORD32 *pu1_best_sad_costs,
234
    WORD32 u1_low_resol,
235
    FT_SAD_COMPUTER *pf_4x4_sad_computer)
236
0
{
237
0
    WORD32 i;
238
0
    UWORD8 mode = 0, best_amode = 0, best_nmode = 0;
239
0
    UWORD8 pred[16];
240
0
    WORD32 sad = 0;
241
0
    WORD32 sad_cost = 0;
242
0
    WORD32 best_asad_cost = 0xFFFFF;
243
0
    WORD32 best_nsad_cost = 0xFFFFF;
244
245
    /* If lower layers, l1 or l2, all the 11 modes are evaluated */
246
    /* If L0 layer, all modes excluding DC and Planar are evaluated */
247
0
    if(1 == u1_low_resol)
248
0
        i = 0;
249
0
    else
250
0
        i = 2;
251
252
    /* Find the best non-angular and angular mode till level 4 */
253
0
    for(; i < 11; i++)
254
0
    {
255
0
        mode = gau1_modes_to_eval[i];
256
0
        g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode);
257
0
        sad = pf_4x4_sad_computer(pu1_src, pred, src_stride, 4);
258
0
        sad_cost = sad + mode_bits_cost[mode];
259
0
        if(mode < 2)
260
0
        {
261
0
            if(sad_cost < best_nsad_cost)
262
0
            {
263
0
                best_nmode = mode;
264
0
                best_nsad_cost = sad_cost;
265
0
            }
266
0
        }
267
0
        else
268
0
        {
269
0
            if(sad_cost < best_asad_cost)
270
0
            {
271
0
                best_amode = mode;
272
0
                best_asad_cost = sad_cost;
273
0
            }
274
0
        }
275
0
    }
276
277
0
    pu1_best_modes[0] = best_amode;
278
0
    pu1_best_sad_costs[0] = best_asad_cost;
279
280
0
    if(1 == u1_low_resol)
281
0
    {
282
0
        pu1_best_modes[1] = best_nmode;
283
0
        pu1_best_sad_costs[1] = best_nsad_cost;
284
0
    }
285
0
}
286
287
/*!
288
******************************************************************************
289
* \if Function name : ihevce_ed_calc_4x4_blk \endif
290
*
291
* \brief: evaluate input 4x4 block for all intra modes and return best sad &
292
*  cost
293
*
294
*****************************************************************************
295
*/
296
static void ihevce_ed_calc_4x4_blk(
297
    ihevce_ed_blk_t *ps_ed,
298
    UWORD8 *pu1_src,
299
    WORD32 src_stride,
300
    UWORD8 *ref,
301
    UWORD16 *mode_bits_cost,
302
    WORD32 *pi4_best_satd,
303
    WORD32 i4_quality_preset,
304
    WORD32 *pi4_best_sad_cost,
305
    ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list)
306
0
{
307
0
    WORD32 i, i_end;
308
0
    UWORD8 mode, best_amode, best_nmode;
309
0
    UWORD8 pred[16];
310
0
    UWORD16 sad;
311
0
    WORD32 sad_cost = 0;
312
0
    WORD32 best_asad_cost = 0xFFFFF;
313
0
    WORD32 best_nsad_cost = 0xFFFFF;
314
0
    UWORD8 au1_best_modes[2];
315
0
    WORD32 ai4_best_sad_costs[2];
316
    /* L1/L2 resolution hence low resolution enable */
317
0
    const WORD32 u1_low_resol = 1;
318
0
    UWORD8 modes_to_eval[2];
319
320
0
    ps_ipe_optimised_function_list->pf_ed_4x4_find_best_modes(
321
0
        pu1_src,
322
0
        src_stride,
323
0
        ref,
324
0
        mode_bits_cost,
325
0
        au1_best_modes,
326
0
        ai4_best_sad_costs,
327
0
        u1_low_resol,
328
0
        ps_ipe_optimised_function_list->pf_4x4_sad_computer);
329
330
0
    best_nmode = au1_best_modes[1];
331
0
    best_amode = au1_best_modes[0];
332
0
    best_nsad_cost = ai4_best_sad_costs[1];
333
0
    best_asad_cost = ai4_best_sad_costs[0];
334
0
    *pi4_best_satd = best_asad_cost - mode_bits_cost[best_amode];
335
336
    /* Around best level 4 angular mode, search for best level 2 mode */
337
0
    modes_to_eval[0] = best_amode - 2;
338
0
    modes_to_eval[1] = best_amode + 2;
339
0
    i = 0;
340
0
    i_end = 2;
341
0
    if(best_amode == 2)
342
0
        i = 1;
343
0
    else if(best_amode == 34)
344
0
        i_end = 1;
345
0
    for(; i < i_end; i++)
346
0
    {
347
0
        mode = modes_to_eval[i];
348
0
        g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode);
349
0
        sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(pu1_src, pred, src_stride, 4);
350
0
        sad_cost = sad + mode_bits_cost[mode];
351
0
        if(sad_cost < best_asad_cost)
352
0
        {
353
0
            best_amode = mode;
354
0
            best_asad_cost = sad_cost;
355
0
            *pi4_best_satd = sad;
356
0
        }
357
0
    }
358
359
0
    if(i4_quality_preset < IHEVCE_QUALITY_P4)
360
0
    {
361
        /* Around best level 2 angular mode, search for best level 1 mode */
362
0
        modes_to_eval[0] = best_amode - 1;
363
0
        modes_to_eval[1] = best_amode + 1;
364
0
        i = 0;
365
0
        i_end = 2;
366
0
        if(best_amode == 2)
367
0
            i = 1;
368
0
        else if(best_amode == 34)
369
0
            i_end = 1;
370
0
        for(; i < i_end; i++)
371
0
        {
372
0
            mode = modes_to_eval[i];
373
0
            g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode);
374
0
            sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(pu1_src, pred, src_stride, 4);
375
0
            sad_cost = sad + mode_bits_cost[mode];
376
0
            if(sad_cost < best_asad_cost)
377
0
            {
378
0
                best_amode = mode;
379
0
                best_asad_cost = sad_cost;
380
0
                *pi4_best_satd = sad;
381
0
            }
382
0
        }
383
0
    }
384
385
0
    if(best_asad_cost < best_nsad_cost)
386
0
    {
387
0
        ps_ed->best_mode = best_amode;
388
0
        *pi4_best_sad_cost = best_asad_cost;
389
0
    }
390
0
    else
391
0
    {
392
0
        ps_ed->best_mode = best_nmode;
393
0
        *pi4_best_sad_cost = best_nsad_cost;
394
0
    }
395
0
    ps_ed->intra_or_inter = 0;
396
0
    ps_ed->merge_success = 0;
397
0
}
398
399
/*!
400
******************************************************************************
401
* \if Function name : ihevce_ed_calc_8x8_blk \endif
402
*
403
* \brief: evaluate input 8x8 block for intra modes basing on the intra mode
404
*  decisions made at 4x4 level. This function also makes a decision whether
405
*  to split blk in to 4x4 partitions or not.
406
*
407
*****************************************************************************
408
*/
409
static void ihevce_ed_calc_8x8_blk(
410
    ihevce_ed_ctxt_t *ps_ed_ctxt,
411
    ihevce_ed_blk_t *ps_ed_8x8,
412
    UWORD8 *pu1_src,
413
    WORD32 src_stride,
414
    WORD32 *nbr_flags_ptr,
415
    WORD32 lambda,
416
    WORD32 *pi4_best_satd,
417
    WORD32 i4_layer_id,
418
    WORD32 i4_quality_preset,
419
    WORD32 *pi4_best_sad_cost_8x8_l1_ipe,
420
    WORD32 *pi4_best_sad_8x8_l1_ipe,
421
    ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list,
422
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
423
0
{
424
0
    ihevce_ed_blk_t *ps_ed_4x4 = ps_ed_8x8;
425
0
    UWORD8 *pu1_src_arr[4];
426
0
    WORD32 ai4_4x4_best_sad_cost[4];
427
0
    WORD32 nbr_flags_c, nbr_flags_r;
428
0
    UWORD8 *pu1_src_4x4;
429
0
    WORD32 i, j;
430
0
    func_selector_t *ps_func_selector = ps_ed_ctxt->ps_func_selector;
431
0
    ihevc_intra_pred_luma_ref_substitution_ft *pf_intra_pred_luma_ref_substitution =
432
0
        ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
433
434
    /* linearize ref samples for ipe of 8x8 block */
435
0
    nbr_flags_c = nbr_flags_ptr[0];
436
0
    nbr_flags_r = nbr_flags_ptr[1];
437
0
    if(CHECK_TR_AVAILABLE(nbr_flags_r))
438
0
    {
439
0
        SET_TR_AVAILABLE(nbr_flags_c);
440
0
    }
441
0
    else
442
0
    {
443
0
        SET_TR_UNAVAILABLE(nbr_flags_c);
444
0
    }
445
446
0
    pf_intra_pred_luma_ref_substitution(
447
0
        pu1_src - src_stride - 1,
448
0
        pu1_src - src_stride,
449
0
        pu1_src - 1,
450
0
        src_stride,
451
0
        8,
452
0
        nbr_flags_c,
453
0
        &ps_ed_ctxt->au1_ref_8x8[0][0],
454
0
        0);
455
456
0
    for(i = 0; i < 2; i++)
457
0
    {
458
0
        pu1_src_4x4 = pu1_src + i * 4 * src_stride;
459
0
        for(j = 0; j < 2; j++)
460
0
        {
461
0
            WORD32 i4_best_satd;
462
463
0
            pu1_src_arr[i * 2 + j] = pu1_src_4x4;
464
0
            nbr_flags_c = nbr_flags_ptr[i * 8 + j];
465
466
            /* linearize ref samples for ipe of 4x4 block */
467
0
            pf_intra_pred_luma_ref_substitution(
468
0
                pu1_src_4x4 - src_stride - 1,
469
0
                pu1_src_4x4 - src_stride,
470
0
                pu1_src_4x4 - 1,
471
0
                src_stride,
472
0
                4,
473
0
                nbr_flags_c,
474
0
                &ps_ed_ctxt->au1_ref_full_ctb[i * 2 + j][0],
475
0
                0);
476
477
            /* populates mode bits cost */
478
0
            ihevce_intra_populate_mode_bits_cost(
479
0
                &ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i * 2 + j][0], lambda);
480
481
0
            ihevce_ed_calc_4x4_blk(
482
0
                ps_ed_4x4,
483
0
                pu1_src_4x4,
484
0
                src_stride,
485
0
                &ps_ed_ctxt->au1_ref_full_ctb[i * 2 + j][0],
486
0
                &ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i * 2 + j][0],
487
0
                &i4_best_satd,
488
0
                i4_quality_preset,
489
0
                &ai4_4x4_best_sad_cost[i * 2 + j],
490
0
                ps_ipe_optimised_function_list);
491
492
0
            pu1_src_4x4 += 4;
493
0
            ps_ed_4x4 += 1;
494
0
        }
495
0
    }
496
497
    /* 8x8 merge */
498
0
    {
499
0
        UWORD8 pred[64];
500
0
        WORD32 merge_success;
501
0
        WORD32 sad, satd, cost;
502
0
        UWORD16 u2_sum_best_4x4_sad_cost = 0;
503
0
        UWORD16 u2_sum_best_4x4_satd_cost = 0;
504
0
        WORD32 i4_best_8x8_sad, i4_best_8x8_satd = 0;
505
0
        UWORD16 u2_best_8x8_cost = (UWORD16)(-1);
506
0
        UWORD8 u1_best_8x8_mode;
507
0
        UWORD8 modes_to_eval[6];
508
0
        UWORD8 u1_cond_4x4_satd;
509
0
        UWORD8 mode;
510
511
        /* init */
512
0
        ps_ed_4x4 = ps_ed_8x8;
513
0
        u1_best_8x8_mode = mode = ps_ed_4x4[0].best_mode;
514
0
        merge_success =
515
0
            (((ps_ed_4x4[0].best_mode == ps_ed_4x4[1].best_mode) +
516
0
              (ps_ed_4x4[0].best_mode == ps_ed_4x4[2].best_mode) +
517
0
              (ps_ed_4x4[0].best_mode == ps_ed_4x4[3].best_mode)) == 3);
518
0
        *pi4_best_satd = 0;
519
520
0
        for(i = 0; i < 4; i++)
521
0
        {
522
0
            u2_sum_best_4x4_sad_cost += ai4_4x4_best_sad_cost[i];
523
0
            modes_to_eval[i] = ps_ed_4x4[i].best_mode;
524
0
        }
525
526
0
        u1_cond_4x4_satd = ((1 == i4_layer_id) || (!merge_success && i4_quality_preset < IHEVCE_QUALITY_P4));
527
0
        if(u1_cond_4x4_satd)
528
0
        {
529
            /* Get SATD for 4x4 blocks */
530
0
            for(i = 0; i < 4; i++)
531
0
            {
532
0
                mode = modes_to_eval[i];
533
0
                g_apf_lum_ip[g_i4_ip_funcs[mode]](
534
0
                    &ps_ed_ctxt->au1_ref_full_ctb[i][0], 0, &pred[0], 4, 4, mode);
535
536
0
                satd = ps_cmn_utils_optimised_function_list->pf_HAD_4x4_8bit(
537
0
                    pu1_src_arr[i], src_stride, &pred[0], 4, NULL, 0);
538
539
0
                (ps_ed_4x4 + i)->i4_4x4_satd = satd;
540
541
0
                u2_sum_best_4x4_satd_cost +=
542
0
                    (satd + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i][mode]);
543
0
                *pi4_best_satd += satd;
544
0
            }
545
0
        }
546
547
0
        if(!merge_success)
548
0
        {
549
0
            UWORD8 i1_start; /* no of modes to evaluate */
550
0
            UWORD8 ai1_modes[6];
551
0
            WORD32 i4_merge_success_stage2 = 0;
552
553
            /* Prepare 6 candidates for 8x8 block. Two are DC and planar */
554
0
            ai1_modes[4] = 0;
555
0
            ai1_modes[5] = 1;
556
0
            i1_start = 4;
557
558
            /* Assign along with removing duplicates rest 4 candidates. */
559
0
            for(i = 3; i >= 0; i--)
560
0
            {
561
0
                WORD8 i1_fresh_mode_flag = 1;
562
563
0
                mode = modes_to_eval[i];
564
                /* Check if duplicate already exists in ai1_modes */
565
0
                for(j = i1_start; j < 6; j++)
566
0
                {
567
0
                    if(mode == ai1_modes[j])
568
0
                        i1_fresh_mode_flag = 0;
569
0
                }
570
0
                if(i1_fresh_mode_flag)
571
0
                {
572
0
                    i1_start--;
573
0
                    ai1_modes[i1_start] = mode;
574
0
                }
575
0
            }
576
577
0
            if(i4_quality_preset < IHEVCE_QUALITY_P4)
578
0
            {
579
                // 7.5 * lambda to incorporate transform flags
580
0
                u2_sum_best_4x4_satd_cost +=
581
0
                    (COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1)));
582
583
                /* loop over all modes for calculating SATD */
584
0
                for(i = i1_start; i < 6; i++)
585
0
                {
586
0
                    mode = ai1_modes[i];
587
0
                    g_apf_lum_ip[g_i4_ip_funcs[mode]](
588
0
                        &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred[0], 8, 8, mode);
589
590
0
                    satd = ps_cmn_utils_optimised_function_list->pf_HAD_8x8_8bit(
591
0
                        pu1_src_arr[0], src_stride, &pred[0], 8, NULL, 0);
592
593
0
                    cost = satd + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][mode];
594
595
                    /* Update data corresponding to least 8x8 cost */
596
0
                    if(cost <= u2_best_8x8_cost)
597
0
                    {
598
0
                        u2_best_8x8_cost = cost;
599
0
                        i4_best_8x8_satd = satd;
600
0
                        u1_best_8x8_mode = mode;
601
0
                    }
602
0
                }
603
604
                /* 8x8 vs 4x4 decision based on SATD values */
605
0
                if((u2_best_8x8_cost <= u2_sum_best_4x4_satd_cost) || (u2_best_8x8_cost <= 300))
606
0
                {
607
0
                    i4_merge_success_stage2 = 1;
608
0
                }
609
610
                /* Find the SAD based cost for 8x8 block for best mode */
611
0
                if(1 == i4_layer_id)
612
0
                {
613
0
                    UWORD8 i4_best_8x8_mode = u1_best_8x8_mode;
614
0
                    WORD32 i4_best_8x8_sad_curr;
615
616
0
                    g_apf_lum_ip[g_i4_ip_funcs[i4_best_8x8_mode]](
617
0
                        &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred[0], 8, 8, i4_best_8x8_mode);
618
619
0
                    i4_best_8x8_sad_curr = ps_ipe_optimised_function_list->pf_8x8_sad_computer(
620
0
                        pu1_src_arr[0], &pred[0], src_stride, 8);
621
622
0
                    *pi4_best_sad_cost_8x8_l1_ipe =
623
0
                        i4_best_8x8_sad_curr +
624
0
                        ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][i4_best_8x8_mode];
625
0
                    *pi4_best_sad_8x8_l1_ipe = i4_best_8x8_sad_curr;
626
0
                }
627
0
            }
628
0
            else /*If high_speed or extreme speed*/
629
0
            {
630
                // 7.5 * lambda to incorporate transform flags
631
0
                u2_sum_best_4x4_sad_cost +=
632
0
                    (COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1)));
633
634
                /*Loop over all modes for calculating SAD*/
635
0
                for(i = i1_start; i < 6; i++)
636
0
                {
637
0
                    mode = ai1_modes[i];
638
0
                    g_apf_lum_ip[g_i4_ip_funcs[mode]](
639
0
                        &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred[0], 8, 8, mode);
640
641
0
                    sad = ps_ipe_optimised_function_list->pf_8x8_sad_computer(
642
0
                        pu1_src_arr[0], &pred[0], src_stride, 8);
643
644
0
                    cost = sad + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][mode];
645
646
                    /*Find the data correspoinding to least cost */
647
0
                    if(cost <= u2_best_8x8_cost)
648
0
                    {
649
0
                        u2_best_8x8_cost = cost;
650
0
                        i4_best_8x8_sad = sad;
651
0
                        u1_best_8x8_mode = mode;
652
0
                    }
653
0
                }
654
655
                /* 8x8 vs 4x4 decision based on SAD values */
656
0
                if((u2_best_8x8_cost <= u2_sum_best_4x4_sad_cost) || (u2_best_8x8_cost <= 300))
657
0
                {
658
0
                    i4_merge_success_stage2 = 1;
659
0
                    if(1 == i4_layer_id)
660
0
                    {
661
0
                        g_apf_lum_ip[g_i4_ip_funcs[u1_best_8x8_mode]](
662
0
                            &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred[0], 8, 8, u1_best_8x8_mode);
663
0
                        i4_best_8x8_satd = ps_cmn_utils_optimised_function_list->pf_HAD_8x8_8bit(
664
0
                            pu1_src_arr[0], src_stride, &pred[0], 8, NULL, 0);
665
0
                    }
666
0
                }
667
668
0
                if(1 == i4_layer_id)
669
0
                {
670
0
                    *pi4_best_sad_cost_8x8_l1_ipe = u2_best_8x8_cost;
671
0
                    *pi4_best_sad_8x8_l1_ipe = i4_best_8x8_sad;
672
0
                }
673
0
            }
674
0
            if(i4_merge_success_stage2)
675
0
            {
676
0
                ps_ed_4x4->merge_success = 1;
677
0
                ps_ed_4x4->best_merge_mode = u1_best_8x8_mode;
678
0
                *pi4_best_satd = i4_best_8x8_satd;
679
0
            }
680
0
        }
681
0
        else
682
0
        {
683
0
            ps_ed_4x4->merge_success = 1;
684
0
            ps_ed_4x4->best_merge_mode = u1_best_8x8_mode;
685
686
0
            if(1 == i4_layer_id)
687
0
            {
688
0
                mode = u1_best_8x8_mode;
689
0
                g_apf_lum_ip[g_i4_ip_funcs[mode]](
690
0
                    &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred[0], 8, 8, mode);
691
692
0
                i4_best_8x8_sad = ps_ipe_optimised_function_list->pf_8x8_sad_computer(
693
0
                    pu1_src_arr[0], &pred[0], src_stride, 8);
694
695
0
                *pi4_best_sad_cost_8x8_l1_ipe =
696
0
                    i4_best_8x8_sad + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][mode];
697
0
                *pi4_best_sad_8x8_l1_ipe = i4_best_8x8_sad;
698
699
0
                i4_best_8x8_satd = ps_cmn_utils_optimised_function_list->pf_HAD_8x8_8bit(
700
0
                    pu1_src_arr[0], src_stride, &pred[0], 8, NULL, 0);
701
0
            }
702
0
            *pi4_best_satd = i4_best_8x8_satd;
703
0
        }
704
0
    }
705
0
}
706
707
/*!
708
******************************************************************************
709
* \if Function name : ihevce_ed_calc_ctb \endif
710
*
711
* \brief: performs L1/L2 8x8 and 4x4 intra mode analysis
712
*
713
*****************************************************************************
714
*/
715
void ihevce_ed_calc_ctb(
716
    ihevce_ed_ctxt_t *ps_ed_ctxt,
717
    ihevce_ed_blk_t *ps_ed_ctb,
718
    ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
719
    UWORD8 *pu1_src,
720
    WORD32 src_stride,
721
    WORD32 num_4x4_blks_x,
722
    WORD32 num_4x4_blks_y,
723
    WORD32 *nbr_flags,
724
    WORD32 i4_layer_id,
725
    ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list,
726
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
727
0
{
728
0
    ihevce_ed_blk_t *ps_ed_8x8;
729
0
    UWORD8 *pu1_src_8x8;
730
0
    WORD32 *nbr_flags_ptr;
731
0
    WORD32 lambda = ps_ed_ctxt->lambda;
732
0
    WORD32 i, j;
733
0
    WORD32 z_scan_idx = 0;
734
0
    WORD32 z_scan_act_idx = 0;
735
736
0
    if(i4_layer_id == 1)
737
0
    {
738
0
        WORD32 i4_i;
739
740
0
        for(i4_i = 0; i4_i < 64; i4_i++)
741
0
        {
742
0
            (ps_ed_ctb + i4_i)->i4_4x4_satd = -1;
743
0
        }
744
745
0
        for(i4_i = 0; i4_i < 16; i4_i++)
746
0
        {
747
0
            ps_ed_ctb_l1->i4_sum_4x4_satd[i4_i] = -2;
748
0
            ps_ed_ctb_l1->i4_min_4x4_satd[i4_i] = 0x7FFFFFFF;
749
0
            ps_ed_ctb_l1->i4_8x8_satd[i4_i][0] = -2;
750
0
            ps_ed_ctb_l1->i4_8x8_satd[i4_i][1] = -2;
751
0
        }
752
753
0
        for(i4_i = 0; i4_i < 4; i4_i++)
754
0
        {
755
0
            ps_ed_ctb_l1->i4_16x16_satd[i4_i][0] = -2;
756
0
            ps_ed_ctb_l1->i4_16x16_satd[i4_i][1] = -2;
757
0
            ps_ed_ctb_l1->i4_16x16_satd[i4_i][2] = -2;
758
0
        }
759
0
        ps_ed_ctb_l1->i4_32x32_satd[0][0] = -2;
760
0
        ps_ed_ctb_l1->i4_32x32_satd[0][1] = -2;
761
0
        ps_ed_ctb_l1->i4_32x32_satd[0][2] = -2;
762
0
        ps_ed_ctb_l1->i4_32x32_satd[0][3] = -2;
763
764
0
        for(i4_i = 0; i4_i < 16; i4_i++)
765
0
        {
766
0
            ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_me[i4_i] = -1;
767
0
            ps_ed_ctb_l1->i4_sad_cost_me_for_ref[i4_i] = -1;
768
0
            ps_ed_ctb_l1->i4_sad_me_for_ref[i4_i] = -1;
769
0
            ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[i4_i] = -1;
770
771
0
            ps_ed_ctb_l1->i4_best_sad_8x8_l1_me_for_decide[i4_i] = -1;
772
773
0
            ps_ed_ctb_l1->i4_best_satd_8x8[i4_i] = -1;
774
0
            ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[i4_i] = -1;
775
0
            ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[i4_i] = -1;
776
0
        }
777
0
    }
778
779
0
    ASSERT((num_4x4_blks_x & 1) == 0);
780
0
    ASSERT((num_4x4_blks_y & 1) == 0);
781
0
    for(i = 0; i < num_4x4_blks_y / 2; i++)
782
0
    {
783
0
        pu1_src_8x8 = pu1_src + i * 2 * 4 * src_stride;
784
0
        nbr_flags_ptr = &nbr_flags[0] + 2 * 8 * i;
785
786
0
        for(j = 0; j < num_4x4_blks_x / 2; j++)
787
0
        {
788
0
            WORD32 i4_best_satd;
789
0
            WORD32 i4_best_sad_cost_8x8_l1_ipe;
790
0
            WORD32 i4_best_sad_8x8_l1_ipe;
791
792
0
            z_scan_idx = gau1_ctb_raster_to_zscan[i * 2 * 16 + j * 2];
793
0
            z_scan_act_idx = gau1_ctb_raster_to_zscan[i * 16 + j];
794
0
            ASSERT(z_scan_act_idx <= 15);
795
796
0
            ps_ed_8x8 = ps_ed_ctb + z_scan_idx;
797
0
            ihevce_ed_calc_8x8_blk(
798
0
                ps_ed_ctxt,
799
0
                ps_ed_8x8,
800
0
                pu1_src_8x8,
801
0
                src_stride,
802
0
                nbr_flags_ptr,
803
0
                lambda,
804
0
                &i4_best_satd,
805
0
                i4_layer_id,
806
0
                ps_ed_ctxt->i4_quality_preset,
807
0
                &i4_best_sad_cost_8x8_l1_ipe,
808
0
                &i4_best_sad_8x8_l1_ipe,
809
0
                ps_ipe_optimised_function_list,
810
0
                ps_cmn_utils_optimised_function_list);
811
0
            ASSERT(i4_best_satd >= 0);
812
813
0
            if(i4_layer_id == 1)
814
0
            {
815
0
                ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[z_scan_act_idx] =
816
0
                    i4_best_sad_cost_8x8_l1_ipe;
817
0
                ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[z_scan_act_idx] = i4_best_sad_8x8_l1_ipe;
818
0
                ps_ed_ctb_l1->i4_best_satd_8x8[z_scan_act_idx] = i4_best_satd;
819
0
                ps_ed_ctxt->i8_sum_best_satd += i4_best_satd;
820
0
                ps_ed_ctxt->i8_sum_sq_best_satd += (i4_best_satd * i4_best_satd);
821
0
            }
822
0
            pu1_src_8x8 += 8;
823
0
            nbr_flags_ptr += 2;
824
0
        }
825
0
    }
826
0
}
827
828
/* Fast approximation of log2(val) for a float input.                      */
/* The biased exponent field (bits 23..30) supplies the integer part; the  */
/* remaining bits are re-biased into a float and fed to a quadratic that   */
/* supplies the fractional part. The sign bit is not masked off.           */
float fast_log2(float val)
{
    union { float val; int32_t x; } bits = { val };
    const int32_t exp_field_mask = (255 << 23);
    float result;

    /* exponent field minus 128; the polynomial below adds the remainder */
    result = (float)(((bits.x >> 23) & 255) - 128);

    /* clear the exponent field and install the bias 127 in its place */
    bits.x = (bits.x & ~exp_field_mask) + (127 << 23);

    result += ((-1.0f / 3) * bits.val + 2) * bits.val - 2.0f / 3;
    return result;
}
838
839
/*!
840
******************************************************************************
841
* \if Function name : ihevce_cu_level_qp_mod \endif
842
*
843
* \brief: Performs CU level QP modulation
844
*
845
*****************************************************************************
846
*/
847
WORD32 ihevce_cu_level_qp_mod(
848
    WORD32 frm_qscale,
849
    WORD32 cu_satd,
850
    long double frm_avg_activity,
851
    float f_mod_strength,
852
    WORD32 *pi4_act_factor,
853
    WORD32 *pi4_q_scale_mod,
854
    rc_quant_t *rc_quant_ctxt)
855
0
{
856
0
    WORD32 cu_qscale;
857
0
    WORD32 cu_qp;
858
859
0
    *pi4_act_factor = (1 << QP_LEVEL_MOD_ACT_FACTOR);
860
0
    if(cu_satd != -1 && (WORD32)frm_avg_activity != 0)
861
0
    {
862
0
        ULWORD64 sq_cur_satd = (cu_satd * cu_satd);
863
0
        float log2_sq_cur_satd = fast_log2(1 + sq_cur_satd);
864
0
        WORD32 qp_offset = f_mod_strength * (log2_sq_cur_satd - frm_avg_activity);
865
866
0
        ASSERT(USE_SQRT_AVG_OF_SATD_SQR);
867
0
        qp_offset = CLIP3(qp_offset, MIN_QP_MOD_OFFSET, MAX_QP_MOD_OFFSET);
868
0
        *pi4_act_factor *= gad_look_up_activity[qp_offset + ABS(MIN_QP_MOD_OFFSET)];
869
0
        ASSERT(*pi4_act_factor > 0);
870
0
        cu_qscale = ((frm_qscale * (*pi4_act_factor)) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1)));
871
0
        cu_qscale >>= QP_LEVEL_MOD_ACT_FACTOR;
872
0
    }
873
0
    else
874
0
    {
875
0
        cu_qscale = frm_qscale;
876
0
    }
877
0
    cu_qscale = CLIP3(cu_qscale, rc_quant_ctxt->i2_min_qscale, rc_quant_ctxt->i2_max_qscale);
878
0
    cu_qp = rc_quant_ctxt->pi4_qscale_to_qp[cu_qscale];
879
0
    cu_qp = CLIP3(cu_qp, rc_quant_ctxt->i2_min_qp, rc_quant_ctxt->i2_max_qp);
880
0
    *pi4_q_scale_mod = cu_qscale;
881
882
0
    return (cu_qp);
883
0
}
884
885
/*!
886
******************************************************************************
887
* \if Function name : ihevce_ed_frame_init \endif
888
*
889
* \brief: Initialize frame context for early decision
890
*
891
*****************************************************************************
892
*/
893
void ihevce_ed_frame_init(void *pv_ed_ctxt, WORD32 i4_layer_no)
894
0
{
895
0
    ihevce_ed_ctxt_t *ps_ed_ctxt = (ihevce_ed_ctxt_t *)pv_ed_ctxt;
896
897
0
    g_apf_lum_ip[IP_FUNC_MODE_0] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_planar_fptr;
898
0
    g_apf_lum_ip[IP_FUNC_MODE_1] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_dc_fptr;
899
0
    g_apf_lum_ip[IP_FUNC_MODE_2] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode2_fptr;
900
0
    g_apf_lum_ip[IP_FUNC_MODE_3TO9] =
901
0
        ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr;
902
0
    g_apf_lum_ip[IP_FUNC_MODE_10] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_horz_fptr;
903
0
    g_apf_lum_ip[IP_FUNC_MODE_11TO17] =
904
0
        ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr;
905
0
    g_apf_lum_ip[IP_FUNC_MODE_18_34] =
906
0
        ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr;
907
0
    g_apf_lum_ip[IP_FUNC_MODE_19TO25] =
908
0
        ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr;
909
0
    g_apf_lum_ip[IP_FUNC_MODE_26] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_ver_fptr;
910
0
    g_apf_lum_ip[IP_FUNC_MODE_27TO33] =
911
0
        ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr;
912
913
0
    if(i4_layer_no == 1)
914
0
    {
915
0
        ps_ed_ctxt->i8_sum_best_satd = 0;
916
0
        ps_ed_ctxt->i8_sum_sq_best_satd = 0;
917
0
    }
918
0
}
919
920
/**
921
********************************************************************************
922
*
923
*  @brief  downscales by 2 in horz and vertical direction, creates output of
924
*          size wd/2 * ht/2
925
*
926
*  @param[in]  pu1_src : source pointer
927
*  @param[in]  src_stride : source stride
928
*  @param[out] pu1_dst : destination pointer. Starting of a row.
929
*  @param[in]  dst_stride : destination stride
930
*  @param[in]  wd : width
931
*  @param[in]  ht : height
932
*  @param[in]  pu1_wkg_mem : working memory (at least of size CEIL16(wd) * ht)
933
*  @param[in]  ht_offset : height offset of the block to be scaled
934
*  @param[in]  block_ht : height of the block to be scaled
935
*  @param[in]  wd_offset : width offset of the block to be scaled
936
*  @param[in]  block_wd : width of the block to be scaled
937
*
938
*  @return void
939
*
940
*  @remarks Assumes block_ht is a multiple of 2. LANCZOS_SCALER
941
*
942
********************************************************************************
943
*/
944
void ihevce_scaling_filter_mxn(
945
    UWORD8 *pu1_src,
946
    WORD32 src_strd,
947
    UWORD8 *pu1_scrtch,
948
    WORD32 scrtch_strd,
949
    UWORD8 *pu1_dst,
950
    WORD32 dst_strd,
951
    WORD32 ht,
952
    WORD32 wd)
953
0
{
954
0
#define FILT_TAP_Q 8
955
0
#define N_TAPS 7
956
0
    const WORD16 i4_ftaps[N_TAPS] = { -18, 0, 80, 132, 80, 0, -18 };
957
0
    WORD32 i, j;
958
0
    WORD32 tmp;
959
0
    UWORD8 *pu1_src_tmp = pu1_src - 3 * src_strd;
960
0
    UWORD8 *pu1_scrtch_tmp = pu1_scrtch;
961
962
    /* horizontal filtering */
963
0
    for(i = -3; i < ht + 2; i++)
964
0
    {
965
0
        for(j = 0; j < wd; j += 2)
966
0
        {
967
0
            tmp = (i4_ftaps[3] * pu1_src_tmp[j] +
968
0
                   i4_ftaps[2] * (pu1_src_tmp[j - 1] + pu1_src_tmp[j + 1]) +
969
0
                   i4_ftaps[1] * (pu1_src_tmp[j + 2] + pu1_src_tmp[j - 2]) +
970
0
                   i4_ftaps[0] * (pu1_src_tmp[j + 3] + pu1_src_tmp[j - 3]) +
971
0
                   (1 << (FILT_TAP_Q - 1))) >>
972
0
                  FILT_TAP_Q;
973
0
            pu1_scrtch_tmp[j >> 1] = CLIP_U8(tmp);
974
0
        }
975
0
        pu1_scrtch_tmp += scrtch_strd;
976
0
        pu1_src_tmp += src_strd;
977
0
    }
978
    /* vertical filtering */
979
0
    pu1_scrtch_tmp = pu1_scrtch + 3 * scrtch_strd;
980
0
    for(i = 0; i < ht; i += 2)
981
0
    {
982
0
        for(j = 0; j < (wd >> 1); j++)
983
0
        {
984
0
            tmp =
985
0
                (i4_ftaps[3] * pu1_scrtch_tmp[j] +
986
0
                 i4_ftaps[2] * (pu1_scrtch_tmp[j + scrtch_strd] + pu1_scrtch_tmp[j - scrtch_strd]) +
987
0
                 i4_ftaps[1] *
988
0
                     (pu1_scrtch_tmp[j + 2 * scrtch_strd] + pu1_scrtch_tmp[j - 2 * scrtch_strd]) +
989
0
                 i4_ftaps[0] *
990
0
                     (pu1_scrtch_tmp[j + 3 * scrtch_strd] + pu1_scrtch_tmp[j - 3 * scrtch_strd]) +
991
0
                 (1 << (FILT_TAP_Q - 1))) >>
992
0
                FILT_TAP_Q;
993
0
            pu1_dst[j] = CLIP_U8(tmp);
994
0
        }
995
0
        pu1_dst += dst_strd;
996
0
        pu1_scrtch_tmp += (scrtch_strd << 1);
997
0
    }
998
0
}
999
1000
void ihevce_scale_by_2(
1001
    UWORD8 *pu1_src,
1002
    WORD32 src_strd,
1003
    UWORD8 *pu1_dst,
1004
    WORD32 dst_strd,
1005
    WORD32 wd,
1006
    WORD32 ht,
1007
    UWORD8 *pu1_wkg_mem,
1008
    WORD32 ht_offset,
1009
    WORD32 block_ht,
1010
    WORD32 wd_offset,
1011
    WORD32 block_wd,
1012
    FT_COPY_2D *pf_copy_2d,
1013
    FT_SCALING_FILTER_BY_2 *pf_scaling_filter_mxn)
1014
0
{
1015
0
#define N_TAPS 7
1016
0
#define MAX_BLK_SZ (MAX_CTB_SIZE + ((N_TAPS >> 1) << 1))
1017
0
    UWORD8 au1_cpy[MAX_BLK_SZ * MAX_BLK_SZ];
1018
0
    UWORD32 cpy_strd = MAX_BLK_SZ;
1019
0
    UWORD8 *pu1_cpy = au1_cpy + cpy_strd * (N_TAPS >> 1) + (N_TAPS >> 1);
1020
1021
0
    UWORD8 *pu1_in, *pu1_out;
1022
0
    WORD32 in_strd, wkg_mem_strd;
1023
1024
0
    WORD32 row_start, row_end;
1025
0
    WORD32 col_start, col_end;
1026
0
    WORD32 i, fun_select;
1027
0
    WORD32 ht_tmp, wd_tmp;
1028
0
    FT_SCALING_FILTER_BY_2 *ihevce_scaling_filters[2];
1029
1030
0
    assert((wd & 1) == 0);
1031
0
    assert((ht & 1) == 0);
1032
0
    assert(block_wd <= MAX_CTB_SIZE);
1033
0
    assert(block_ht <= MAX_CTB_SIZE);
1034
1035
    /* function pointers for filtering different dimensions */
1036
0
    ihevce_scaling_filters[0] = ihevce_scaling_filter_mxn;
1037
0
    ihevce_scaling_filters[1] = pf_scaling_filter_mxn;
1038
1039
    /* handle boundary blks */
1040
0
    col_start = (wd_offset < (N_TAPS >> 1)) ? 1 : 0;
1041
0
    row_start = (ht_offset < (N_TAPS >> 1)) ? 1 : 0;
1042
0
    col_end = ((wd_offset + block_wd) > (wd - (N_TAPS >> 1))) ? 1 : 0;
1043
0
    row_end = ((ht_offset + block_ht) > (ht - (N_TAPS >> 1))) ? 1 : 0;
1044
0
    if(col_end && (wd % block_wd != 0))
1045
0
    {
1046
0
        block_wd = (wd % block_wd);
1047
0
    }
1048
0
    if(row_end && (ht % block_ht != 0))
1049
0
    {
1050
0
        block_ht = (ht % block_ht);
1051
0
    }
1052
1053
    /* boundary blks needs to be padded, copy src to tmp buffer */
1054
0
    if(col_start || col_end || row_end || row_start)
1055
0
    {
1056
0
        UWORD8 *pu1_src_tmp = pu1_src + wd_offset + ht_offset * src_strd;
1057
1058
0
        pu1_cpy -= (3 * (1 - col_start) + cpy_strd * 3 * (1 - row_start));
1059
0
        pu1_src_tmp -= (3 * (1 - col_start) + src_strd * 3 * (1 - row_start));
1060
0
        ht_tmp = block_ht + 3 * (1 - row_start) + 3 * (1 - row_end);
1061
0
        wd_tmp = block_wd + 3 * (1 - col_start) + 3 * (1 - col_end);
1062
0
        pf_copy_2d(pu1_cpy, cpy_strd, pu1_src_tmp, src_strd, wd_tmp, ht_tmp);
1063
0
        pu1_in = au1_cpy + cpy_strd * 3 + 3;
1064
0
        in_strd = cpy_strd;
1065
0
    }
1066
0
    else
1067
0
    {
1068
0
        pu1_in = pu1_src + wd_offset + ht_offset * src_strd;
1069
0
        in_strd = src_strd;
1070
0
    }
1071
1072
    /*top padding*/
1073
0
    if(row_start)
1074
0
    {
1075
0
        UWORD8 *pu1_cpy_tmp = au1_cpy + cpy_strd * 3;
1076
1077
0
        pu1_cpy = au1_cpy + cpy_strd * (3 - 1);
1078
0
        memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
1079
0
        pu1_cpy -= cpy_strd;
1080
0
        memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
1081
0
        pu1_cpy -= cpy_strd;
1082
0
        memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
1083
0
    }
1084
1085
    /*bottom padding*/
1086
0
    if(row_end)
1087
0
    {
1088
0
        UWORD8 *pu1_cpy_tmp = au1_cpy + cpy_strd * 3 + (block_ht - 1) * cpy_strd;
1089
1090
0
        pu1_cpy = pu1_cpy_tmp + cpy_strd;
1091
0
        memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
1092
0
        pu1_cpy += cpy_strd;
1093
0
        memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
1094
0
        pu1_cpy += cpy_strd;
1095
0
        memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
1096
0
    }
1097
1098
    /*left padding*/
1099
0
    if(col_start)
1100
0
    {
1101
0
        UWORD8 *pu1_cpy_tmp = au1_cpy + 3;
1102
1103
0
        pu1_cpy = au1_cpy;
1104
0
        for(i = 0; i < block_ht + 6; i++)
1105
0
        {
1106
0
            pu1_cpy[0] = pu1_cpy[1] = pu1_cpy[2] = pu1_cpy_tmp[0];
1107
0
            pu1_cpy += cpy_strd;
1108
0
            pu1_cpy_tmp += cpy_strd;
1109
0
        }
1110
0
    }
1111
1112
    /*right padding*/
1113
0
    if(col_end)
1114
0
    {
1115
0
        UWORD8 *pu1_cpy_tmp = au1_cpy + 3 + block_wd - 1;
1116
1117
0
        pu1_cpy = au1_cpy + 3 + block_wd;
1118
0
        for(i = 0; i < block_ht + 6; i++)
1119
0
        {
1120
0
            pu1_cpy[0] = pu1_cpy[1] = pu1_cpy[2] = pu1_cpy_tmp[0];
1121
0
            pu1_cpy += cpy_strd;
1122
0
            pu1_cpy_tmp += cpy_strd;
1123
0
        }
1124
0
    }
1125
1126
0
    wkg_mem_strd = block_wd >> 1;
1127
0
    pu1_out = pu1_dst + (wd_offset >> 1);
1128
0
    fun_select = (block_wd % 16 == 0);
1129
0
    ihevce_scaling_filters[fun_select](
1130
0
        pu1_in, in_strd, pu1_wkg_mem, wkg_mem_strd, pu1_out, dst_strd, block_ht, block_wd);
1131
1132
    /* Left padding of 16 for 1st block of every row */
1133
0
    if(wd_offset == 0)
1134
0
    {
1135
0
        UWORD8 u1_val;
1136
0
        WORD32 pad_wd = 16;
1137
0
        WORD32 pad_ht = block_ht >> 1;
1138
0
        UWORD8 *dst = pu1_dst;
1139
1140
0
        for(i = 0; i < pad_ht; i++)
1141
0
        {
1142
0
            u1_val = dst[0];
1143
0
            memset(&dst[-pad_wd], u1_val, pad_wd);
1144
0
            dst += dst_strd;
1145
0
        }
1146
0
    }
1147
1148
0
    if(wd == wd_offset + block_wd)
1149
0
    {
1150
        /* Right padding of (16 + (CEIL16(wd/2))-wd/2) for last block of every row */
1151
        /* Right padding is done only after processing of last block of that row is done*/
1152
0
        UWORD8 u1_val;
1153
0
        WORD32 pad_wd = 16 + CEIL16((wd >> 1)) - (wd >> 1) + 4;
1154
0
        WORD32 pad_ht = block_ht >> 1;
1155
0
        UWORD8 *dst = pu1_dst + (wd >> 1) - 1;
1156
1157
0
        for(i = 0; i < pad_ht; i++)
1158
0
        {
1159
0
            u1_val = dst[0];
1160
0
            memset(&dst[1], u1_val, pad_wd);
1161
0
            dst += dst_strd;
1162
0
        }
1163
1164
0
        if(ht_offset == 0)
1165
0
        {
1166
            /* Top padding of 16 is done for 1st row only after we reach end of that row */
1167
0
            pad_wd = dst_strd;
1168
0
            pad_ht = 16;
1169
0
            dst = pu1_dst - 16;
1170
0
            for(i = 1; i <= pad_ht; i++)
1171
0
            {
1172
0
                memcpy(dst - (i * dst_strd), dst, pad_wd);
1173
0
            }
1174
0
        }
1175
1176
        /* Bottom padding of (16 + (CEIL16(ht/2)) - ht/2) is done only if we have
1177
         reached end of frame */
1178
0
        if(ht - ht_offset - block_ht == 0)
1179
0
        {
1180
0
            pad_wd = dst_strd;
1181
0
            pad_ht = 16 + CEIL16((ht >> 1)) - (ht >> 1) + 4;
1182
0
            dst = pu1_dst + (((block_ht >> 1) - 1) * dst_strd) - 16;
1183
0
            for(i = 1; i <= pad_ht; i++)
1184
0
                memcpy(dst + (i * dst_strd), dst, pad_wd);
1185
0
        }
1186
0
    }
1187
0
}
1188
1189
/*!
1190
******************************************************************************
1191
* \if Function name : ihevce_decomp_pre_intra_process_row \endif
1192
*
1193
* \brief
1194
*  Row level function which down scales a given row by 2 in horz and vertical
1195
*  direction creates output of size wd/2 * ht/2. When decomposition is done
1196
*  from L1 to L2 pre intra analysis is done on L1
1197
*
1198
*****************************************************************************
1199
*/
1200
void ihevce_decomp_pre_intra_process_row(
1201
    UWORD8 *pu1_src,
1202
    WORD32 src_stride,
1203
    UWORD8 *pu1_dst_decomp,
1204
    WORD32 dst_stride,
1205
    WORD32 layer_wd,
1206
    WORD32 layer_ht,
1207
    UWORD8 *pu1_wkg_mem,
1208
    WORD32 ht_offset,
1209
    WORD32 block_ht,
1210
    WORD32 block_wd,
1211
    WORD32 num_col_blks,
1212
    WORD32 layer_no,
1213
    ihevce_ed_ctxt_t *ps_ed_ctxt,
1214
    ihevce_ed_blk_t *ps_ed_row,
1215
    ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row,
1216
    WORD32 num_4x4_blks_ctb_y,
1217
    WORD32 num_4x4_blks_last_ctb_x,
1218
    WORD32 skip_decomp,
1219
    WORD32 skip_pre_intra,
1220
    WORD32 row_block_no,
1221
    ctb_analyse_t *ps_ctb_analyse,
1222
    ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list,
1223
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
1224
0
{
1225
0
    WORD32 do_pre_intra_analysis = ((layer_no == 1) || (layer_no == 2)) && (!skip_pre_intra);
1226
0
    WORD32 col_block_no;
1227
0
    WORD32 i, j;
1228
1229
0
    if(!skip_decomp)
1230
0
    {
1231
0
        ctb_analyse_t *ps_ctb_analyse_curr = ps_ctb_analyse + row_block_no * num_col_blks;
1232
1233
0
        for(col_block_no = 0; col_block_no < num_col_blks; col_block_no++)
1234
0
        {
1235
0
            ihevce_scale_by_2(
1236
0
                pu1_src,
1237
0
                src_stride,
1238
0
                pu1_dst_decomp,
1239
0
                dst_stride,
1240
0
                layer_wd,
1241
0
                layer_ht,
1242
0
                pu1_wkg_mem,
1243
0
                ht_offset,
1244
0
                block_ht,
1245
0
                block_wd * col_block_no,
1246
0
                block_wd,
1247
0
                ps_cmn_utils_optimised_function_list->pf_copy_2d,
1248
0
                ps_ipe_optimised_function_list->pf_scaling_filter_mxn);
1249
1250
            /* Disable noise detection */
1251
0
            memset(
1252
0
                ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy,
1253
0
                0,
1254
0
                sizeof(ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy));
1255
1256
0
            ps_ctb_analyse_curr->s_ctb_noise_params.i4_noise_present = 0;
1257
1258
0
            ps_ctb_analyse_curr++;
1259
0
        }
1260
0
    }
1261
1262
0
    if(do_pre_intra_analysis)
1263
0
    {
1264
0
        ihevce_ed_blk_t *ps_ed_ctb = ps_ed_row;
1265
0
        ihevce_ed_ctb_l1_t *ps_ed_ctb_l1 = ps_ed_ctb_l1_row;
1266
0
        WORD32 *nbr_flags_ptr = &ps_ed_ctxt->ai4_nbr_flags[0];
1267
0
        UWORD8 *pu1_src_pre_intra = pu1_src + (ht_offset * src_stride);
1268
0
        WORD32 num_4x4_blks_in_ctb = block_wd >> 2;
1269
0
        WORD32 src_inc_pre_intra = num_4x4_blks_in_ctb * 4;
1270
0
        WORD32 inc_ctb = num_4x4_blks_in_ctb * num_4x4_blks_in_ctb;
1271
1272
        /* To analyse any given CTB we need to set the availability flags of the
1273
         * following neighbouring CTB: BL,L,TL,T,TR */
1274
        /* copy the neighbor flags for a general ctb (ctb inside the frame); not any corners */
1275
0
        memcpy(
1276
0
            ps_ed_ctxt->ai4_nbr_flags,
1277
0
            gau4_nbr_flags_8x8_4x4blks,
1278
0
            sizeof(gau4_nbr_flags_8x8_4x4blks));
1279
1280
        /* set top flags unavailable for first ctb row */
1281
0
        if(ht_offset == 0)
1282
0
        {
1283
0
            for(j = 0; j < num_4x4_blks_in_ctb; j++)
1284
0
            {
1285
0
                SET_T_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j]);
1286
0
                SET_TR_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j]);
1287
0
                SET_TL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j]);
1288
0
            }
1289
0
        }
1290
1291
        /* set bottom left flags as not available for last row */
1292
0
        if(ht_offset + block_ht >= layer_ht)
1293
0
        {
1294
0
            for(j = 0; j < num_4x4_blks_in_ctb; j++)
1295
0
            {
1296
0
                SET_BL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[(num_4x4_blks_ctb_y - 1) * 8 + j]);
1297
0
            }
1298
0
        }
1299
1300
        /* set left flags unavailable for 1st ctb col */
1301
0
        for(j = 0; j < num_4x4_blks_ctb_y; j++)
1302
0
        {
1303
0
            SET_L_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]);
1304
0
            SET_BL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]);
1305
0
            SET_TL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]);
1306
0
        }
1307
1308
0
        for(col_block_no = 0; col_block_no < num_col_blks; col_block_no++)
1309
0
        {
1310
0
            if(col_block_no == 1)
1311
0
            {
1312
                /* For the rest of the ctbs, set left flags available */
1313
0
                for(j = 0; j < num_4x4_blks_ctb_y; j++)
1314
0
                {
1315
0
                    SET_L_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]);
1316
0
                }
1317
0
                for(j = 0; j < num_4x4_blks_ctb_y - 1; j++)
1318
0
                {
1319
0
                    SET_BL_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]);
1320
0
                    SET_TL_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[(j + 1) * 8]);
1321
0
                }
1322
0
                if(ht_offset != 0)
1323
0
                {
1324
0
                    SET_TL_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[0]);
1325
0
                }
1326
0
            }
1327
1328
0
            if(col_block_no == num_col_blks - 1)
1329
0
            {
1330
                /* set top right flags unavailable for last ctb col */
1331
0
                for(i = 0; i < num_4x4_blks_ctb_y; i++)
1332
0
                {
1333
0
                    SET_TR_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[i * 8 + num_4x4_blks_last_ctb_x - 1]);
1334
0
                }
1335
0
            }
1336
1337
            /* Call intra analysis for the ctb */
1338
0
            ihevce_ed_calc_ctb(
1339
0
                ps_ed_ctxt,
1340
0
                ps_ed_ctb,
1341
0
                ps_ed_ctb_l1,
1342
0
                pu1_src_pre_intra,
1343
0
                src_stride,
1344
0
                (col_block_no == num_col_blks - 1) ? num_4x4_blks_last_ctb_x : num_4x4_blks_in_ctb,
1345
0
                num_4x4_blks_ctb_y,
1346
0
                nbr_flags_ptr,
1347
0
                layer_no,
1348
0
                ps_ipe_optimised_function_list,
1349
0
                ps_cmn_utils_optimised_function_list);
1350
0
            pu1_src_pre_intra += src_inc_pre_intra;
1351
0
            ps_ed_ctb += inc_ctb;
1352
0
            ps_ed_ctb_l1 += 1;
1353
0
        }
1354
0
    }
1355
0
}
1356
1357
/*!
1358
******************************************************************************
1359
* \if Function name : ihevce_decomp_pre_intra_process \endif
1360
*
1361
* \brief
1362
*  Frame level function to decompose given layer L0 into coarser layers and
1363
*  perform intra analysis on layers below L0
1364
*
1365
*****************************************************************************
1366
*/
1367
void ihevce_decomp_pre_intra_process(
1368
    void *pv_ctxt,
1369
    ihevce_lap_output_params_t *ps_lap_out_prms,
1370
    frm_ctb_ctxt_t *ps_frm_ctb_prms,
1371
    void *pv_multi_thrd_ctxt,
1372
    WORD32 thrd_id,
1373
    WORD32 i4_ping_pong)
1374
0
{
1375
0
    ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt = pv_ctxt;
1376
0
    ihevce_decomp_pre_intra_ctxt_t *ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[thrd_id];
1377
0
    multi_thrd_ctxt_t *ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
1378
0
    WORD32 i4_num_layers = ps_ctxt->i4_num_layers;
1379
0
    UWORD8 *pu1_wkg_mem = ps_ctxt->au1_wkg_mem;
1380
0
    ihevce_ed_ctxt_t *ps_ed_ctxt = ps_ctxt->ps_ed_ctxt;
1381
0
    ihevce_ed_ctb_l1_t *ps_ed_ctb_l1 = ps_ed_ctxt->ps_ed_ctb_l1;
1382
0
    ihevce_ed_blk_t *ps_ed;
1383
0
    WORD32 i4_layer_no;
1384
0
    WORD32 end_of_layer;
1385
0
    UWORD8 *pu1_src, *pu1_dst;
1386
0
    WORD32 src_stride, dst_stride;
1387
0
    WORD32 i4_layer_wd, i4_layer_ht;
1388
0
    WORD32 ht_offset, block_ht, row_block_no, num_row_blocks;
1389
0
    WORD32 block_wd, num_col_blks;
1390
0
    WORD32 skip_decomp, skip_pre_intra;
1391
0
    WORD32 inc_ctb;
1392
1393
0
    ASSERT(i4_num_layers >= 3);
1394
0
    ps_ctxt->as_layers[0].pu1_inp = (UWORD8 *)ps_lap_out_prms->s_input_buf.pv_y_buf;
1395
0
    ps_ctxt->as_layers[0].i4_inp_stride = ps_lap_out_prms->s_input_buf.i4_y_strd;
1396
0
    ps_ctxt->as_layers[0].i4_actual_wd = ps_lap_out_prms->s_input_buf.i4_y_wd;
1397
0
    ps_ctxt->as_layers[0].i4_actual_ht = ps_lap_out_prms->s_input_buf.i4_y_ht;
1398
1399
    /* This loop does decomp & intra by picking jobs from job queue */
1400
0
    for(i4_layer_no = 0; i4_layer_no < i4_num_layers; i4_layer_no++)
1401
0
    {
1402
0
        WORD32 idx = 0;
1403
1404
0
        src_stride = ps_ctxt->as_layers[i4_layer_no].i4_inp_stride;
1405
0
        pu1_src = ps_ctxt->as_layers[i4_layer_no].pu1_inp;
1406
0
        i4_layer_wd = ps_ctxt->as_layers[i4_layer_no].i4_actual_wd;
1407
0
        i4_layer_ht = ps_ctxt->as_layers[i4_layer_no].i4_actual_ht;
1408
0
        pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp;
1409
0
        dst_stride = ps_ctxt->as_layers[i4_layer_no + 1].i4_inp_stride;
1410
0
        block_wd = ps_ctxt->as_layers[i4_layer_no].i4_decomp_blk_wd;
1411
0
        block_ht = ps_ctxt->as_layers[i4_layer_no].i4_decomp_blk_ht;
1412
0
        num_col_blks = ps_ctxt->as_layers[i4_layer_no].i4_num_col_blks;
1413
0
        num_row_blocks = ps_ctxt->as_layers[i4_layer_no].i4_num_row_blks;
1414
0
        inc_ctb = (block_wd >> 2) * (block_wd >> 2);
1415
0
        end_of_layer = 0;
1416
0
        skip_pre_intra = 1;
1417
0
        skip_decomp = 0;
1418
0
        if(i4_layer_no >= (ps_ctxt->i4_num_layers - 1))
1419
0
        {
1420
0
            skip_decomp = 1;
1421
0
        }
1422
1423
        /* ------------ Loop over all the CTB rows & perform Decomp --------------- */
1424
0
        while(0 == end_of_layer)
1425
0
        {
1426
0
            job_queue_t *ps_pre_enc_job;
1427
0
            WORD32 num_4x4_blks_ctb_y = 0, num_4x4_blks_last_ctb_x = 0;
1428
1429
            /* Get the current row from the job queue */
1430
0
            ps_pre_enc_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
1431
0
                pv_multi_thrd_ctxt, (DECOMP_JOB_LYR0 + i4_layer_no), 1, i4_ping_pong);
1432
1433
            /* If all rows are done, set the end of layer flag to 1, */
1434
0
            if(NULL == ps_pre_enc_job)
1435
0
            {
1436
0
                end_of_layer = 1;
1437
0
            }
1438
0
            else
1439
0
            {
1440
                /* Obtain the current row's details from the job */
1441
0
                row_block_no = ps_pre_enc_job->s_job_info.s_decomp_job_info.i4_vert_unit_row_no;
1442
0
                ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = row_block_no;
1443
0
                ht_offset = row_block_no * block_ht;
1444
1445
0
                if(row_block_no < (num_row_blocks))
1446
0
                {
1447
0
                    pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp +
1448
0
                              ((block_ht >> 1) * dst_stride * row_block_no);
1449
1450
                    /* call the row level processing function */
1451
0
                    ihevce_decomp_pre_intra_process_row(
1452
0
                        pu1_src,
1453
0
                        src_stride,
1454
0
                        pu1_dst,
1455
0
                        dst_stride,
1456
0
                        i4_layer_wd,
1457
0
                        i4_layer_ht,
1458
0
                        pu1_wkg_mem,
1459
0
                        ht_offset,
1460
0
                        block_ht,
1461
0
                        block_wd,
1462
0
                        num_col_blks,
1463
0
                        i4_layer_no,
1464
0
                        ps_ed_ctxt,
1465
0
                        ps_ed,
1466
0
                        ps_ed_ctb_l1,
1467
0
                        num_4x4_blks_ctb_y,
1468
0
                        num_4x4_blks_last_ctb_x,
1469
0
                        skip_decomp,
1470
0
                        skip_pre_intra,
1471
0
                        row_block_no,
1472
0
                        ps_ctxt->ps_ctb_analyse,
1473
0
                        &ps_ctxt->s_ipe_optimised_function_list,
1474
0
                        &ps_ctxt->s_cmn_opt_func);
1475
0
                }
1476
0
                idx++;
1477
                /* set the output dependency */
1478
0
                ihevce_pre_enc_grp_job_set_out_dep(
1479
0
                    pv_multi_thrd_ctxt, ps_pre_enc_job, i4_ping_pong);
1480
0
            }
1481
0
        }
1482
0
        ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = idx;
1483
1484
        /* ------------ For the same rows perform preintra if required --------------- */
1485
0
        ihevce_ed_frame_init(ps_ed_ctxt, i4_layer_no);
1486
1487
0
        if((1 == i4_layer_no) && (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset))
1488
0
        {
1489
0
            WORD32 vert_ctr, ctb_ctr, i;
1490
0
            WORD32 ctb_ctr_blks = ps_ctxt->as_layers[1].i4_num_col_blks;
1491
0
            WORD32 vert_ctr_blks = ps_ctxt->as_layers[1].i4_num_row_blks;
1492
1493
0
            if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) &&
1494
0
               (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))
1495
0
            {
1496
0
                for(vert_ctr = 0; vert_ctr < vert_ctr_blks; vert_ctr++)
1497
0
                {
1498
0
                    ihevce_ed_ctb_l1_t *ps_ed_ctb_row_l1 =
1499
0
                        ps_ctxt->ps_ed_ctb_l1 + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
1500
1501
0
                    for(ctb_ctr = 0; ctb_ctr < ctb_ctr_blks; ctb_ctr++)
1502
0
                    {
1503
0
                        ihevce_ed_ctb_l1_t *ps_ed_ctb_curr_l1 = ps_ed_ctb_row_l1 + ctb_ctr;
1504
1505
0
                        for(i = 0; i < 16; i++)
1506
0
                        {
1507
0
                            ps_ed_ctb_curr_l1->i4_best_sad_cost_8x8_l1_ipe[i] = 0x7fffffff;
1508
0
                            ps_ed_ctb_curr_l1->i4_best_sad_8x8_l1_ipe[i] = 0x7fffffff;
1509
0
                        }
1510
0
                    }
1511
0
                }
1512
0
            }
1513
0
        }
1514
1515
0
#if DISABLE_L2_IPE_IN_PB_L1_IN_B
1516
0
        if(((2 == i4_layer_no) && (ps_lap_out_prms->i4_pic_type == IV_I_FRAME ||
1517
0
                                   ps_lap_out_prms->i4_pic_type == IV_IDR_FRAME)) ||
1518
0
           ((1 == i4_layer_no) &&
1519
0
            (ps_lap_out_prms->i4_temporal_lyr_id <= TEMPORAL_LAYER_DISABLE)) ||
1520
0
           ((IHEVCE_QUALITY_P6 != ps_ctxt->i4_quality_preset) && (0 != i4_layer_no)))
1521
#else
1522
        if((0 != i4_layer_no) &&
1523
           (1 != ((IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
1524
                  (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))))
1525
#endif
1526
0
        {
1527
0
            WORD32 i4_num_rows = ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed;
1528
1529
0
            ps_ed_ctxt->lambda = ps_ctxt->ai4_lambda[i4_layer_no];
1530
0
            if(0 == i4_layer_no)
1531
0
            {
1532
0
                ps_ed_ctxt->ps_ed_pic = NULL;
1533
0
                ps_ed_ctxt->ps_ed = NULL;
1534
0
                ps_ed_ctxt->ps_ed_ctb_l1_pic = NULL;
1535
0
                ps_ed_ctxt->ps_ed_ctb_l1 = NULL;
1536
0
            }
1537
0
            else if(1 == i4_layer_no)
1538
0
            {
1539
0
                ps_ed_ctxt->ps_ed_pic = ps_ctxt->ps_layer1_buf;
1540
0
                ps_ed_ctxt->ps_ed = ps_ctxt->ps_layer1_buf;
1541
0
                ps_ed_ctxt->ps_ed_ctb_l1_pic = ps_ctxt->ps_ed_ctb_l1;
1542
0
                ps_ed_ctxt->ps_ed_ctb_l1 = ps_ctxt->ps_ed_ctb_l1;
1543
0
            }
1544
0
            else if(2 == i4_layer_no)
1545
0
            {
1546
0
                ps_ed_ctxt->ps_ed_pic = ps_ctxt->ps_layer2_buf;
1547
0
                ps_ed_ctxt->ps_ed = ps_ctxt->ps_layer2_buf;
1548
0
                ps_ed_ctxt->ps_ed_ctb_l1_pic = NULL;
1549
0
                ps_ed_ctxt->ps_ed_ctb_l1 = NULL;
1550
0
            }
1551
1552
0
            skip_decomp = 1;
1553
0
            skip_pre_intra = 0;
1554
1555
0
            for(idx = 0; idx < i4_num_rows; idx++)
1556
0
            {
1557
0
                WORD32 num_4x4_blks_ctb_y = 0, num_4x4_blks_last_ctb_x = 0;
1558
1559
                /* Obtain the current row's details from the job */
1560
0
                row_block_no = ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx];
1561
0
                ht_offset = row_block_no * block_ht;
1562
1563
0
                if(row_block_no < (num_row_blocks))
1564
0
                {
1565
0
                    pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp +
1566
0
                              ((block_ht >> 1) * dst_stride * row_block_no);
1567
1568
0
                    if(i4_layer_no == 1 || i4_layer_no == 2)
1569
0
                    {
1570
0
                        ps_ed = ps_ed_ctxt->ps_ed + (row_block_no * inc_ctb * (num_col_blks));
1571
0
                        ps_ed_ctb_l1 = ps_ed_ctxt->ps_ed_ctb_l1 + (row_block_no * num_col_blks);
1572
0
                        ps_ed_ctxt->i4_quality_preset = ps_ctxt->i4_quality_preset;
1573
0
                        num_4x4_blks_last_ctb_x = block_wd >> 2;
1574
0
                        num_4x4_blks_ctb_y = block_ht >> 2;
1575
0
                        if(row_block_no == num_row_blocks - 1)
1576
0
                        {
1577
0
                            if(i4_layer_ht % block_ht)
1578
0
                            {
1579
0
                                num_4x4_blks_ctb_y = ((i4_layer_ht % block_ht) + 3) >> 2;
1580
0
                            }
1581
0
                        }
1582
0
                        if(i4_layer_wd % block_wd)
1583
0
                        {
1584
0
                            num_4x4_blks_last_ctb_x = ((i4_layer_wd % block_wd) + 3) >> 2;
1585
0
                        }
1586
0
                    }
1587
1588
                    /* call the row level processing function */
1589
0
                    ihevce_decomp_pre_intra_process_row(
1590
0
                        pu1_src,
1591
0
                        src_stride,
1592
0
                        pu1_dst,
1593
0
                        dst_stride,
1594
0
                        i4_layer_wd,
1595
0
                        i4_layer_ht,
1596
0
                        pu1_wkg_mem,
1597
0
                        ht_offset,
1598
0
                        block_ht,
1599
0
                        block_wd,
1600
0
                        num_col_blks,
1601
0
                        i4_layer_no,
1602
0
                        ps_ed_ctxt,
1603
0
                        ps_ed,
1604
0
                        ps_ed_ctb_l1,
1605
0
                        num_4x4_blks_ctb_y,
1606
0
                        num_4x4_blks_last_ctb_x,
1607
0
                        skip_decomp,
1608
0
                        skip_pre_intra,
1609
0
                        row_block_no,
1610
0
                        NULL,
1611
0
                        &ps_ctxt->s_ipe_optimised_function_list,
1612
0
                        &ps_ctxt->s_cmn_opt_func);
1613
0
                }
1614
1615
0
                if(1 == i4_layer_no)
1616
0
                {
1617
0
                    ps_multi_thrd->aai4_l1_pre_intra_done[i4_ping_pong][row_block_no] = 1;
1618
0
                }
1619
0
            }
1620
0
            for(idx = 0; idx < MAX_NUM_CTB_ROWS_FRM; idx++)
1621
0
            {
1622
0
                ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = -1;
1623
0
            }
1624
0
            ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = 0;
1625
0
        }
1626
1627
0
#if DISABLE_L2_IPE_IN_PB_L1_IN_B
1628
0
        if((IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
1629
0
           (((i4_layer_no == 2) && (ps_lap_out_prms->i4_pic_type == ISLICE)) ||
1630
0
            ((i4_layer_no == 1) && (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))))
1631
0
        {
1632
0
            WORD32 i4_num_rows = ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed;
1633
0
            if(1 == i4_layer_no)
1634
0
            {
1635
0
                for(idx = 0; idx < i4_num_rows; idx++)
1636
0
                {
1637
0
                    row_block_no = ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx];
1638
1639
0
                    {
1640
0
                        ps_multi_thrd->aai4_l1_pre_intra_done[i4_ping_pong][row_block_no] = 1;
1641
0
                    }
1642
0
                }
1643
0
            }
1644
0
            for(idx = 0; idx < MAX_NUM_CTB_ROWS_FRM; idx++)
1645
0
            {
1646
0
                ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = -1;
1647
0
            }
1648
0
            ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = 0;
1649
0
        }
1650
#else
1651
        if((i4_layer_no != 0) && ((IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
1652
                                  (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE)))
1653
        {
1654
            WORD32 i4_num_rows = ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed;
1655
            for(idx = 0; idx < i4_num_rows; idx++)
1656
            {
1657
                row_block_no = ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx];
1658
                if(1 == i4_layer_no)
1659
                {
1660
                    ps_multi_thrd->aai4_l1_pre_intra_done[i4_ping_pong][row_block_no] = 1;
1661
                }
1662
            }
1663
            for(idx = 0; idx < MAX_NUM_CTB_ROWS_FRM; idx++)
1664
            {
1665
                ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = -1;
1666
            }
1667
            ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = 0;
1668
        }
1669
#endif
1670
0
    }
1671
0
}
1672
1673
/*!
1674
************************************************************************
1675
* \brief
1676
*    return number of records used by decomp pre intra
1677
*
1678
************************************************************************
1679
*/
1680
WORD32 ihevce_decomp_pre_intra_get_num_mem_recs(void)
1681
0
{
1682
0
    return (NUM_DECOMP_PRE_INTRA_MEM_RECS);
1683
0
}
1684
1685
/*!
1686
************************************************************************
1687
* @brief
1688
*    return each record attributes of  decomp pre intra
1689
************************************************************************
1690
*/
1691
WORD32 ihevce_decomp_pre_intra_get_mem_recs(
1692
    iv_mem_rec_t *ps_mem_tab, WORD32 i4_num_proc_thrds, WORD32 i4_mem_space)
1693
0
{
1694
    /* memories should be requested assuming worst case requirememnts */
1695
1696
    /* Module context structure */
1697
0
    ps_mem_tab[DECOMP_PRE_INTRA_CTXT].i4_mem_size = sizeof(ihevce_decomp_pre_intra_master_ctxt_t);
1698
0
    ps_mem_tab[DECOMP_PRE_INTRA_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
1699
0
    ps_mem_tab[DECOMP_PRE_INTRA_CTXT].i4_mem_alignment = 8;
1700
1701
    /* Thread context structure */
1702
0
    ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].i4_mem_size =
1703
0
        i4_num_proc_thrds * sizeof(ihevce_decomp_pre_intra_ctxt_t);
1704
0
    ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
1705
0
    ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].i4_mem_alignment = 8;
1706
1707
    /* early decision context structure */
1708
0
    ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].i4_mem_size = i4_num_proc_thrds * sizeof(ihevce_ed_ctxt_t);
1709
0
    ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
1710
0
    ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].i4_mem_alignment = 8;
1711
1712
0
    return (NUM_DECOMP_PRE_INTRA_MEM_RECS);
1713
0
}
1714
1715
/*!
1716
************************************************************************
1717
* @brief
1718
*    Init decomp pre intra context
1719
************************************************************************
1720
*/
1721
void *ihevce_decomp_pre_intra_init(
1722
    iv_mem_rec_t *ps_mem_tab,
1723
    ihevce_static_cfg_params_t *ps_init_prms,
1724
    WORD32 i4_num_proc_thrds,
1725
    func_selector_t *ps_func_selector,
1726
    WORD32 i4_resolution_id,
1727
    UWORD8 u1_is_popcnt_available)
1728
0
{
1729
0
    ihevce_decomp_pre_intra_master_ctxt_t *ps_mstr_ctxt = ps_mem_tab[DECOMP_PRE_INTRA_CTXT].pv_base;
1730
0
    ihevce_decomp_pre_intra_ctxt_t *ps_ctxt = ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].pv_base;
1731
0
    ihevce_ed_ctxt_t *ps_ed_ctxt = ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].pv_base;
1732
0
    ihevce_tgt_params_t *ps_tgt_prms = &ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id];
1733
0
    WORD32 min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
1734
0
    WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
1735
0
    WORD32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
1736
0
    WORD32 n_tot_layers;
1737
0
    WORD32 i, j, k;
1738
1739
    /* Get the height and width of each layer */
1740
0
    *a_wd = ps_tgt_prms->i4_width + SET_CTB_ALIGN(ps_tgt_prms->i4_width, min_cu_size);
1741
0
    *a_ht = ps_tgt_prms->i4_height + SET_CTB_ALIGN(ps_tgt_prms->i4_height, min_cu_size);
1742
0
    n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht);
1743
0
    ps_mstr_ctxt->i4_num_proc_thrds = i4_num_proc_thrds;
1744
0
    for(i = 0; i < ps_mstr_ctxt->i4_num_proc_thrds; i++)
1745
0
    {
1746
0
        ps_mstr_ctxt->aps_decomp_pre_intra_thrd_ctxt[i] = ps_ctxt;
1747
0
        ps_ctxt->i4_num_layers = n_tot_layers;
1748
0
        ps_ctxt->ps_ed_ctxt = ps_ed_ctxt;
1749
0
        for(j = 0; j < n_tot_layers; j++)
1750
0
        {
1751
            /** If CTB size= 64, decomp_blk_wd = 64 for L0, 32 for L1 , 16 for L2, 8 for L3 */
1752
0
            WORD32 max_ctb_size = 1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size;
1753
0
            WORD32 decomp_blk_wd = max_ctb_size >> j;
1754
0
            WORD32 decomp_blk_ht = max_ctb_size >> j;
1755
1756
0
            ps_ctxt->as_layers[j].i4_actual_wd = a_wd[j];
1757
0
            ps_ctxt->as_layers[j].i4_actual_ht = a_ht[j];
1758
0
            if(0 == j)
1759
0
            {
1760
0
                ps_ctxt->as_layers[j].i4_padded_ht = a_ht[j];
1761
0
                ps_ctxt->as_layers[j].i4_padded_wd = a_wd[j];
1762
0
            }
1763
0
            else
1764
0
            {
1765
0
                ps_ctxt->as_layers[j].i4_padded_ht = a_ht[j] + 32 + 4;
1766
0
                ps_ctxt->as_layers[j].i4_padded_wd = a_wd[j] + 32 + 4;
1767
0
            }
1768
0
            ps_ctxt->as_layers[j].pu1_inp = NULL;
1769
0
            ps_ctxt->as_layers[j].i4_inp_stride = 0;
1770
0
            ps_ctxt->as_layers[j].i4_decomp_blk_ht = decomp_blk_ht;
1771
0
            ps_ctxt->as_layers[j].i4_decomp_blk_wd = decomp_blk_wd;
1772
0
            ps_ctxt->as_layers[j].i4_num_row_blks = ((a_ht[j] + (decomp_blk_ht - 1)) / decomp_blk_ht);
1773
0
            ps_ctxt->as_layers[j].i4_num_col_blks = ((a_wd[j] + (decomp_blk_wd - 1)) / decomp_blk_wd);
1774
0
            for(k = 0; k < MAX_NUM_CTB_ROWS_FRM; k++)
1775
0
            {
1776
0
                ps_ctxt->as_layers[j].ai4_curr_row_no[k] = -1;
1777
0
            }
1778
0
            ps_ctxt->as_layers[j].i4_num_rows_processed = 0;
1779
0
        }
1780
0
        ps_ctxt->i4_quality_preset = ps_tgt_prms->i4_quality_preset;
1781
0
        if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7)
1782
0
        {
1783
0
            ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6;
1784
0
        }
1785
0
        if(ps_init_prms->s_coding_tools_prms.i4_vqet &
1786
0
           (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER))
1787
0
        {
1788
0
            if(ps_init_prms->s_coding_tools_prms.i4_vqet &
1789
0
               (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION))
1790
0
            {
1791
0
                ps_ctxt->i4_enable_noise_detection = 1;
1792
0
            }
1793
0
            else
1794
0
            {
1795
0
                ps_ctxt->i4_enable_noise_detection = 0;
1796
0
            }
1797
0
        }
1798
0
        else
1799
0
        {
1800
0
            ps_ctxt->i4_enable_noise_detection = 0;
1801
0
        }
1802
0
        ihevce_cmn_utils_instr_set_router(
1803
0
            &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
1804
0
        ihevce_ipe_instr_set_router(
1805
0
            &ps_ctxt->s_ipe_optimised_function_list, ps_init_prms->e_arch_type);
1806
1807
0
        ps_ed_ctxt->ps_func_selector = ps_func_selector;
1808
1809
0
        ps_ctxt++;
1810
0
        ps_ed_ctxt++;
1811
0
    }
1812
    /* return the handle to caller */
1813
0
    return ((void *)ps_mstr_ctxt);
1814
0
}
1815
1816
/*!
1817
************************************************************************
1818
* @brief
1819
*    Init decomp pre intra layer buffers
1820
************************************************************************
1821
*/
1822
void ihevce_decomp_pre_intra_frame_init(
1823
    void *pv_ctxt,
1824
    UWORD8 **ppu1_decomp_lyr_bufs,
1825
    WORD32 *pi4_lyr_buf_stride,
1826
    ihevce_ed_blk_t *ps_layer1_buf,
1827
    ihevce_ed_blk_t *ps_layer2_buf,
1828
    ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
1829
    WORD32 i4_ol_sad_lambda_qf,
1830
    ctb_analyse_t *ps_ctb_analyse)
1831
0
{
1832
0
    ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt = pv_ctxt;
1833
0
    ihevce_decomp_pre_intra_ctxt_t *ps_ctxt;
1834
0
    WORD32 i, j;
1835
1836
0
    for(i = 0; i < ps_master_ctxt->i4_num_proc_thrds; i++)
1837
0
    {
1838
0
        ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[i];
1839
1840
        /* L0 layer (actual input) is registered in process call */
1841
0
        for(j = 1; j < ps_ctxt->i4_num_layers; j++)
1842
0
        {
1843
0
            ps_ctxt->as_layers[j].i4_inp_stride = pi4_lyr_buf_stride[j - 1];
1844
0
            ps_ctxt->as_layers[j].pu1_inp = ppu1_decomp_lyr_bufs[j - 1];
1845
1846
            /* Populating the buffer pointers for layer1 and layer2 buffers to store the
1847
            structure for each 4x4 block after pre intra analysis on their respective layers */
1848
0
            if(j == 1)
1849
0
            {
1850
0
                WORD32 sad_lambda_l1 = (3 * i4_ol_sad_lambda_qf >> 2);
1851
0
                WORD32 temp = 1 << LAMBDA_Q_SHIFT;
1852
0
                WORD32 lambda = ((temp) > sad_lambda_l1) ? temp : sad_lambda_l1;
1853
1854
0
                ps_ctxt->ps_layer1_buf = ps_layer1_buf;
1855
0
                ps_ctxt->ps_ed_ctb_l1 = ps_ed_ctb_l1;
1856
0
                ps_ctxt->ai4_lambda[j] = lambda;
1857
0
            }
1858
0
            else if(j == 2)
1859
0
            {
1860
0
                WORD32 sad_lambda_l2 = i4_ol_sad_lambda_qf >> 1;
1861
0
                WORD32 temp = 1 << LAMBDA_Q_SHIFT;
1862
0
                WORD32 lambda = ((temp) > sad_lambda_l2) ? temp : sad_lambda_l2;
1863
1864
0
                ps_ctxt->ps_layer2_buf = ps_layer2_buf;
1865
0
                ps_ctxt->ai4_lambda[j] = lambda;
1866
0
            }
1867
0
            else
1868
0
            {
1869
0
                ps_ctxt->ai4_lambda[j] = -1;
1870
0
            }
1871
0
        }
1872
1873
        /* make the ps_ctb_analyse refernce as a part of the private context */
1874
0
        ps_ctxt->ps_ctb_analyse = ps_ctb_analyse;
1875
0
    }
1876
0
}
1877
1878
/**
1879
*******************************************************************************
1880
*
1881
* @brief Merge Sort function.
1882
*
1883
* @par Description:
1884
*     This function sorts the data in the input array in ascending
1885
*     order using merge sort algorithm. Intermediate data obtained in
1886
*     merge sort are stored in output 2-D array.
1887
*
1888
* @param[in]
1889
*   pi4_input_val  :   Input 1-D array
1890
*   aai4_output_val:   Output 2-D array containing elements sorted in sets of
1891
*                      4,16,64 etc.
1892
*   i4_length      : length of the array
1893
*   i4_ip_sort_level: Input sort level. Specifies the level upto which array is sorted.
1894
*                     It should be 1 if the array is unsorted. Should be 4 if array is sorted
1895
*                     in sets of 4.
1896
*   i4_op_sort_level: Output sort level. Specify the level upto which sorting is required.
1897
*                     If it is given as length of array it sorts for whole array.
1898
*
1899
*******************************************************************************
1900
*/
1901
void ihevce_merge_sort(
1902
    WORD32 *pi4_input_val,
1903
    WORD32 aai4_output_val[][64],
1904
    WORD32 i4_length,
1905
    WORD32 i4_ip_sort_level,
1906
    WORD32 i4_op_sort_level)
1907
0
{
1908
0
    WORD32 i, j, k;
1909
0
    WORD32 count, level;
1910
0
    WORD32 temp[64];
1911
0
    WORD32 *pi4_temp_buf_cpy;
1912
0
    WORD32 *pi4_temp = &temp[0];
1913
0
    WORD32 calc_level;
1914
1915
0
    pi4_temp_buf_cpy = pi4_temp;
1916
1917
0
    GETRANGE(calc_level, i4_op_sort_level / i4_ip_sort_level);
1918
1919
0
    calc_level = calc_level - 1;
1920
1921
    /*** This function is written under the assumption that we need only intermediate values of
1922
    sort in the range of 4,16,64 etc. ***/
1923
0
    ASSERT((calc_level % 2) == 0);
1924
1925
    /** One iteration of this for loop does 1 sets of sort and produces one intermediate value in 2 iterations **/
1926
0
    for(level = 0; level < calc_level; level++)
1927
0
    {
1928
        /** Merges adjacent sets of elements based on current sort level **/
1929
0
        for(count = 0; count < i4_length; (count = count + (i4_ip_sort_level * 2)))
1930
0
        {
1931
0
            i = 0;
1932
0
            j = 0;
1933
0
            if(pi4_input_val[i4_ip_sort_level - 1] < pi4_input_val[i4_ip_sort_level])
1934
0
            {
1935
                /*** Condition for early exit ***/
1936
0
                memcpy(&pi4_temp[0], pi4_input_val, sizeof(WORD32) * i4_ip_sort_level * 2);
1937
0
            }
1938
0
            else
1939
0
            {
1940
0
                for(k = 0; k < (i4_ip_sort_level * 2); k++)
1941
0
                {
1942
0
                    if((i < i4_ip_sort_level) && (j < i4_ip_sort_level))
1943
0
                    {
1944
0
                        if(pi4_input_val[i] > pi4_input_val[j + i4_ip_sort_level])
1945
0
                        {
1946
                            /** copy to output array **/
1947
0
                            pi4_temp[k] = pi4_input_val[j + i4_ip_sort_level];
1948
0
                            j++;
1949
0
                        }
1950
0
                        else
1951
0
                        {
1952
                            /** copy to output array **/
1953
0
                            pi4_temp[k] = pi4_input_val[i];
1954
0
                            i++;
1955
0
                        }
1956
0
                    }
1957
0
                    else if(i == i4_ip_sort_level)
1958
0
                    {
1959
                        /** copy the remaining data to output array **/
1960
0
                        pi4_temp[k] = pi4_input_val[j + i4_ip_sort_level];
1961
0
                        j++;
1962
0
                    }
1963
0
                    else
1964
0
                    {
1965
                        /** copy the remaining data to output array **/
1966
0
                        pi4_temp[k] = pi4_input_val[i];
1967
0
                        i++;
1968
0
                    }
1969
0
                }
1970
0
            }
1971
0
            pi4_input_val += (i4_ip_sort_level * 2);
1972
0
            pi4_temp += (i4_ip_sort_level * 2);
1973
0
        }
1974
0
        pi4_input_val = pi4_temp - i4_length;
1975
1976
0
        if(level % 2)
1977
0
        {
1978
            /** Assign a temp address for storing next sort level output as we will not need this data as output **/
1979
0
            pi4_temp = pi4_temp_buf_cpy;
1980
0
        }
1981
0
        else
1982
0
        {
1983
            /** Assign address for storing the intermediate data into output 2-D array **/
1984
0
            pi4_temp = aai4_output_val[level / 2];
1985
0
        }
1986
0
        i4_ip_sort_level *= 2;
1987
0
    }
1988
0
}
1989
1990
/*!
1991
************************************************************************
1992
* @brief
1993
*   Calculate the average activities at 16*16 (8*8 in L1) and 32*32
1994
*   (8*8 in L2) block sizes. As this function accumulates activities
1995
*   across blocks of a frame, this needs to be called by only one thread
1996
*   and only after ensuring the processing of entire frame is done
1997
************************************************************************
1998
*/
1999
void ihevce_decomp_pre_intra_curr_frame_pre_intra_deinit(
2000
    void *pv_pre_intra_ctxt,
2001
    pre_enc_me_ctxt_t *ps_curr_out,
2002
    frm_ctb_ctxt_t *ps_frm_ctb_prms)
2003
0
{
2004
0
    ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt = pv_pre_intra_ctxt;
2005
0
    ihevce_decomp_pre_intra_ctxt_t *ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[0];
2006
2007
0
    ULWORD64 u8_frame_8x8_sum_act_sqr = 0;
2008
0
    LWORD64 ai8_frame_8x8_sum_act_sqr[2] = { 0, 0 };
2009
0
    WORD32 ai4_frame_8x8_sum_act[2] = { 0, 0 };
2010
0
    WORD32 ai4_frame_8x8_sum_blks[2] = { 0, 0 };
2011
2012
0
    LWORD64 ai8_frame_16x16_sum_act_sqr[3] = { 0, 0, 0 };
2013
0
    WORD32 ai4_frame_16x16_sum_act[3] = { 0, 0, 0 };
2014
0
    WORD32 ai4_frame_16x16_sum_blks[3] = { 0, 0, 0 };
2015
2016
0
    LWORD64 ai8_frame_32x32_sum_act_sqr[3] = { 0, 0, 0 };
2017
0
    WORD32 ai4_frame_32x32_sum_act[3] = { 0, 0, 0 };
2018
0
    WORD32 ai4_frame_32x32_sum_blks[3] = { 0, 0, 0 };
2019
2020
0
    ihevce_ed_ctb_l1_t *ps_ed_ctb_pic_l1 = ps_curr_out->ps_ed_ctb_l1;
2021
0
    ihevce_ed_blk_t *ps_ed_blk_l1 = ps_curr_out->ps_layer1_buf;
2022
0
    WORD32 ctb_wd = ps_ctxt->as_layers[1].i4_decomp_blk_wd;
2023
0
    WORD32 h_ctb_cnt = ps_ctxt->as_layers[1].i4_num_col_blks;
2024
0
    WORD32 v_ctb_cnt = ps_ctxt->as_layers[1].i4_num_row_blks;
2025
0
    WORD32 sub_blk_cnt = ((ctb_wd >> 2) * (ctb_wd >> 2));
2026
0
    WORD32 i4_avg_noise_satd;
2027
0
    WORD32 ctb_ctr, vert_ctr;
2028
0
    WORD32 i, j, k;
2029
2030
0
    {
2031
        /* Calculate min noise threshold */
2032
        /* Min noise threshold is calculated by taking average of lowest 1% satd val in
2033
         * the complete 4x4 frame satds */
2034
0
#define MAX_SATD 64
2035
0
#define SATD_NOISE_FLOOR_THRESHOLD 16
2036
0
#define MIN_BLKS 2
2037
0
        WORD32 i4_layer_wd = ps_ctxt->as_layers[1].i4_actual_wd;
2038
0
        WORD32 i4_layer_ht = ps_ctxt->as_layers[1].i4_actual_ht;
2039
0
        WORD32 i4_min_blk = ((MIN_BLKS * (i4_layer_wd >> 1) * (i4_layer_ht >> 1)) / 100);
2040
0
        WORD32 i4_total_blks = 0;
2041
0
        WORD32 satd_hist[MAX_SATD];
2042
0
        LWORD64 i8_acc_satd = 0;
2043
2044
0
        memset(satd_hist, 0, sizeof(satd_hist));
2045
0
        for(i = 0; i < sub_blk_cnt * h_ctb_cnt * v_ctb_cnt; i++)
2046
0
        {
2047
0
            if(ps_ed_blk_l1[i].i4_4x4_satd >= 0 && ps_ed_blk_l1[i].i4_4x4_satd < MAX_SATD)
2048
0
            {
2049
0
                satd_hist[ps_ed_blk_l1[i].i4_4x4_satd]++;
2050
0
            }
2051
0
        }
2052
0
        for(i = 0; i < MAX_SATD && i4_total_blks <= i4_min_blk; i++)
2053
0
        {
2054
0
            i4_total_blks += satd_hist[i];
2055
0
            i8_acc_satd += (i * satd_hist[i]);
2056
0
        }
2057
0
        if(i4_total_blks < i4_min_blk)
2058
0
        {
2059
0
            i4_avg_noise_satd = SATD_NOISE_FLOOR_THRESHOLD;
2060
0
        }
2061
0
        else
2062
0
        {
2063
0
            i4_avg_noise_satd = (WORD32)(i8_acc_satd + (i4_total_blks >> 1)) / i4_total_blks;
2064
0
        }
2065
0
        ps_curr_out->i4_avg_noise_thrshld_4x4 = i4_avg_noise_satd;
2066
0
    }
2067
2068
0
    for(vert_ctr = 0; vert_ctr < v_ctb_cnt; vert_ctr++)
2069
0
    {
2070
0
        ihevce_ed_ctb_l1_t *ps_ed_ctb_row_l1 =
2071
0
            ps_ed_ctb_pic_l1 + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
2072
0
        ihevce_ed_blk_t *ps_ed = ps_ed_blk_l1 + (vert_ctr * sub_blk_cnt * h_ctb_cnt);
2073
2074
0
        for(ctb_ctr = 0; ctb_ctr < h_ctb_cnt; ctb_ctr++, ps_ed += sub_blk_cnt)
2075
0
        {
2076
0
            ihevce_ed_ctb_l1_t *ps_ed_ctb_curr_l1 = ps_ed_ctb_row_l1 + ctb_ctr;
2077
0
            WORD8 b8_satd_eval[4];
2078
0
            WORD32 ai4_satd_4x4[64];
2079
0
            WORD32 ai4_satd_8x8[16];  // derived from accumulating 4x4 satds
2080
0
            WORD32 ai4_satd_16x16[4] = { 0 };  // derived from accumulating 8x8 satds
2081
0
            WORD32 i4_satd_32x32 = 0;  // derived from accumulating 8x8 satds
2082
            /* This 2-D array will contain 4x4 satds sorted in ascending order in sets
2083
             * of 4, 16, 64  For example : '5 10 2 7 6 12 3 1' array input will return
2084
             * '2 5 7 10 1 3 6 12' if sorted in sets of 4 */
2085
0
            WORD32 aai4_sort_4_16_64_satd[3][64];
2086
            /* This 2-D array will contain 8x8 satds sorted in ascending order in sets of
2087
             * 4, 16***/
2088
0
            WORD32 aai4_sort_4_16_satd[2][64];
2089
2090
0
            memset(b8_satd_eval, 1, sizeof(b8_satd_eval));
2091
0
            for(i = 0; i < 4; i++)
2092
0
            {
2093
0
                ihevce_ed_blk_t *ps_ed_b32 = &ps_ed[i * 16];
2094
2095
0
                for(j = 0; j < 4; j++)
2096
0
                {
2097
0
                    ihevce_ed_blk_t *ps_ed_b16 = &ps_ed_b32[j * 4];
2098
0
                    WORD32 satd_sum = 0;
2099
0
                    WORD32 blk_cnt = 0;
2100
2101
0
                    for(k = 0; k < 4; k++)
2102
0
                    {
2103
0
                        ihevce_ed_blk_t *ps_ed_b4 = &ps_ed_b16[k];
2104
2105
0
                        if(-1 != ps_ed_b4->i4_4x4_satd)
2106
0
                        {
2107
0
#define SUB_NOISE_THRSHLD 0
2108
#if SUB_NOISE_THRSHLD
2109
                            ps_ed_b4->i4_4x4_satd = ps_ed_b4->i4_4x4_satd - i4_avg_noise_satd;
2110
                            if(ps_ed_b4->i4_4x4_satd < 0)
2111
                            {
2112
                                ps_ed_b4->i4_4x4_satd = 0;
2113
                            }
2114
#else
2115
0
                            if(ps_ed_b4->i4_4x4_satd < i4_avg_noise_satd)
2116
0
                            {
2117
0
                                ps_ed_b4->i4_4x4_satd = i4_avg_noise_satd;
2118
0
                            }
2119
0
#endif
2120
0
                            blk_cnt++;
2121
0
                            satd_sum += ps_ed_b4->i4_4x4_satd;
2122
0
                        }
2123
0
                        ai4_satd_4x4[i * 16 + j * 4 + k] = ps_ed_b4->i4_4x4_satd;
2124
0
                    }
2125
0
                    ASSERT(blk_cnt == 0 || blk_cnt == 4);
2126
0
                    if(blk_cnt == 0)
2127
0
                    {
2128
0
                        satd_sum = -1;
2129
0
                    }
2130
0
                    ai4_satd_8x8[i * 4 + j] = satd_sum;
2131
0
                    ai4_satd_16x16[i] += satd_sum;
2132
0
                    i4_satd_32x32 += satd_sum;
2133
0
                    ps_ed_ctb_curr_l1->i4_sum_4x4_satd[i * 4 + j] = satd_sum;
2134
0
                }
2135
0
            }
2136
2137
0
            {
2138
                /* This function will sort 64 elements in array ai4_satd_4x4 in ascending order
2139
                 *  to 3 arrays in sets of 4, 16, 64 into the 2-D array aai4_min_4_16_64_satd */
2140
0
                WORD32 array_length = sizeof(ai4_satd_4x4) / sizeof(WORD32);
2141
0
                ihevce_merge_sort(
2142
0
                    &ai4_satd_4x4[0], aai4_sort_4_16_64_satd, array_length, 1, 64);
2143
2144
                /* This function will sort 64 elements in array ai4_satd_8x8 in ascending order
2145
                 *  to 2 arrays in sets of 4, 16 into the 2-D array aai4_sum_4_16_satd_ctb */
2146
0
                array_length = sizeof(ai4_satd_8x8) / sizeof(WORD32);
2147
0
                ihevce_merge_sort(
2148
0
                    &ai4_satd_8x8[0], aai4_sort_4_16_satd, array_length, 1, 16);
2149
0
            }
2150
2151
            /* Populate avg satd to calculate modulation index and activity factors */
2152
            /* 16x16 */
2153
0
            for(i = 0; i < 4; i++)
2154
0
            {
2155
0
                for(j = 0; j < 4; j++)
2156
0
                {
2157
0
                    WORD32 satd_sum = ps_ed_ctb_curr_l1->i4_sum_4x4_satd[i * 4 + j];
2158
0
                    WORD32 satd_min = aai4_sort_4_16_64_satd[0][i * 16 + j * 4 + MEDIAN_CU_TU];
2159
2160
0
                    ASSERT(-2 != satd_sum);
2161
0
                    ps_ed_ctb_curr_l1->i4_min_4x4_satd[i * 4 + j] = satd_min;
2162
2163
0
                    if(-1 != satd_sum)
2164
0
                    {
2165
0
                        ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][0] = satd_sum;
2166
0
                        ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][1] = satd_min;
2167
2168
0
                        u8_frame_8x8_sum_act_sqr += (satd_sum * satd_sum);
2169
0
                        ai4_frame_8x8_sum_act[0] += satd_sum;
2170
0
                        ai8_frame_8x8_sum_act_sqr[0] += (satd_sum * satd_sum);
2171
0
                        ai4_frame_8x8_sum_blks[0] += 1;
2172
0
                        ai4_frame_8x8_sum_act[1] += satd_min;
2173
0
                        ai8_frame_8x8_sum_act_sqr[1] += (satd_min * satd_min);
2174
0
                        ai4_frame_8x8_sum_blks[1] += 1;
2175
0
                    }
2176
0
                    else
2177
0
                    {
2178
0
                        ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][0] = -1;
2179
0
                        ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][1] = -1;
2180
0
                        b8_satd_eval[i] = 0;
2181
0
                    }
2182
0
                }
2183
2184
0
                if(b8_satd_eval[i])
2185
0
                {
2186
0
                    ps_ed_ctb_curr_l1->i4_16x16_satd[i][0] = ai4_satd_16x16[i];
2187
0
                    ps_ed_ctb_curr_l1->i4_16x16_satd[i][1] = aai4_sort_4_16_satd[0][i * 4 + MEDIAN_CU_TU];
2188
0
                    ps_ed_ctb_curr_l1->i4_16x16_satd[i][2] = aai4_sort_4_16_64_satd[1][i * 16 + MEDIAN_CU_TU_BY_2];
2189
2190
0
                    for(k = 0; k < 3; k++)
2191
0
                    {
2192
0
                        WORD32 satd = ps_ed_ctb_curr_l1->i4_16x16_satd[i][k];
2193
2194
0
                        ai4_frame_16x16_sum_act[k] += satd;
2195
0
                        ai8_frame_16x16_sum_act_sqr[k] += (satd * satd);
2196
0
                        ai4_frame_16x16_sum_blks[k] += 1;
2197
0
                    }
2198
0
                }
2199
0
                else
2200
0
                {
2201
0
                    ps_ed_ctb_curr_l1->i4_16x16_satd[i][0] = -1;
2202
0
                    ps_ed_ctb_curr_l1->i4_16x16_satd[i][1] = -1;
2203
0
                    ps_ed_ctb_curr_l1->i4_16x16_satd[i][2] = -1;
2204
0
                }
2205
0
            }
2206
2207
            /*32x32*/
2208
0
            if(b8_satd_eval[0] && b8_satd_eval[1] && b8_satd_eval[2] && b8_satd_eval[3])
2209
0
            {
2210
0
                WORD32 aai4_sort_4_satd[1][64];
2211
0
                WORD32 array_length = sizeof(ai4_satd_16x16) / sizeof(WORD32);
2212
0
                WORD32 satd;
2213
2214
                /* Sort 4 elements in ascending order */
2215
0
                ihevce_merge_sort(ai4_satd_16x16, aai4_sort_4_satd, array_length, 1, 4);
2216
2217
0
                ps_ed_ctb_curr_l1->i4_32x32_satd[0][0] = aai4_sort_4_satd[0][MEDIAN_CU_TU];
2218
0
                ps_ed_ctb_curr_l1->i4_32x32_satd[0][1] = aai4_sort_4_16_satd[1][MEDIAN_CU_TU_BY_2];
2219
0
                ps_ed_ctb_curr_l1->i4_32x32_satd[0][2] = aai4_sort_4_16_64_satd[2][MEDIAN_CU_TU_BY_4];
2220
0
                ps_ed_ctb_curr_l1->i4_32x32_satd[0][3] = i4_satd_32x32;
2221
2222
0
                for(k = 0; k < 3; k++)
2223
0
                {
2224
0
                    WORD32 satd = ps_ed_ctb_curr_l1->i4_32x32_satd[0][k];
2225
2226
0
                    ai4_frame_32x32_sum_act[k] += satd;
2227
0
                    ai8_frame_32x32_sum_act_sqr[k] += (satd * satd);
2228
0
                    ai4_frame_32x32_sum_blks[k] += 1;
2229
0
                }
2230
0
            }
2231
0
            else
2232
0
            {
2233
0
                ps_ed_ctb_curr_l1->i4_32x32_satd[0][0] = -1;
2234
0
                ps_ed_ctb_curr_l1->i4_32x32_satd[0][1] = -1;
2235
0
                ps_ed_ctb_curr_l1->i4_32x32_satd[0][2] = -1;
2236
0
                ps_ed_ctb_curr_l1->i4_32x32_satd[0][3] = -1;
2237
0
            }
2238
0
        }
2239
0
    }
2240
2241
0
    for(i = 0; i < 2; i++)
2242
0
    {
2243
        /*8x8*/
2244
0
#if USE_SQRT_AVG_OF_SATD_SQR
2245
0
        ps_curr_out->i8_curr_frame_8x8_sum_act[i] = ai8_frame_8x8_sum_act_sqr[i];
2246
#else
2247
        ps_curr_out->i8_curr_frame_8x8_sum_act[i] = ai4_frame_8x8_sum_act[i];
2248
#endif
2249
0
        ps_curr_out->i4_curr_frame_8x8_sum_act_for_strength[i] = ai4_frame_8x8_sum_act[i];
2250
0
        ps_curr_out->i4_curr_frame_8x8_num_blks[i] = ai4_frame_8x8_sum_blks[i];
2251
0
        ps_curr_out->u8_curr_frame_8x8_sum_act_sqr = u8_frame_8x8_sum_act_sqr;
2252
2253
        /*16x16*/
2254
0
#if USE_SQRT_AVG_OF_SATD_SQR
2255
0
        ps_curr_out->i8_curr_frame_16x16_sum_act[i] = ai8_frame_16x16_sum_act_sqr[i];
2256
#else
2257
        ps_curr_out->i8_curr_frame_16x16_sum_act[i] = ai4_frame_16x16_sum_act[i];
2258
#endif
2259
0
        ps_curr_out->i4_curr_frame_16x16_num_blks[i] = ai4_frame_16x16_sum_blks[i];
2260
2261
        /*32x32*/
2262
0
#if USE_SQRT_AVG_OF_SATD_SQR
2263
0
        ps_curr_out->i8_curr_frame_32x32_sum_act[i] = ai8_frame_32x32_sum_act_sqr[i];
2264
#else
2265
        ps_curr_out->i8_curr_frame_32x32_sum_act[i] = ai4_frame_32x32_sum_act[i];
2266
#endif
2267
0
        ps_curr_out->i4_curr_frame_32x32_num_blks[i] = ai4_frame_32x32_sum_blks[i];
2268
0
    }
2269
2270
    /*16x16*/
2271
0
#if USE_SQRT_AVG_OF_SATD_SQR
2272
0
    ps_curr_out->i8_curr_frame_16x16_sum_act[2] = ai8_frame_16x16_sum_act_sqr[2];
2273
#else
2274
    ps_curr_out->i8_curr_frame_16x16_sum_act[2] = ai4_frame_16x16_sum_act[2];
2275
#endif
2276
0
    ps_curr_out->i4_curr_frame_16x16_num_blks[2] = ai4_frame_16x16_sum_blks[2];
2277
2278
    /*32x32*/
2279
0
#if USE_SQRT_AVG_OF_SATD_SQR
2280
0
    ps_curr_out->i8_curr_frame_32x32_sum_act[2] = ai8_frame_32x32_sum_act_sqr[2];
2281
#else
2282
    ps_curr_out->i8_curr_frame_32x32_sum_act[2] = ai4_frame_32x32_sum_act[2];
2283
#endif
2284
0
    ps_curr_out->i4_curr_frame_32x32_num_blks[2] = ai4_frame_32x32_sum_blks[2];
2285
0
}
2286
2287
/*!
2288
************************************************************************
2289
* @brief
2290
*  accumulate L1 intra satd across all threads.
2291
*  Note: call to this function has to be made after all threads have
2292
*  finished preintra processing
2293
*
2294
************************************************************************
2295
*/
2296
LWORD64 ihevce_decomp_pre_intra_get_frame_satd(void *pv_ctxt, WORD32 *wd, WORD32 *ht)
2297
0
{
2298
0
    ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt = pv_ctxt;
2299
0
    ihevce_decomp_pre_intra_ctxt_t *ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[0];
2300
0
    LWORD64 satd_sum = ps_ctxt->ps_ed_ctxt->i8_sum_best_satd;
2301
0
    WORD32 i;
2302
2303
0
    *wd = ps_ctxt->as_layers[1].i4_actual_wd;
2304
0
    *ht = ps_ctxt->as_layers[1].i4_actual_ht;
2305
0
    for(i = 1; i < ps_master_ctxt->i4_num_proc_thrds; i++)
2306
0
    {
2307
0
        ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[i];
2308
0
        satd_sum += ps_ctxt->ps_ed_ctxt->i8_sum_best_satd;
2309
0
    }
2310
2311
0
    return satd_sum;
2312
0
}
2313
2314
/*!
************************************************************************
* @brief
*  Returns the frame level sum of squared best satd, obtained by adding
*  the i8_sum_sq_best_satd field of every pre intra thread context.
*
* @param[in] pv_ctxt : pointer to the decomp pre intra master context
* @param[out] wd : receives i4_actual_wd of layer index 1 (thread 0 context)
* @param[out] ht : receives i4_actual_ht of layer index 1 (thread 0 context)
*
* @return sum of i8_sum_sq_best_satd over thread contexts
*         0 .. (i4_num_proc_thrds - 1); thread 0 is always included
************************************************************************
*/
LWORD64 ihevce_decomp_pre_intra_get_frame_satd_squared(void *pv_ctxt, WORD32 *wd, WORD32 *ht)
{
    ihevce_decomp_pre_intra_master_ctxt_t *ps_master = pv_ctxt;
    ihevce_decomp_pre_intra_ctxt_t *ps_thrd_ctxt = ps_master->aps_decomp_pre_intra_thrd_ctxt[0];
    /* start the total with the contribution of thread 0 */
    LWORD64 i8_frame_satd_sq = ps_thrd_ctxt->ps_ed_ctxt->i8_sum_sq_best_satd;
    WORD32 thrd_id = 1;

    /* dimensions are reported from the thread 0 context only */
    *wd = ps_thrd_ctxt->as_layers[1].i4_actual_wd;
    *ht = ps_thrd_ctxt->as_layers[1].i4_actual_ht;

    /* add the contribution of the remaining threads */
    while(thrd_id < ps_master->i4_num_proc_thrds)
    {
        ps_thrd_ctxt = ps_master->aps_decomp_pre_intra_thrd_ctxt[thrd_id];
        i8_frame_satd_sq += ps_thrd_ctxt->ps_ed_ctxt->i8_sum_sq_best_satd;
        thrd_id++;
    }

    return i8_frame_satd_sq;
}