Coverage Report

Created: 2026-05-24 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/encoder/ihevce_sao.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/**
21
*******************************************************************************
22
* @file
23
*  ihevce_sao.c
24
*
25
* @brief
26
*  Contains definition for the ctb level sao function
27
*
28
* @author
29
*  Ittiam
30
*
31
* @par List of Functions:
32
*  ihevce_sao_set_avilability()
33
*  ihevce_sao_ctb()
34
*  ihevce_sao_analyse()
35
*
36
* @remarks
37
*  None
38
*
39
*******************************************************************************
40
*/
41
42
/*****************************************************************************/
43
/* File Includes                                                             */
44
/*****************************************************************************/
45
/* System include files */
46
#include <stdio.h>
47
#include <string.h>
48
#include <stdlib.h>
49
#include <assert.h>
50
#include <stdarg.h>
51
#include <math.h>
52
53
/* User include files */
54
#include "ihevc_typedefs.h"
55
#include "itt_video_api.h"
56
#include "ihevce_api.h"
57
58
#include "rc_cntrl_param.h"
59
#include "rc_frame_info_collector.h"
60
#include "rc_look_ahead_params.h"
61
62
#include "ihevc_defs.h"
63
#include "ihevc_structs.h"
64
#include "ihevc_platform_macros.h"
65
#include "ihevc_deblk.h"
66
#include "ihevc_itrans_recon.h"
67
#include "ihevc_chroma_itrans_recon.h"
68
#include "ihevc_chroma_intra_pred.h"
69
#include "ihevc_intra_pred.h"
70
#include "ihevc_inter_pred.h"
71
#include "ihevc_mem_fns.h"
72
#include "ihevc_padding.h"
73
#include "ihevc_weighted_pred.h"
74
#include "ihevc_sao.h"
75
#include "ihevc_resi_trans.h"
76
#include "ihevc_quant_iquant_ssd.h"
77
#include "ihevc_cabac_tables.h"
78
79
#include "ihevce_defs.h"
80
#include "ihevce_lap_enc_structs.h"
81
#include "ihevce_multi_thrd_structs.h"
82
#include "ihevce_me_common_defs.h"
83
#include "ihevce_had_satd.h"
84
#include "ihevce_error_codes.h"
85
#include "ihevce_bitstream.h"
86
#include "ihevce_cabac.h"
87
#include "ihevce_rdoq_macros.h"
88
#include "ihevce_function_selector.h"
89
#include "ihevce_enc_structs.h"
90
#include "ihevce_entropy_structs.h"
91
#include "ihevce_cmn_utils_instr_set_router.h"
92
#include "ihevce_enc_loop_structs.h"
93
#include "ihevce_cabac_rdo.h"
94
#include "ihevce_sao.h"
95
96
/*****************************************************************************/
97
/* Function Definitions                                                      */
98
/*****************************************************************************/
99
100
/**
101
*******************************************************************************
102
*
103
* @brief
104
*     ihevce_sao_set_avilability
105
*
106
* @par Description:
107
*     Sets the availability flag for SAO.
108
*
109
* @param[in]
110
*   ps_sao_ctxt:   Pointer to SAO context
111
* @returns
112
*
113
* @remarks
114
*  None
115
*
116
*******************************************************************************
117
*/
118
void ihevce_sao_set_avilability(
119
    UWORD8 *pu1_avail, sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params)
120
68.7k
{
121
68.7k
    WORD32 i;
122
123
68.7k
    WORD32 ctb_x_pos = ps_sao_ctxt->i4_ctb_x;
124
68.7k
    WORD32 ctb_y_pos = ps_sao_ctxt->i4_ctb_y;
125
126
618k
    for(i = 0; i < 8; i++)
127
550k
    {
128
550k
        pu1_avail[i] = 255;
129
550k
    }
130
131
    /* SAO_note_01: If the CTB lies on a tile or a slice boundary and
132
    in-loop filtering is enabled at tile and slice boundary, then SAO must
133
    be performed at tile/slice boundaries also.
134
    Hence the boundary checks should be based on frame position of CTB
135
    rather than s_ctb_nbr_avail_flags.u1_left_avail flags.
136
    Search for <SAO_note_01> in workspace to know more */
137
    /* Availaibility flags for first col*/
138
68.7k
    if(ctb_x_pos == ps_tile_params->i4_first_ctb_x)
139
26.1k
    {
140
26.1k
        pu1_avail[0] = 0;
141
26.1k
        pu1_avail[4] = 0;
142
26.1k
        pu1_avail[6] = 0;
143
26.1k
    }
144
145
    /* Availaibility flags for last col*/
146
68.7k
    if((ctb_x_pos + 1) ==
147
68.7k
       (ps_tile_params->i4_first_ctb_x + ps_tile_params->i4_curr_tile_wd_in_ctb_unit))
148
28.8k
    {
149
28.8k
        pu1_avail[1] = 0;
150
28.8k
        pu1_avail[5] = 0;
151
28.8k
        pu1_avail[7] = 0;
152
28.8k
    }
153
154
    /* Availaibility flags for first row*/
155
68.7k
    if(ctb_y_pos == ps_tile_params->i4_first_ctb_y)
156
25.6k
    {
157
25.6k
        pu1_avail[2] = 0;
158
25.6k
        pu1_avail[4] = 0;
159
25.6k
        pu1_avail[5] = 0;
160
25.6k
    }
161
162
    /* Availaibility flags for last row*/
163
68.7k
    if((ctb_y_pos + 1) ==
164
68.7k
       (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit))
165
27.9k
    {
166
27.9k
        pu1_avail[3] = 0;
167
27.9k
        pu1_avail[6] = 0;
168
27.9k
        pu1_avail[7] = 0;
169
27.9k
    }
170
68.7k
}
171
172
/**
173
*******************************************************************************
174
*
175
* @brief
176
*   Sao CTB level function.
177
*
178
* @par Description:
179
*   For a given CTB, sao is done. Both the luma and chroma
180
*   blocks are processed
181
*
182
* @param[in]
183
*   ps_sao_ctxt:   Pointer to SAO context
184
*
185
* @returns
186
*
187
* @remarks
188
*  None
189
*
190
*******************************************************************************
191
*/
192
void ihevce_sao_ctb(sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params)
193
64.7k
{
194
64.7k
    sao_enc_t *ps_sao;
195
64.7k
    UWORD8 u1_src_top_left_luma, u1_src_top_left_chroma[2];
196
64.7k
    UWORD8 *pu1_src_left_luma_buf, *pu1_src_top_luma_buf;
197
64.7k
    UWORD8 *pu1_src_left_chroma_buf, *pu1_src_top_chroma_buf;
198
64.7k
    UWORD8 *pu1_src_luma, *pu1_src_chroma;
199
64.7k
    WORD32 luma_src_stride, ctb_size;
200
64.7k
    WORD32 chroma_src_stride;
201
64.7k
    UWORD8 au1_avail_luma[8], au1_avail_chroma[8];
202
64.7k
    WORD32 sao_blk_wd, sao_blk_ht, sao_wd_chroma, sao_ht_chroma;
203
64.7k
    UWORD8 *pu1_top_left_luma, *pu1_top_left_chroma;
204
64.7k
    UWORD8 *pu1_src_bot_left_luma, *pu1_src_top_right_luma;
205
64.7k
    UWORD8 *pu1_src_bot_left_chroma, *pu1_src_top_right_chroma;
206
64.7k
    UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2);
207
208
64.7k
    ps_sao = ps_sao_ctxt->ps_sao;
209
210
64.7k
    ASSERT(
211
64.7k
        (abs(ps_sao->u1_y_offset[1]) <= 7) && (abs(ps_sao->u1_y_offset[2]) <= 7) &&
212
64.7k
        (abs(ps_sao->u1_y_offset[3]) <= 7) && (abs(ps_sao->u1_y_offset[4]) <= 7));
213
64.7k
    ASSERT(
214
64.7k
        (abs(ps_sao->u1_cb_offset[1]) <= 7) && (abs(ps_sao->u1_cb_offset[2]) <= 7) &&
215
64.7k
        (abs(ps_sao->u1_cb_offset[3]) <= 7) && (abs(ps_sao->u1_cb_offset[4]) <= 7));
216
64.7k
    ASSERT(
217
64.7k
        (abs(ps_sao->u1_cr_offset[1]) <= 7) && (abs(ps_sao->u1_cr_offset[2]) <= 7) &&
218
64.7k
        (abs(ps_sao->u1_cr_offset[3]) <= 7) && (abs(ps_sao->u1_cr_offset[4]) <= 7));
219
64.7k
    ASSERT(
220
64.7k
        (ps_sao->b5_y_band_pos <= 28) && (ps_sao->b5_cb_band_pos <= 28) &&
221
64.7k
        (ps_sao->b5_cr_band_pos <= 28));
222
223
64.7k
    if(ps_sao_ctxt->i1_slice_sao_luma_flag)
224
64.7k
    {
225
        /*initialize the src pointer to current row*/
226
64.7k
        luma_src_stride = ps_sao_ctxt->i4_cur_luma_recon_stride;
227
228
64.7k
        ctb_size = ps_sao_ctxt->i4_ctb_size;
229
230
        /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/
231
64.7k
        ps_sao->u1_y_offset[0] = 0; /* 0th element is not being used  */
232
64.7k
        sao_blk_wd = ps_sao_ctxt->i4_sao_blk_wd;
233
64.7k
        sao_blk_ht = ps_sao_ctxt->i4_sao_blk_ht;
234
235
64.7k
        pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf;
236
        /* Pointer to the top luma buffer corresponding to the current ctb row*/
237
64.7k
        pu1_src_top_luma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_luma;
238
239
        /* Pointer to left luma buffer corresponding to the current ctb row*/
240
64.7k
        pu1_src_left_luma_buf = ps_sao_ctxt->au1_left_luma_scratch;
241
242
        /* Pointer to the top right luma buffer corresponding to the current ctb row*/
243
64.7k
        pu1_src_top_right_luma = pu1_src_top_luma_buf /*- top_buf_stide*/ + sao_blk_wd;
244
245
        /* Pointer to the bottom left luma buffer corresponding to the current ctb row*/
246
64.7k
        pu1_src_bot_left_luma =
247
64.7k
            ps_sao_ctxt->pu1_frm_luma_recon_buf + ctb_size * ps_sao_ctxt->i4_frm_luma_recon_stride -
248
64.7k
            1 + (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
249
64.7k
            (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/
250
251
        /* Back up the top left pixel for (x+1, y+1)th ctb*/
252
64.7k
        u1_src_top_left_luma = *(pu1_src_top_luma_buf + sao_blk_wd - 1);
253
64.7k
        pu1_top_left_luma = pu1_src_top_luma_buf - 1;
254
255
64.7k
        if(SAO_BAND == ps_sao->b3_y_type_idx)
256
0
        {
257
0
            ihevc_sao_band_offset_luma(
258
0
                pu1_src_luma,
259
0
                luma_src_stride,
260
0
                pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
261
0
                pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
262
0
                pu1_src_top_luma_buf - 1, /* Top left*/
263
0
                ps_sao->b5_y_band_pos,
264
0
                ps_sao->u1_y_offset,
265
0
                sao_blk_wd,
266
0
                sao_blk_ht);
267
268
0
            if((ps_sao_ctxt->i4_ctb_y > 0))
269
0
            {
270
0
                *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma;
271
0
            }
272
0
        }
273
64.7k
        else if(ps_sao->b3_y_type_idx >= SAO_EDGE_0_DEG)
274
52.5k
        {
275
            /*In case of edge offset, 1st and 2nd offsets are always inferred as offsets
276
            * corresponding to EO category 1 and 2 which should be always positive
277
            * And 3rd and 4th offsets are always inferred as offsets corresponding to
278
            * EO category 3 and 4 which should be negative for all the EO classes(or EO typeidx)
279
            */
280
            // clang-format off
281
52.5k
            ASSERT((ps_sao->u1_y_offset[1] >= 0) && (ps_sao->u1_y_offset[2] >= 0));
282
52.5k
            ASSERT((ps_sao->u1_y_offset[3] <= 0) && (ps_sao->u1_y_offset[4] <= 0));
283
            // clang-format on
284
285
52.5k
            ihevce_sao_set_avilability(au1_avail_luma, ps_sao_ctxt, ps_tile_params);
286
287
52.5k
            ps_sao_ctxt->apf_sao_luma[ps_sao->b3_y_type_idx - 2](
288
52.5k
                pu1_src_luma,
289
52.5k
                luma_src_stride,
290
52.5k
                pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
291
52.5k
                pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
292
52.5k
                pu1_top_left_luma, /* Top left*/
293
52.5k
                pu1_src_top_right_luma, /* Top right*/
294
52.5k
                pu1_src_bot_left_luma, /* Bottom left*/
295
52.5k
                au1_avail_luma,
296
52.5k
                ps_sao->u1_y_offset,
297
52.5k
                sao_blk_wd,
298
52.5k
                sao_blk_ht);
299
300
52.5k
            if((ps_sao_ctxt->i4_ctb_y > 0))
301
32.1k
            {
302
32.1k
                *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma;
303
32.1k
            }
304
52.5k
        }
305
64.7k
    }
306
307
64.7k
    if(ps_sao_ctxt->i1_slice_sao_chroma_flag)
308
28.5k
    {
309
        /*initialize the src pointer to current row*/
310
28.5k
        chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride;
311
28.5k
        ctb_size = ps_sao_ctxt->i4_ctb_size;
312
313
        /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/
314
        //top_buf_stide = ps_sao_ctxt->u4_ctb_aligned_wd + 2;
315
28.5k
        ps_sao->u1_cb_offset[0] = 0; /* 0th element is not used  */
316
28.5k
        ps_sao->u1_cr_offset[0] = 0;
317
28.5k
        sao_wd_chroma = ps_sao_ctxt->i4_sao_blk_wd;
318
28.5k
        sao_ht_chroma = ps_sao_ctxt->i4_sao_blk_ht / (!u1_is_422 + 1);
319
320
28.5k
        pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf;
321
        /* Pointer to the top luma buffer corresponding to the current ctb row*/
322
28.5k
        pu1_src_top_chroma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_chroma;
323
        // clang-format off
324
        /* Pointer to left luma buffer corresponding to the current ctb row*/
325
28.5k
        pu1_src_left_chroma_buf = ps_sao_ctxt->au1_left_chroma_scratch;  //ps_sao_ctxt->au1_sao_src_left_chroma;
326
        // clang-format on
327
        /* Pointer to the top right chroma buffer corresponding to the current ctb row*/
328
28.5k
        pu1_src_top_right_chroma = pu1_src_top_chroma_buf /*- top_buf_stide*/ + sao_wd_chroma;
329
330
        /* Pointer to the bottom left luma buffer corresponding to the current ctb row*/
331
28.5k
        pu1_src_bot_left_chroma =
332
28.5k
            ps_sao_ctxt->pu1_frm_chroma_recon_buf +
333
28.5k
            (ctb_size >> !u1_is_422) * ps_sao_ctxt->i4_frm_chroma_recon_stride - 2 +
334
28.5k
            (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
335
28.5k
             (ctb_size >> !u1_is_422)) +
336
28.5k
            (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/
337
338
        /* Back up the top left pixel for (x+1, y+1)th ctb*/
339
28.5k
        u1_src_top_left_chroma[0] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 2);
340
28.5k
        u1_src_top_left_chroma[1] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 1);
341
28.5k
        pu1_top_left_chroma = pu1_src_top_chroma_buf - 2;
342
343
28.5k
        if(SAO_BAND == ps_sao->b3_cb_type_idx)
344
0
        {
345
0
            ihevc_sao_band_offset_chroma(
346
0
                pu1_src_chroma,
347
0
                chroma_src_stride,
348
0
                pu1_src_left_chroma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
349
0
                pu1_src_top_chroma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
350
0
                pu1_top_left_chroma, /* Top left*/
351
0
                ps_sao->b5_cb_band_pos,
352
0
                ps_sao->b5_cr_band_pos,
353
0
                ps_sao->u1_cb_offset,
354
0
                ps_sao->u1_cr_offset,
355
0
                sao_wd_chroma,
356
0
                sao_ht_chroma);
357
358
0
            if((ps_sao_ctxt->i4_ctb_y > 0))
359
0
            {
360
0
                *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0];
361
0
                *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1];
362
0
            }
363
0
        }
364
28.5k
        else if(ps_sao->b3_cb_type_idx >= SAO_EDGE_0_DEG)
365
16.2k
        {
366
            /*In case of edge offset, 1st and 2nd offsets are always inferred as offsets
367
            * corresponding to EO category 1 and 2 which should be always positive
368
            * And 3rd and 4th offsets are always inferred as offsets corresponding to
369
            * EO category 3 and 4 which should be negative for all the EO classes(or EO typeidx)
370
            */
371
16.2k
            ASSERT((ps_sao->u1_cb_offset[1] >= 0) && (ps_sao->u1_cb_offset[2] >= 0));
372
16.2k
            ASSERT((ps_sao->u1_cb_offset[3] <= 0) && (ps_sao->u1_cb_offset[4] <= 0));
373
374
16.2k
            ASSERT((ps_sao->u1_cr_offset[1] >= 0) && (ps_sao->u1_cr_offset[2] >= 0));
375
16.2k
            ASSERT((ps_sao->u1_cr_offset[3] <= 0) && (ps_sao->u1_cr_offset[4] <= 0));
376
377
16.2k
            ihevce_sao_set_avilability(au1_avail_chroma, ps_sao_ctxt, ps_tile_params);
378
379
16.2k
            ps_sao_ctxt->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](
380
16.2k
                pu1_src_chroma,
381
16.2k
                chroma_src_stride,
382
16.2k
                pu1_src_left_chroma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
383
16.2k
                pu1_src_top_chroma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
384
16.2k
                pu1_top_left_chroma, /* Top left*/
385
16.2k
                pu1_src_top_right_chroma, /* Top right*/
386
16.2k
                pu1_src_bot_left_chroma, /* Bottom left*/
387
16.2k
                au1_avail_chroma,
388
16.2k
                ps_sao->u1_cb_offset,
389
16.2k
                ps_sao->u1_cr_offset,
390
16.2k
                sao_wd_chroma,
391
16.2k
                sao_ht_chroma);
392
393
16.2k
            if((ps_sao_ctxt->i4_ctb_y > 0))
394
10.9k
            {
395
10.9k
                *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0];
396
10.9k
                *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1];
397
10.9k
            }
398
16.2k
        }
399
28.5k
    }
400
64.7k
}
401
402
/**
403
*******************************************************************************
404
*
405
* @brief
406
*   CTB level function to do SAO analysis.
407
*
408
* @par Description:
409
*   For a given CTB, sao analysis is done for both luma and chroma.
410
*
411
*
412
* @param[in]
413
*   ps_sao_ctxt:   Pointer to SAO context
414
*   ps_ctb_enc_loop_out : pointer to ctb level output structure from enc loop
415
*
416
* @returns
417
*
418
* @remarks
419
*  None
420
*
421
* @Assumptions:
422
*   1) Initial Cabac state for current ctb to be sao'ed (i.e (x-1,y-1)th ctb) is assumed to be
423
*      almost same as cabac state of (x,y)th ctb.
424
*   2) Distortion is calculated in spatial domain but lambda used to calculate the cost is
425
*      in freq domain.
426
*******************************************************************************
427
*/
428
void ihevce_sao_analyse(
429
    sao_ctxt_t *ps_sao_ctxt,
430
    ctb_enc_loop_out_t *ps_ctb_enc_loop_out,
431
    UWORD32 *pu4_frame_rdopt_header_bits,
432
    ihevce_tile_params_t *ps_tile_params)
433
9.06k
{
434
9.06k
    UWORD8 *pu1_luma_scratch_buf;
435
9.06k
    UWORD8 *pu1_chroma_scratch_buf;
436
9.06k
    UWORD8 *pu1_src_luma, *pu1_recon_luma;
437
9.06k
    UWORD8 *pu1_src_chroma, *pu1_recon_chroma;
438
9.06k
    WORD32 luma_src_stride, luma_recon_stride, ctb_size, ctb_wd, ctb_ht;
439
9.06k
    WORD32 chroma_src_stride, chroma_recon_stride;
440
9.06k
    WORD32 i4_luma_scratch_buf_stride;
441
9.06k
    WORD32 i4_chroma_scratch_buf_stride;
442
9.06k
    sao_ctxt_t s_sao_ctxt;
443
9.06k
    UWORD32 ctb_bits = 0, distortion = 0, curr_cost = 0, best_cost = 0;
444
9.06k
    LWORD64 i8_cl_ssd_lambda_qf, i8_cl_ssd_lambda_chroma_qf;
445
9.06k
    WORD32 rdo_cand, num_luma_rdo_cand = 0, num_rdo_cand = 0;
446
9.06k
    WORD32 curr_buf_idx, best_buf_idx, best_cand_idx;
447
9.06k
    WORD32 row;
448
9.06k
    WORD32 edgeidx;
449
9.06k
    WORD32 acc_error_category[5] = { 0, 0, 0, 0, 0 }, category_count[5] = { 0, 0, 0, 0, 0 };
450
9.06k
    sao_enc_t s_best_luma_chroma_cand;
451
9.06k
    WORD32 best_ctb_sao_bits = 0;
452
9.06k
#if DISABLE_SAO_WHEN_NOISY && !defined(ENC_VER_v2)
453
9.06k
    UWORD8 u1_force_no_offset =
454
9.06k
        ps_sao_ctxt
455
9.06k
            ->ps_ctb_data
456
9.06k
                [ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_data_stride * ps_sao_ctxt->i4_ctb_y]
457
9.06k
            .s_ctb_noise_params.i4_noise_present;
458
9.06k
#endif
459
9.06k
    UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2);
460
461
9.06k
    *pu4_frame_rdopt_header_bits = 0;
462
463
9.06k
    ctb_size = ps_sao_ctxt->i4_ctb_size;
464
9.06k
    ctb_wd = ps_sao_ctxt->i4_sao_blk_wd;
465
9.06k
    ctb_ht = ps_sao_ctxt->i4_sao_blk_ht;
466
467
9.06k
    s_sao_ctxt = ps_sao_ctxt[0];
468
469
    /* Memset the best luma_chroma_cand structure to avoid asserts in debug mode*/
470
9.06k
    memset(&s_best_luma_chroma_cand, 0, sizeof(sao_enc_t));
471
472
    /* Initialize the pointer and strides for luma buffers*/
473
9.06k
    pu1_recon_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf;
474
9.06k
    luma_recon_stride = ps_sao_ctxt->i4_cur_luma_recon_stride;
475
476
9.06k
    pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_src_buf;
477
9.06k
    luma_src_stride = ps_sao_ctxt->i4_cur_luma_src_stride;
478
9.06k
    i4_luma_scratch_buf_stride = SCRATCH_BUF_STRIDE;
479
480
    /* Initialize the pointer and strides for luma buffers*/
481
9.06k
    pu1_recon_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf;
482
9.06k
    chroma_recon_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride;
483
484
9.06k
    pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_src_buf;
485
9.06k
    chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_src_stride;
486
9.06k
    i4_chroma_scratch_buf_stride = SCRATCH_BUF_STRIDE;
487
488
9.06k
    i8_cl_ssd_lambda_qf = ps_sao_ctxt->i8_cl_ssd_lambda_qf;
489
9.06k
    i8_cl_ssd_lambda_chroma_qf = ps_sao_ctxt->i8_cl_ssd_lambda_chroma_qf;
490
491
    /*****************************************************/
492
    /********************RDO FOR LUMA CAND****************/
493
    /*****************************************************/
494
495
#if !DISABLE_SAO_WHEN_NOISY
496
    if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
497
#else
498
9.06k
    if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag && !u1_force_no_offset)
499
9.06k
#endif
500
9.06k
    {
501
        /* Candidate for Edge offset SAO*/
502
        /* Following is the convention for curr pixel and
503
        * two neighbouring pixels for 0 deg, 90 deg, 135 deg and 45 deg */
504
        /*
505
        * 0 deg :  a c b     90 deg:  a       135 deg: a          45 deg:     a
506
        *                             c                  c                  c
507
        *                             b                    b              b
508
        */
509
510
        /* 0 deg SAO CAND*/
511
        /* Reset the error and edge count*/
512
54.3k
        for(edgeidx = 0; edgeidx < 5; edgeidx++)
513
45.3k
        {
514
45.3k
            acc_error_category[edgeidx] = 0;
515
45.3k
            category_count[edgeidx] = 0;
516
45.3k
        }
517
518
        /* Call the funciton to populate the EO parameter for this ctb for 0 deg EO class*/
519
        // clang-format off
520
9.06k
        ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_0_DEG,
521
9.06k
                acc_error_category, category_count);
522
        // clang-format on
523
        // clang-format off
524
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_0_DEG;
525
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
526
9.06k
                ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
527
9.06k
                : 0;
528
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
529
9.06k
                ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
530
9.06k
                : 0;
531
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
532
9.06k
                ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
533
9.06k
                : 0;
534
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] =category_count[4]
535
9.06k
                ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
536
9.06k
                : 0;
537
        // clang-format on
538
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
539
        // clang-format off
540
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
541
        // clang-format on
542
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
543
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
544
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
545
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
546
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
547
548
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
549
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
550
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
551
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
552
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
553
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
554
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
555
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
556
557
9.06k
        num_luma_rdo_cand++;
558
559
        /* 90 degree SAO CAND*/
560
54.3k
        for(edgeidx = 0; edgeidx < 5; edgeidx++)
561
45.3k
        {
562
45.3k
            acc_error_category[edgeidx] = 0;
563
45.3k
            category_count[edgeidx] = 0;
564
45.3k
        }
565
566
        /* Call the funciton to populate the EO parameter for this ctb for 90 deg EO class*/
567
        // clang-format off
568
9.06k
        ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_90_DEG,
569
9.06k
                acc_error_category, category_count);
570
571
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_90_DEG;
572
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
573
9.06k
                ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
574
9.06k
                : 0;
575
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
576
9.06k
                ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
577
9.06k
                : 0;
578
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
579
9.06k
                ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
580
9.06k
                : 0;
581
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
582
9.06k
                ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
583
9.06k
                : 0;
584
        // clang-format on
585
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
586
587
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
588
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
589
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
590
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
591
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
592
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
593
594
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
595
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
596
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
597
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
598
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
599
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
600
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
601
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
602
603
9.06k
        num_luma_rdo_cand++;
604
605
        /* 135 degree SAO CAND*/
606
54.3k
        for(edgeidx = 0; edgeidx < 5; edgeidx++)
607
45.3k
        {
608
45.3k
            acc_error_category[edgeidx] = 0;
609
45.3k
            category_count[edgeidx] = 0;
610
45.3k
        }
611
612
        /* Call the funciton to populate the EO parameter for this ctb for 135 deg EO class*/
613
        // clang-format off
614
9.06k
        ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_135_DEG,
615
9.06k
                acc_error_category, category_count);
616
617
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_135_DEG;
618
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
619
9.06k
                ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
620
9.06k
                : 0;
621
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
622
9.06k
                ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
623
9.06k
                : 0;
624
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
625
9.06k
                ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
626
9.06k
                : 0;
627
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
628
9.06k
                ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
629
9.06k
                : 0;
630
        // clang-format on
631
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
632
633
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
634
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
635
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
636
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
637
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
638
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
639
640
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
641
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
642
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
643
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
644
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
645
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
646
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
647
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
648
649
9.06k
        num_luma_rdo_cand++;
650
651
        /* 45 degree SAO CAND*/
652
54.3k
        for(edgeidx = 0; edgeidx < 5; edgeidx++)
653
45.3k
        {
654
45.3k
            acc_error_category[edgeidx] = 0;
655
45.3k
            category_count[edgeidx] = 0;
656
45.3k
        }
657
658
        /* Call the function to populate the EO parameter for this ctb for 45 deg EO class*/
659
        // clang-format off
660
9.06k
        ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_45_DEG,
661
9.06k
                acc_error_category, category_count);
662
663
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_45_DEG;
664
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
665
9.06k
                ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
666
9.06k
                : 0;
667
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
668
9.06k
                ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
669
9.06k
                : 0;
670
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
671
9.06k
                ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
672
9.06k
                : 0;
673
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
674
9.06k
                ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
675
9.06k
                : 0;
676
        // clang-format on
677
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
678
679
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
680
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
681
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
682
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
683
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
684
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
685
686
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
687
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
688
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
689
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
690
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
691
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
692
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
693
9.06k
        ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
694
695
9.06k
        num_luma_rdo_cand++;
696
697
        /* First cand will be best cand after 1st iteration*/
698
9.06k
        curr_buf_idx = 0;
699
9.06k
        best_buf_idx = 1;
700
9.06k
        best_cost = 0xFFFFFFFF;
701
9.06k
        best_cand_idx = 0;
702
703
        /*Back up the top pixels for (x,y+1)th ctb*/
704
9.06k
        if(!ps_sao_ctxt->i4_is_last_ctb_row)
705
5.28k
        {
706
5.28k
            memcpy(
707
5.28k
                ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride,
708
5.28k
                pu1_recon_luma + luma_recon_stride * (ctb_size - 1),
709
5.28k
                ps_sao_ctxt->i4_sao_blk_wd);
710
5.28k
        }
711
712
45.3k
        for(rdo_cand = 0; rdo_cand < num_luma_rdo_cand; rdo_cand++)
713
36.2k
        {
714
36.2k
            s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand];
715
716
            /* This memcpy is required because cabac uses parameters from this structure
717
            * to evaluate bits and this structure ptr is sent to cabac through
718
            * "ihevce_cabac_rdo_encode_sao" function
719
            */
720
36.2k
            memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t));
721
722
            /* Copy the left pixels to the scratch buffer for every rdo cand because it is
723
            overwritten by the sao leaf level function for next ctb*/
724
36.2k
            memcpy(
725
36.2k
                s_sao_ctxt.au1_left_luma_scratch,
726
36.2k
                ps_sao_ctxt->au1_sao_src_left_luma,
727
36.2k
                ps_sao_ctxt->i4_sao_blk_ht);
728
729
            /* Copy the top and top left pixels to the scratch buffer for every rdo cand because it is
730
            overwritten by the sao leaf level function for next ctb*/
731
36.2k
            memcpy(
732
36.2k
                s_sao_ctxt.au1_top_luma_scratch,
733
36.2k
                ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1,
734
36.2k
                ps_sao_ctxt->i4_sao_blk_wd + 2);
735
36.2k
            s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1;
736
737
36.2k
            pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx];
738
739
36.2k
            ASSERT(
740
36.2k
                (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) &&
741
36.2k
                (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) &&
742
36.2k
                (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) &&
743
36.2k
                (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7));
744
36.2k
            ASSERT(
745
36.2k
                (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) &&
746
36.2k
                (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) &&
747
36.2k
                (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) &&
748
36.2k
                (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7));
749
36.2k
            ASSERT(
750
36.2k
                (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) &&
751
36.2k
                (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) &&
752
36.2k
                (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) &&
753
36.2k
                (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7));
754
36.2k
            ASSERT(
755
36.2k
                (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) &&
756
36.2k
                (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) &&
757
36.2k
                (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28));
758
759
            /* Copy the deblocked recon data to scratch buffer to do sao*/
760
761
36.2k
            ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
762
36.2k
                pu1_luma_scratch_buf,
763
36.2k
                i4_luma_scratch_buf_stride,
764
36.2k
                pu1_recon_luma,
765
36.2k
                luma_recon_stride,
766
36.2k
                SCRATCH_BUF_STRIDE,
767
36.2k
                ctb_ht + 1);
768
769
36.2k
            s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf;
770
36.2k
            s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride;
771
772
36.2k
            s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag;
773
36.2k
            s_sao_ctxt.i1_slice_sao_chroma_flag = 0;
774
775
36.2k
            ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params);
776
777
            /* Calculate the distortion between sao'ed ctb and original src ctb*/
778
            // clang-format off
779
36.2k
            distortion =
780
36.2k
                ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma,
781
36.2k
                        s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride,
782
36.2k
                        s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, ctb_ht, NULL_PLANE);
783
            // clang-format on
784
785
36.2k
            ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx;
786
36.2k
            ctb_bits = ihevce_cabac_rdo_encode_sao(
787
36.2k
                ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out);
788
789
            /* Calculate the cost as D+(lambda)*R   */
790
36.2k
            curr_cost = distortion +
791
36.2k
                        COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
792
793
36.2k
            if(curr_cost < best_cost)
794
12.2k
            {
795
12.2k
                best_cost = curr_cost;
796
12.2k
                best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
797
12.2k
                best_cand_idx = rdo_cand;
798
12.2k
                curr_buf_idx = !curr_buf_idx;
799
12.2k
            }
800
36.2k
        }
801
802
        /* Copy the sao parameters of the best luma cand into the luma_chroma cand structure for next stage of RDO
803
        * between luma_chroma combined cand, NO SAO cand, LEFT and TOP merge cand
804
        */
805
9.06k
        s_best_luma_chroma_cand.b3_y_type_idx =
806
9.06k
            ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b3_y_type_idx;
807
9.06k
        s_best_luma_chroma_cand.u1_y_offset[1] =
808
9.06k
            ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[1];
809
9.06k
        s_best_luma_chroma_cand.u1_y_offset[2] =
810
9.06k
            ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[2];
811
9.06k
        s_best_luma_chroma_cand.u1_y_offset[3] =
812
9.06k
            ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[3];
813
9.06k
        s_best_luma_chroma_cand.u1_y_offset[4] =
814
9.06k
            ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[4];
815
9.06k
        s_best_luma_chroma_cand.b5_y_band_pos =
816
9.06k
            ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b5_y_band_pos;
817
9.06k
    }
818
0
    else
819
0
    {
820
        /*Back up the top pixels for (x,y+1)th ctb*/
821
0
        if(!ps_sao_ctxt->i4_is_last_ctb_row)
822
0
        {
823
0
            memcpy(
824
0
                ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride,
825
0
                pu1_recon_luma + luma_recon_stride * (ctb_size - 1),
826
0
                ps_sao_ctxt->i4_sao_blk_wd);
827
0
        }
828
829
0
        s_best_luma_chroma_cand.b3_y_type_idx = SAO_NONE;
830
0
        s_best_luma_chroma_cand.u1_y_offset[1] = 0;
831
0
        s_best_luma_chroma_cand.u1_y_offset[2] = 0;
832
0
        s_best_luma_chroma_cand.u1_y_offset[3] = 0;
833
0
        s_best_luma_chroma_cand.u1_y_offset[4] = 0;
834
0
        s_best_luma_chroma_cand.b5_y_band_pos = 0;
835
0
        s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
836
0
        s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
837
838
0
        s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE;
839
0
        s_best_luma_chroma_cand.u1_cb_offset[1] = 0;
840
0
        s_best_luma_chroma_cand.u1_cb_offset[2] = 0;
841
0
        s_best_luma_chroma_cand.u1_cb_offset[3] = 0;
842
0
        s_best_luma_chroma_cand.u1_cb_offset[4] = 0;
843
0
        s_best_luma_chroma_cand.b5_cb_band_pos = 0;
844
845
0
        s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE;
846
0
        s_best_luma_chroma_cand.u1_cr_offset[1] = 0;
847
0
        s_best_luma_chroma_cand.u1_cr_offset[2] = 0;
848
0
        s_best_luma_chroma_cand.u1_cr_offset[3] = 0;
849
0
        s_best_luma_chroma_cand.u1_cr_offset[4] = 0;
850
0
        s_best_luma_chroma_cand.b5_cr_band_pos = 0;
851
0
    }
852
    /*****************************************************/
853
    /********************RDO FOR CHROMA CAND**************/
854
    /*****************************************************/
855
#if !DISABLE_SAO_WHEN_NOISY
856
    if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
857
#else
858
9.06k
    if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag && !u1_force_no_offset)
859
9.06k
#endif
860
9.06k
    {
861
        /*Back up the top pixels for (x,y+1)th ctb*/
862
9.06k
        if(!ps_sao_ctxt->i4_is_last_ctb_row)
863
5.28k
        {
864
5.28k
            memcpy(
865
5.28k
                ps_sao_ctxt->pu1_curr_sao_src_top_chroma +
866
5.28k
                    ps_sao_ctxt->i4_frm_top_chroma_buf_stride,
867
5.28k
                pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1),
868
5.28k
                ps_sao_ctxt->i4_sao_blk_wd);
869
5.28k
        }
870
871
        /* Reset the error and edge count*/
872
54.3k
        for(edgeidx = 0; edgeidx < 5; edgeidx++)
873
45.3k
        {
874
45.3k
            acc_error_category[edgeidx] = 0;
875
45.3k
            category_count[edgeidx] = 0;
876
45.3k
        }
877
        // clang-format off
878
9.06k
        ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_chroma_eo_sao_params(ps_sao_ctxt,
879
9.06k
                s_best_luma_chroma_cand.b3_y_type_idx, acc_error_category,
880
9.06k
                category_count);
881
        // clang-format on
882
883
        /* Copy the sao parameters of the best luma cand into the luma_chroma cand structure for next stage of RDO
884
        * between luma_chroma combined cand, NO SAO cand, LEFT and TOP merge cand
885
        */
886
        // clang-format off
887
9.06k
        s_best_luma_chroma_cand.b3_cb_type_idx = s_best_luma_chroma_cand.b3_y_type_idx;
888
9.06k
        s_best_luma_chroma_cand.u1_cb_offset[1] = category_count[0]
889
9.06k
                ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
890
9.06k
                : 0;
891
9.06k
        s_best_luma_chroma_cand.u1_cb_offset[2] = category_count[1]
892
9.06k
                ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
893
9.06k
                : 0;
894
9.06k
        s_best_luma_chroma_cand.u1_cb_offset[3] = category_count[3]
895
9.06k
                ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
896
9.06k
                : 0;
897
9.06k
        s_best_luma_chroma_cand.u1_cb_offset[4] = category_count[4]
898
9.06k
                ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
899
9.06k
                : 0;
900
9.06k
        s_best_luma_chroma_cand.b5_cb_band_pos = 0;
901
902
9.06k
        s_best_luma_chroma_cand.b3_cr_type_idx = s_best_luma_chroma_cand.b3_y_type_idx;
903
9.06k
        s_best_luma_chroma_cand.u1_cr_offset[1] = category_count[0]
904
9.06k
                ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
905
9.06k
                : 0;
906
9.06k
        s_best_luma_chroma_cand.u1_cr_offset[2] = category_count[1]
907
9.06k
                ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
908
9.06k
                : 0;
909
9.06k
        s_best_luma_chroma_cand.u1_cr_offset[3] = category_count[3]
910
9.06k
                ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
911
9.06k
                : 0;
912
9.06k
        s_best_luma_chroma_cand.u1_cr_offset[4] = category_count[4]
913
9.06k
                ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
914
9.06k
                : 0;
915
        // clang-format on
916
9.06k
        s_best_luma_chroma_cand.b5_cr_band_pos = 0;
917
9.06k
    }
918
0
    else
919
0
    {
920
        /*Back up the top pixels for (x,y+1)th ctb*/
921
0
        if(!ps_sao_ctxt->i4_is_last_ctb_row)
922
0
        {
923
0
            memcpy(
924
0
                ps_sao_ctxt->pu1_curr_sao_src_top_chroma +
925
0
                    ps_sao_ctxt->i4_frm_top_chroma_buf_stride,
926
0
                pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1),
927
0
                ps_sao_ctxt->i4_sao_blk_wd);
928
0
        }
929
930
0
        s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE;
931
0
        s_best_luma_chroma_cand.u1_cb_offset[1] = 0;
932
0
        s_best_luma_chroma_cand.u1_cb_offset[2] = 0;
933
0
        s_best_luma_chroma_cand.u1_cb_offset[3] = 0;
934
0
        s_best_luma_chroma_cand.u1_cb_offset[4] = 0;
935
0
        s_best_luma_chroma_cand.b5_cb_band_pos = 0;
936
937
0
        s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE;
938
0
        s_best_luma_chroma_cand.u1_cr_offset[1] = 0;
939
0
        s_best_luma_chroma_cand.u1_cr_offset[2] = 0;
940
0
        s_best_luma_chroma_cand.u1_cr_offset[3] = 0;
941
0
        s_best_luma_chroma_cand.u1_cr_offset[4] = 0;
942
0
        s_best_luma_chroma_cand.b5_cr_band_pos = 0;
943
944
0
        s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
945
0
        s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
946
0
    }
947
948
9.06k
    s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
949
9.06k
    s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
950
951
    /*****************************************************/
952
    /**RDO for Best Luma - Chroma combined, No SAO,*******/
953
    /*************Left merge and Top merge****************/
954
    /*****************************************************/
955
956
    /* No SAO cand*/
957
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
958
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
959
960
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_y_type_idx = SAO_NONE;
961
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[1] = 0;
962
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[2] = 0;
963
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[3] = 0;
964
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[4] = 0;
965
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_y_band_pos = 0;
966
967
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cb_type_idx = SAO_NONE;
968
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[1] = 0;
969
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[2] = 0;
970
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[3] = 0;
971
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[4] = 0;
972
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cb_band_pos = 0;
973
974
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cr_type_idx = SAO_NONE;
975
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[1] = 0;
976
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[2] = 0;
977
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[3] = 0;
978
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[4] = 0;
979
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cr_band_pos = 0;
980
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
981
9.06k
    ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
982
983
9.06k
    num_rdo_cand++;
984
985
    /* SAO_note_01: If the CTB lies on a tile or a slice boundary, then
986
    the standard mandates that the merge candidates must be set to unavailable.
987
    Hence, check for tile boundary condition by reading
988
    s_ctb_nbr_avail_flags.u1_left_avail rather than frame position of CTB.
989
    A special case: Merge-candidates should be available at dependent-slices boundaries.
990
    Search for <SAO_note_01> in workspace to know more */
991
992
#if !DISABLE_SAO_WHEN_NOISY
993
    if(1)
994
#else
995
9.06k
    if(!u1_force_no_offset)
996
9.06k
#endif
997
9.06k
    {
998
        /* Merge left cand*/
999
9.06k
        if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_left_avail)
1000
5.11k
        {
1001
5.11k
            memcpy(
1002
5.11k
                &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
1003
5.11k
                &ps_sao_ctxt->s_left_ctb_sao,
1004
5.11k
                sizeof(sao_enc_t));
1005
5.11k
            ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 1;
1006
5.11k
            ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
1007
5.11k
            num_rdo_cand++;
1008
5.11k
        }
1009
1010
        /* Merge top cand*/
1011
9.06k
        if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_top_avail)
1012
5.28k
        {
1013
5.28k
            memcpy(
1014
5.28k
                &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
1015
5.28k
                (ps_sao_ctxt->ps_top_ctb_sao - ps_sao_ctxt->u4_num_ctbs_horz),
1016
5.28k
                sizeof(sao_enc_t));
1017
5.28k
            ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
1018
5.28k
            ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 1;
1019
5.28k
            num_rdo_cand++;
1020
5.28k
        }
1021
1022
        /* Best luma-chroma candidate*/
1023
9.06k
        memcpy(
1024
9.06k
            &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
1025
9.06k
            &s_best_luma_chroma_cand,
1026
9.06k
            sizeof(sao_enc_t));
1027
9.06k
        num_rdo_cand++;
1028
9.06k
    }
1029
1030
9.06k
    {
1031
9.06k
        UWORD32 luma_distortion = 0, chroma_distortion = 0;
1032
        /* First cand will be best cand after 1st iteration*/
1033
9.06k
        curr_buf_idx = 0;
1034
9.06k
        best_buf_idx = 1;
1035
9.06k
        best_cost = 0xFFFFFFFF;
1036
9.06k
        best_cand_idx = 0;
1037
1038
37.5k
        for(rdo_cand = 0; rdo_cand < num_rdo_cand; rdo_cand++)
1039
28.5k
        {
1040
28.5k
            s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand];
1041
1042
28.5k
            distortion = 0;
1043
1044
            /* This memcpy is required because cabac uses parameters from this structure
1045
            * to evaluate bits and this structure ptr is sent to cabac through
1046
            * "ihevce_cabac_rdo_encode_sao" function
1047
            */
1048
28.5k
            memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t));
1049
1050
28.5k
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
1051
28.5k
            {
1052
                /* Copy the left pixels to the scratch buffer for every rdo cand because it is
1053
                overwritten by the sao leaf level function for next ctb*/
1054
28.5k
                memcpy(
1055
28.5k
                    s_sao_ctxt.au1_left_luma_scratch,
1056
28.5k
                    ps_sao_ctxt->au1_sao_src_left_luma,
1057
28.5k
                    ps_sao_ctxt->i4_sao_blk_ht);
1058
1059
                /* Copy the top and top left pixels to the scratch buffer for every rdo cand because it is
1060
                overwritten by the sao leaf level function for next ctb*/
1061
28.5k
                memcpy(
1062
28.5k
                    s_sao_ctxt.au1_top_luma_scratch,
1063
28.5k
                    ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1,
1064
28.5k
                    ps_sao_ctxt->i4_sao_blk_wd + 2);
1065
28.5k
                s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1;
1066
1067
28.5k
                pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx];
1068
1069
                /* Copy the deblocked recon data to scratch buffer to do sao*/
1070
1071
28.5k
                ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
1072
28.5k
                    pu1_luma_scratch_buf,
1073
28.5k
                    i4_luma_scratch_buf_stride,
1074
28.5k
                    pu1_recon_luma,
1075
28.5k
                    luma_recon_stride,
1076
28.5k
                    SCRATCH_BUF_STRIDE,
1077
28.5k
                    ctb_ht + 1);
1078
28.5k
                s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf;
1079
28.5k
                s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride;
1080
1081
28.5k
                ASSERT(
1082
28.5k
                    (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) &&
1083
28.5k
                    (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) &&
1084
28.5k
                    (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) &&
1085
28.5k
                    (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7));
1086
28.5k
            }
1087
28.5k
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1088
28.5k
            {
1089
                /* Copy the left pixels to the scratch buffer for every rdo cand because it is
1090
                overwritten by the sao leaf level function for next ctb*/
1091
28.5k
                memcpy(
1092
28.5k
                    s_sao_ctxt.au1_left_chroma_scratch,
1093
28.5k
                    ps_sao_ctxt->au1_sao_src_left_chroma,
1094
28.5k
                    (ps_sao_ctxt->i4_sao_blk_ht >> !u1_is_422) * 2);
1095
1096
                /* Copy the top and top left pixels to the scratch buffer for every rdo cand because it is
1097
                overwritten by the sao leaf level function for next ctb*/
1098
28.5k
                memcpy(
1099
28.5k
                    s_sao_ctxt.au1_top_chroma_scratch,
1100
28.5k
                    ps_sao_ctxt->pu1_curr_sao_src_top_chroma - 2,
1101
28.5k
                    ps_sao_ctxt->i4_sao_blk_wd + 4);
1102
1103
28.5k
                s_sao_ctxt.pu1_curr_sao_src_top_chroma = s_sao_ctxt.au1_top_chroma_scratch + 2;
1104
1105
28.5k
                pu1_chroma_scratch_buf = ps_sao_ctxt->au1_sao_chroma_scratch[curr_buf_idx];
1106
1107
                /* Copy the deblocked recon data to scratch buffer to do sao*/
1108
1109
28.5k
                ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
1110
28.5k
                    pu1_chroma_scratch_buf,
1111
28.5k
                    i4_chroma_scratch_buf_stride,
1112
28.5k
                    pu1_recon_chroma,
1113
28.5k
                    chroma_recon_stride,
1114
28.5k
                    SCRATCH_BUF_STRIDE,
1115
28.5k
                    (ctb_ht >> !u1_is_422) + 1);
1116
1117
28.5k
                s_sao_ctxt.pu1_cur_chroma_recon_buf = pu1_chroma_scratch_buf;
1118
28.5k
                s_sao_ctxt.i4_cur_chroma_recon_stride = i4_chroma_scratch_buf_stride;
1119
1120
28.5k
                ASSERT(
1121
28.5k
                    (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) &&
1122
28.5k
                    (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) &&
1123
28.5k
                    (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) &&
1124
28.5k
                    (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7));
1125
28.5k
                ASSERT(
1126
28.5k
                    (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) &&
1127
28.5k
                    (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) &&
1128
28.5k
                    (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) &&
1129
28.5k
                    (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7));
1130
28.5k
            }
1131
1132
28.5k
            ASSERT(
1133
28.5k
                (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) &&
1134
28.5k
                (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) &&
1135
28.5k
                (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28));
1136
1137
28.5k
            s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag;
1138
28.5k
            s_sao_ctxt.i1_slice_sao_chroma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_chroma_flag;
1139
1140
28.5k
            ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params);
1141
1142
28.5k
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
1143
28.5k
            {  // clang-format off
1144
28.5k
                luma_distortion =
1145
28.5k
                    ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma,
1146
28.5k
                            s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride,
1147
28.5k
                            s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd,
1148
28.5k
                            ctb_ht,
1149
28.5k
                            NULL_PLANE);
1150
28.5k
            }  // clang-format on
1151
1152
28.5k
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1153
28.5k
            {  // clang-format off
1154
28.5k
                chroma_distortion =
1155
28.5k
                    ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_chroma,
1156
28.5k
                            s_sao_ctxt.pu1_cur_chroma_recon_buf,
1157
28.5k
                            chroma_src_stride,
1158
28.5k
                            s_sao_ctxt.i4_cur_chroma_recon_stride, ctb_wd,
1159
28.5k
                            (ctb_ht >> !u1_is_422),
1160
28.5k
                            NULL_PLANE);
1161
28.5k
            }  // clang-format on
1162
1163
            /*chroma distortion is added after correction because of lambda difference*/
1164
28.5k
            distortion =
1165
28.5k
                luma_distortion +
1166
28.5k
                (UWORD32)(chroma_distortion * (i8_cl_ssd_lambda_qf / i8_cl_ssd_lambda_chroma_qf));
1167
1168
28.5k
            ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx;
1169
28.5k
            ctb_bits = ihevce_cabac_rdo_encode_sao(
1170
28.5k
                ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out);
1171
1172
            /* Calculate the cost as D+(lambda)*R   */
1173
28.5k
            curr_cost = distortion +
1174
28.5k
                        COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
1175
1176
28.5k
            if(curr_cost < best_cost)
1177
16.2k
            {
1178
16.2k
                best_ctb_sao_bits = ctb_bits;
1179
16.2k
                best_cost = curr_cost;
1180
16.2k
                best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
1181
16.2k
                best_cand_idx = rdo_cand;
1182
16.2k
                curr_buf_idx = !curr_buf_idx;
1183
16.2k
            }
1184
28.5k
        }
1185
        /*Adding sao bits to header bits*/
1186
9.06k
        *pu4_frame_rdopt_header_bits = best_ctb_sao_bits;
1187
1188
9.06k
        ihevce_update_best_sao_cabac_state(ps_sao_ctxt->ps_rdopt_entropy_ctxt, best_buf_idx);
1189
1190
        /* store the sao parameters of curr ctb for top merge and left merge*/
1191
9.06k
        memcpy(
1192
9.06k
            ps_sao_ctxt->ps_top_ctb_sao,
1193
9.06k
            &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
1194
9.06k
            sizeof(sao_enc_t));
1195
9.06k
        memcpy(
1196
9.06k
            &ps_sao_ctxt->s_left_ctb_sao,
1197
9.06k
            &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
1198
9.06k
            sizeof(sao_enc_t));
1199
1200
        /* Copy the sao parameters of winning candidate into the structure which will be sent to entropy thrd*/
1201
9.06k
        memcpy(
1202
9.06k
            &ps_ctb_enc_loop_out->s_sao,
1203
9.06k
            &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
1204
9.06k
            sizeof(sao_enc_t));
1205
1206
9.06k
        if(!ps_sao_ctxt->i4_is_last_ctb_col)
1207
5.11k
        {
1208
            /* Update left luma buffer for next ctb */
1209
289k
            for(row = 0; row < ps_sao_ctxt->i4_sao_blk_ht; row++)
1210
284k
            {
1211
284k
                ps_sao_ctxt->au1_sao_src_left_luma[row] =
1212
284k
                    ps_sao_ctxt->pu1_cur_luma_recon_buf
1213
284k
                        [row * ps_sao_ctxt->i4_cur_luma_recon_stride +
1214
284k
                         (ps_sao_ctxt->i4_sao_blk_wd - 1)];
1215
284k
            }
1216
5.11k
        }
1217
1218
9.06k
        if(!ps_sao_ctxt->i4_is_last_ctb_col)
1219
5.11k
        {
1220
            /* Update left chroma buffer for next ctb */
1221
147k
            for(row = 0; row < (ps_sao_ctxt->i4_sao_blk_ht >> 1); row++)
1222
142k
            {
1223
142k
                *(UWORD16 *)(ps_sao_ctxt->au1_sao_src_left_chroma + row * 2) =
1224
142k
                    *(UWORD16 *)(ps_sao_ctxt->pu1_cur_chroma_recon_buf +
1225
142k
                                 row * ps_sao_ctxt->i4_cur_chroma_recon_stride +
1226
142k
                                 (ps_sao_ctxt->i4_sao_blk_wd - 2));
1227
142k
            }
1228
5.11k
        }
1229
1230
9.06k
        if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
1231
9.06k
        {
1232
            /* Copy the sao'ed output of the best candidate to the recon buffer*/
1233
1234
9.06k
            ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
1235
9.06k
                ps_sao_ctxt->pu1_cur_luma_recon_buf,
1236
9.06k
                ps_sao_ctxt->i4_cur_luma_recon_stride,
1237
9.06k
                ps_sao_ctxt->au1_sao_luma_scratch[best_buf_idx],
1238
9.06k
                i4_luma_scratch_buf_stride,
1239
9.06k
                ctb_wd,
1240
9.06k
                ctb_ht);
1241
9.06k
        }
1242
9.06k
        if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
1243
9.06k
        {
1244
            /* Copy the sao'ed output of the best candidate to the chroma recon buffer*/
1245
1246
9.06k
            ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
1247
9.06k
                ps_sao_ctxt->pu1_cur_chroma_recon_buf,
1248
9.06k
                ps_sao_ctxt->i4_cur_chroma_recon_stride,
1249
9.06k
                ps_sao_ctxt->au1_sao_chroma_scratch[best_buf_idx],
1250
9.06k
                i4_chroma_scratch_buf_stride,
1251
9.06k
                ctb_wd,
1252
9.06k
                ctb_ht >> !u1_is_422);
1253
9.06k
        }
1254
9.06k
    }
1255
9.06k
}