Coverage Report

Created: 2026-05-30 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/decoder/ihevcd_sao.c
Line
Count
Source
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevcd_sao.c
22
 *
23
 * @brief
24
 *  Contains function definitions for sample adaptive offset process
25
 *
26
 * @author
27
 *  Srinivas T
28
 *
29
 * @par List of Functions:
30
 *
31
 * @remarks
32
 *  None
33
 *
34
 *******************************************************************************
35
 */
36
37
#include <stdio.h>
38
#include <stddef.h>
39
#include <stdlib.h>
40
#include <string.h>
41
#include <assert.h>
42
43
#include "ihevc_typedefs.h"
44
#include "iv.h"
45
#include "ivd.h"
46
#include "ihevcd_cxa.h"
47
#include "ithread.h"
48
49
#include "ihevc_defs.h"
50
#include "ihevc_debug.h"
51
#include "ihevc_defs.h"
52
#include "ihevc_structs.h"
53
#include "ihevc_macros.h"
54
#include "ihevc_platform_macros.h"
55
#include "ihevc_cabac_tables.h"
56
#include "ihevc_sao.h"
57
#include "ihevc_mem_fns.h"
58
59
#include "ihevc_error.h"
60
#include "ihevc_common_tables.h"
61
62
#include "ihevcd_trace.h"
63
#include "ihevcd_defs.h"
64
#include "ihevcd_function_selector.h"
65
#include "ihevcd_structs.h"
66
#include "ihevcd_error.h"
67
#include "ihevcd_nal.h"
68
#include "ihevcd_bitstream.h"
69
#include "ihevcd_job_queue.h"
70
#include "ihevcd_utils.h"
71
72
#include "ihevc_deblk.h"
73
#include "ihevc_deblk_tables.h"
74
#include "ihevcd_profile.h"
75
#include "ihevcd_sao.h"
76
#include "ihevcd_debug.h"
77
78
487M
#define SAO_SHIFT_CTB    8 /* Luma-sample offset by which the SAO block is shifted up/left of the CTB origin (chroma code below uses this value or twice it) */
79
80
81
82
void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
83
12.5M
{
84
12.5M
    codec_t *ps_codec = ps_sao_ctxt->ps_codec;
85
12.5M
    UWORD8 *pu1_src_luma;
86
12.5M
    UWORD8 *pu1_src_chroma;
87
12.5M
    WORD32 src_strd;
88
12.5M
    WORD32 chroma_strd;
89
12.5M
    WORD32 ctb_size;
90
12.5M
    WORD32 log2_ctb_size;
91
12.5M
    sps_t *ps_sps;
92
12.5M
    sao_t *ps_sao;
93
12.5M
    pps_t *ps_pps;
94
12.5M
    slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
95
12.5M
    tile_t *ps_tile;
96
12.5M
    UWORD16 *pu1_slice_idx;
97
12.5M
    UWORD16 *pu1_tile_idx;
98
12.5M
    WORD32 row, col;
99
12.5M
    UWORD8 au1_avail_luma[8];
100
12.5M
    UWORD8 au1_avail_chroma[8];
101
12.5M
    UWORD8 au1_tile_slice_boundary[8];
102
12.5M
    UWORD8 au4_ilf_across_tile_slice_enable[8];
103
12.5M
    WORD32 i;
104
12.5M
    UWORD8 *pu1_src_top_luma;
105
12.5M
    UWORD8 *pu1_src_top_chroma;
106
12.5M
    UWORD8 *pu1_src_left_luma;
107
12.5M
    UWORD8 *pu1_src_left_chroma;
108
12.5M
    UWORD8 au1_src_top_right[2];
109
12.5M
    UWORD8 au1_src_bot_left[2];
110
12.5M
    UWORD8 *pu1_no_loop_filter_flag;
111
12.5M
    UWORD8 *pu1_src_backup_luma;
112
12.5M
    UWORD8 *pu1_src_backup_chroma;
113
12.5M
    WORD32 backup_strd;
114
12.5M
    WORD32 loop_filter_strd;
115
116
12.5M
    WORD32 no_loop_filter_enabled_luma = 0;
117
12.5M
    WORD32 no_loop_filter_enabled_chroma = 0;
118
12.5M
    UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
119
12.5M
    UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
120
12.5M
    UWORD8 *pu1_sao_src_luma_top_left_ctb;
121
12.5M
    UWORD8 *pu1_sao_src_chroma_top_left_ctb;
122
12.5M
    UWORD8 *pu1_sao_src_top_left_luma_top_right;
123
12.5M
    UWORD8 *pu1_sao_src_top_left_chroma_top_right;
124
12.5M
    UWORD8  u1_sao_src_top_left_luma_bot_left;
125
12.5M
    UWORD8  *pu1_sao_src_top_left_luma_bot_left;
126
12.5M
    UWORD8 *au1_sao_src_top_left_chroma_bot_left;
127
12.5M
    UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
128
    /* Only first 5 values are used, but arrays are large
129
     enough so that SIMD functions can read 64 bits at a time */
130
12.5M
    WORD8 ai1_offset_y[8] = {0};
131
12.5M
    WORD8 ai1_offset_cb[8] = {0};
132
12.5M
    WORD8 ai1_offset_cr[8] = {0};
133
12.5M
    WORD32  chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
134
135
12.5M
    PROFILE_DISABLE_SAO();
136
137
12.5M
    ps_sps = ps_sao_ctxt->ps_sps;
138
12.5M
    ps_pps = ps_sao_ctxt->ps_pps;
139
12.5M
    ps_tile = ps_sao_ctxt->ps_tile;
140
141
12.5M
    WORD32 is_yuv444 = ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 1 : 0;
142
12.5M
    WORD32 h_samp_factor = (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 1 : 2;
143
12.5M
    WORD32 v_samp_factor = (CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc) ? 2 : 1;
144
12.5M
    WORD32 chroma_pixel_strd = 2;
145
146
12.5M
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
147
12.5M
    ctb_size = (1 << log2_ctb_size);
148
12.5M
    src_strd = ps_sao_ctxt->ps_codec->i4_strd;
149
12.5M
    chroma_strd = src_strd * chroma_pixel_strd / h_samp_factor;
150
12.5M
    ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
151
12.5M
    ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
152
153
12.5M
    pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
154
12.5M
    pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
155
12.5M
    pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
156
12.5M
    pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma
157
12.5M
                    + ((ps_sao_ctxt->i4_ctb_x * chroma_pixel_strd / h_samp_factor
158
12.5M
                    + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd * chroma_pixel_strd / (h_samp_factor * v_samp_factor)) << (log2_ctb_size));
159
160
    /* Stores the left value for each row of CTBs - needed for column tiles */
161
12.5M
    pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
162
12.5M
    pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
163
12.5M
    pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
164
12.5M
    pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
165
12.5M
    u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
166
12.5M
    pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
167
12.5M
    au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
168
12.5M
    pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
169
12.5M
    pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
170
12.5M
    pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
171
172
12.5M
    ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
173
12.5M
    loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
174
12.5M
    backup_strd = 2 * MAX_CTB_SIZE;
175
176
12.5M
    DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
177
178
12.5M
    {
179
        /* Check the loop filter flags and copy the original values for back up */
180
        /* Luma */
181
182
        /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs
183
         * can belong to different slice with their own sao_enable flag */
184
12.5M
        {
185
12.5M
            UWORD32 u4_no_loop_filter_flag;
186
12.5M
            WORD32 loop_filter_bit_pos;
187
12.5M
            WORD32 log2_min_cu = 3;
188
12.5M
            WORD32 min_cu = (1 << log2_min_cu);
189
12.5M
            UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
190
12.5M
            WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
191
12.5M
            WORD32 sao_blk_wd = ctb_size;
192
12.5M
            WORD32 remaining_rows;
193
12.5M
            WORD32 remaining_cols;
194
195
12.5M
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
196
12.5M
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
197
12.5M
            if(remaining_rows <= SAO_SHIFT_CTB)
198
512k
                sao_blk_ht += remaining_rows;
199
12.5M
            if(remaining_cols <= SAO_SHIFT_CTB)
200
587k
                sao_blk_wd += remaining_cols;
201
202
12.5M
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
203
12.5M
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
204
205
12.5M
            pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
206
207
12.5M
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
208
12.5M
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
209
12.5M
            if(ps_sao_ctxt->i4_ctb_x > 0)
210
11.9M
                loop_filter_bit_pos -= 1;
211
212
12.5M
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
213
12.5M
                            (loop_filter_bit_pos >> 3);
214
215
12.5M
            for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
216
55.5M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
217
43.0M
            {
218
43.0M
                WORD32 tmp_wd = sao_blk_wd;
219
220
43.0M
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
221
43.0M
                                (loop_filter_bit_pos & 7);
222
43.0M
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
223
224
43.0M
                if(u4_no_loop_filter_flag)
225
102k
                {
226
102k
                    no_loop_filter_enabled_luma = 1;
227
281k
                    while(tmp_wd > 0)
228
179k
                    {
229
179k
                        if(CTZ(u4_no_loop_filter_flag))
230
68.1k
                        {
231
68.1k
                            pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
232
68.1k
                            pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
233
68.1k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
234
68.1k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
235
68.1k
                        }
236
110k
                        else
237
110k
                        {
238
993k
                            for(row = 0; row < min_cu; row++)
239
883k
                            {
240
30.2M
                                for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
241
29.3M
                                {
242
29.3M
                                    pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
243
29.3M
                                }
244
883k
                            }
245
110k
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
246
110k
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
247
110k
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
248
110k
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
249
110k
                        }
250
179k
                    }
251
252
102k
                    pu1_src_tmp_luma -= sao_blk_wd;
253
102k
                    pu1_src_backup_luma -= sao_blk_wd;
254
102k
                }
255
256
43.0M
                pu1_src_tmp_luma += (src_strd << log2_min_cu);
257
43.0M
                pu1_src_backup_luma += (backup_strd << log2_min_cu);
258
43.0M
            }
259
12.5M
        }
260
261
        /* Chroma */
262
263
12.5M
        if (CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc)
264
12.5M
        {
265
12.5M
            UWORD32 u4_no_loop_filter_flag;
266
12.5M
            WORD32 loop_filter_bit_pos;
267
12.5M
            WORD32 log2_min_cu = 3;
268
12.5M
            WORD32 min_cu = (1 << log2_min_cu);
269
12.5M
            UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
270
12.5M
            WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
271
12.5M
            WORD32 sao_blk_wd = ctb_size;
272
12.5M
            WORD32 remaining_rows;
273
12.5M
            WORD32 remaining_cols;
274
275
12.5M
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
276
12.5M
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
277
12.5M
            if(remaining_rows <= 2 * SAO_SHIFT_CTB)
278
512k
                sao_blk_ht += remaining_rows;
279
12.5M
            if(remaining_cols <= 2 * SAO_SHIFT_CTB)
280
587k
                sao_blk_wd += remaining_cols;
281
282
12.5M
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
283
12.5M
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * chroma_strd : 0;
284
285
12.5M
            pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
286
287
12.5M
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
288
12.5M
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
289
12.5M
            if(ps_sao_ctxt->i4_ctb_x > 0)
290
11.9M
                loop_filter_bit_pos -= (is_yuv444 ? 1 : 2);
291
292
12.5M
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
293
12.5M
                            (loop_filter_bit_pos >> 3);
294
295
12.5M
            for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
296
55.5M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
297
43.0M
            {
298
43.0M
                WORD32 tmp_wd = sao_blk_wd;
299
300
43.0M
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
301
43.0M
                                (loop_filter_bit_pos & 7);
302
43.0M
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
303
304
43.0M
                if(u4_no_loop_filter_flag)
305
102k
                {
306
102k
                    no_loop_filter_enabled_chroma = 1;
307
280k
                    while(tmp_wd > 0)
308
178k
                    {
309
178k
                        if(CTZ(u4_no_loop_filter_flag))
310
68.4k
                        {
311
68.4k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd) * chroma_pixel_strd / h_samp_factor;
312
68.4k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd) * chroma_pixel_strd / h_samp_factor;
313
68.4k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
314
68.4k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
315
68.4k
                        }
316
109k
                        else
317
109k
                        {
318
548k
                            for(row = 0; row < min_cu / v_samp_factor; row++)
319
438k
                            {
320
16.2M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd) * chroma_pixel_strd / h_samp_factor; col++)
321
15.8M
                                {
322
15.8M
                                    pu1_src_backup_chroma[row * backup_strd * (chroma_pixel_strd / h_samp_factor) + col] = pu1_src_tmp_chroma[row * chroma_strd + col];
323
15.8M
                                }
324
438k
                            }
325
326
109k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd) * chroma_pixel_strd / h_samp_factor;
327
109k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd) * chroma_pixel_strd / h_samp_factor;
328
109k
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
329
109k
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
330
109k
                        }
331
178k
                    }
332
333
102k
                    pu1_src_tmp_chroma -= sao_blk_wd * (chroma_pixel_strd / h_samp_factor);
334
102k
                    pu1_src_backup_chroma -= sao_blk_wd * (chroma_pixel_strd / h_samp_factor);
335
102k
                }
336
337
43.0M
                pu1_src_tmp_chroma += (((src_strd * chroma_pixel_strd) / (h_samp_factor * v_samp_factor)) << log2_min_cu);
338
43.0M
                pu1_src_backup_chroma += (((backup_strd * chroma_pixel_strd) / (h_samp_factor * v_samp_factor)) << log2_min_cu);
339
43.0M
            }
340
12.5M
        }
341
12.5M
    }
342
343
12.5M
    DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
344
345
    /* Top-left CTB */
346
12.5M
    if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
347
11.4M
    {
348
11.4M
        WORD32 sao_wd_luma = SAO_SHIFT_CTB;
349
11.4M
        WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
350
11.4M
        WORD32 sao_ht_luma = SAO_SHIFT_CTB;
351
11.4M
        WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
352
353
11.4M
        WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
354
11.4M
        WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
355
11.4M
        WORD32 au4_idx_tl[8], idx_tl;
356
357
11.4M
        slice_header_t *ps_slice_hdr_top_left;
358
11.4M
        {
359
11.4M
            WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
360
11.4M
                                        (ps_sao_ctxt->i4_ctb_x - 1);
361
11.4M
            ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx];
362
11.4M
        }
363
364
365
11.4M
        pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
366
11.4M
        pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * chroma_strd);
367
11.4M
        ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
368
11.4M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
369
11.4M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) * (chroma_pixel_strd / h_samp_factor) - sao_wd_chroma;
370
11.4M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
371
11.4M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) * (chroma_pixel_strd / v_samp_factor) - (2 * sao_ht_chroma);
372
373
11.4M
        if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag)
374
4.48M
        {
375
4.48M
            if(0 == ps_sao->b3_y_type_idx)
376
4.30M
            {
377
                /* Update left, top and top-left */
378
38.1M
                for(row = 0; row < sao_ht_luma; row++)
379
33.8M
                {
380
33.8M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
381
33.8M
                }
382
4.30M
                pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
383
384
4.30M
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
385
386
387
4.30M
            }
388
389
179k
            else if(1 == ps_sao->b3_y_type_idx)
390
96.4k
            {
391
96.4k
                ai1_offset_y[1] = ps_sao->b4_y_offset_1;
392
96.4k
                ai1_offset_y[2] = ps_sao->b4_y_offset_2;
393
96.4k
                ai1_offset_y[3] = ps_sao->b4_y_offset_3;
394
96.4k
                ai1_offset_y[4] = ps_sao->b4_y_offset_4;
395
396
96.4k
                ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
397
96.4k
                                                                          src_strd,
398
96.4k
                                                                          pu1_src_left_luma,
399
96.4k
                                                                          pu1_src_top_luma,
400
96.4k
                                                                          pu1_sao_src_luma_top_left_ctb,
401
96.4k
                                                                          ps_sao->b5_y_band_pos,
402
96.4k
                                                                          ai1_offset_y,
403
96.4k
                                                                          sao_wd_luma,
404
96.4k
                                                                          sao_ht_luma
405
96.4k
                                                                         );
406
96.4k
            }
407
408
82.9k
            else // if(2 <= ps_sao->b3_y_type_idx)
409
82.9k
            {
410
82.9k
                ai1_offset_y[1] = ps_sao->b4_y_offset_1;
411
82.9k
                ai1_offset_y[2] = ps_sao->b4_y_offset_2;
412
82.9k
                ai1_offset_y[3] = ps_sao->b4_y_offset_3;
413
82.9k
                ai1_offset_y[4] = ps_sao->b4_y_offset_4;
414
415
747k
                for(i = 0; i < 8; i++)
416
664k
                {
417
664k
                    au1_avail_luma[i] = 255;
418
664k
                    au1_tile_slice_boundary[i] = 0;
419
664k
                    au4_idx_tl[i] = 0;
420
664k
                    au4_ilf_across_tile_slice_enable[i] = 1;
421
664k
                }
422
423
                /******************************************************************
424
                 * Derive the  Top-left CTB's neighbor pixel's slice indices.
425
                 *
426
                 *          TL_T
427
                 *       4  _2__5________
428
                 *     0   |    |       |
429
                 *    TL_L | TL | 1 TL_R|
430
                 *         |____|_______|____
431
                 *        6|TL_D|7      |    |
432
                 *         | 3  |       |    |
433
                 *         |____|_______|    |
434
                 *              |            |
435
                 *              |            |
436
                 *              |____________|
437
                 *
438
                 *****************************************************************/
439
440
                /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/
441
82.9k
                {
442
82.9k
                    if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
443
51.4k
                    {
444
51.4k
                        {
445
                            /*Assuming that sao shift is uniform along x and y directions*/
446
51.4k
                            if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
447
0
                            {
448
0
                                ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
449
0
                                ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
450
0
                            }
451
51.4k
                            else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
452
51.4k
                            {
453
51.4k
                                ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
454
51.4k
                                ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
455
51.4k
                            }
456
51.4k
                            ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
457
51.4k
                            ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
458
459
51.4k
                            ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
460
51.4k
                            ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
461
462
51.4k
                            ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
463
51.4k
                            ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
464
465
51.4k
                            ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
466
51.4k
                            ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
467
51.4k
                        }
468
469
51.4k
                        if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
470
7.70k
                        {
471
                            /*Calculate slice indices for neighbor pixels*/
472
7.70k
                            idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
473
7.70k
                            au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
474
7.70k
                            au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
475
7.70k
                            au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
476
7.70k
                            au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
477
7.70k
                            au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
478
479
7.70k
                            if((0 == (1 << log2_ctb_size) - sao_wd_luma))
480
0
                            {
481
0
                                if(ps_sao_ctxt->i4_ctb_x == 1)
482
0
                                {
483
0
                                    au4_idx_tl[6] = -1;
484
0
                                    au4_idx_tl[4] = -1;
485
0
                                }
486
0
                                else
487
0
                                {
488
0
                                    au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
489
0
                                }
490
0
                                if(ps_sao_ctxt->i4_ctb_y == 1)
491
0
                                {
492
0
                                    au4_idx_tl[5] = -1;
493
0
                                    au4_idx_tl[4] = -1;
494
0
                                }
495
0
                                else
496
0
                                {
497
0
                                    au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
498
0
                                    au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
499
0
                                }
500
0
                                au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
501
0
                            }
502
503
                            /* Verify that the neighbor CTBs don't cross the picture boundary.
504
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
505
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags.
506
                             * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
507
                             * the respective pixel's flags are checked
508
                             */
509
510
7.70k
                            if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
511
0
                            {
512
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
513
0
                                au4_ilf_across_tile_slice_enable[6] = 0;
514
0
                            }
515
7.70k
                            else
516
7.70k
                            {
517
7.70k
                                au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
518
7.70k
                            }
519
7.70k
                            if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
520
0
                            {
521
0
                                au4_ilf_across_tile_slice_enable[5] = 0;
522
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
523
0
                            }
524
7.70k
                            else
525
7.70k
                            {
526
7.70k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
527
7.70k
                                au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
528
7.70k
                            }
529
7.70k
                            au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
530
7.70k
                            au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
531
7.70k
                            au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
532
7.70k
                            au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
533
7.70k
                            au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
534
535
7.70k
                            if(au4_idx_tl[5] > idx_tl)
536
872
                            {
537
872
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
538
872
                            }
539
540
                            /*
541
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
542
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags.
543
                             * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
544
                             * the respective pixel's flags are checked
545
                             */
546
69.2k
                            for(i = 0; i < 8; i++)
547
61.5k
                            {
548
                                /*Sets the edges that lie on the slice/tile boundary*/
549
61.5k
                                if(au4_idx_tl[i] != idx_tl)
550
16.7k
                                {
551
16.7k
                                    au1_tile_slice_boundary[i] = 1;
552
16.7k
                                }
553
44.8k
                                else
554
44.8k
                                {
555
44.8k
                                    au4_ilf_across_tile_slice_enable[i] = 1;
556
44.8k
                                }
557
61.5k
                            }
558
559
7.70k
                            ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
560
7.70k
                        }
561
562
51.4k
                        if(ps_pps->i1_tiles_enabled_flag)
563
43.8k
                        {
564
                            /* Calculate availability flags at tile boundary */
565
43.8k
                            if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
566
21.3k
                            {
567
                                /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
568
21.3k
                                if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
569
19.7k
                                {
570
                                    /*Set the boundary arrays*/
571
                                    /*Calculate tile indices for neighbor pixels*/
572
19.7k
                                    idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
573
19.7k
                                    au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
574
19.7k
                                    au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
575
19.7k
                                    au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
576
19.7k
                                    au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
577
19.7k
                                    au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
578
579
19.7k
                                    if((0 == (1 << log2_ctb_size) - sao_wd_luma))
580
0
                                    {
581
0
                                        if(ps_sao_ctxt->i4_ctb_x == 1)
582
0
                                        {
583
0
                                            au4_idx_tl[6] = -1;
584
0
                                            au4_idx_tl[4] = -1;
585
0
                                        }
586
0
                                        else
587
0
                                        {
588
0
                                            au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
589
0
                                        }
590
0
                                        if(ps_sao_ctxt->i4_ctb_y == 1)
591
0
                                        {
592
0
                                            au4_idx_tl[5] = -1;
593
0
                                            au4_idx_tl[4] = -1;
594
0
                                        }
595
0
                                        else
596
0
                                        {
597
0
                                            au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
598
0
                                            au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
599
0
                                        }
600
0
                                        au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
601
0
                                    }
602
178k
                                    for(i = 0; i < 8; i++)
603
158k
                                    {
604
                                        /*Sets the edges that lie on the tile boundary*/
605
158k
                                        if(au4_idx_tl[i] != idx_tl)
606
60.7k
                                        {
607
60.7k
                                            au1_tile_slice_boundary[i] |= 1;
608
60.7k
                                            au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
609
60.7k
                                        }
610
158k
                                    }
611
19.7k
                                }
612
21.3k
                            }
613
43.8k
                        }
614
615
616
                        /*Set availability flags based on tile and slice boundaries*/
617
463k
                        for(i = 0; i < 8; i++)
618
411k
                        {
619
                            /*Sets the edges that lie on the slice/tile boundary*/
620
411k
                            if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
621
60.7k
                            {
622
60.7k
                                au1_avail_luma[i] = 0;
623
60.7k
                            }
624
411k
                        }
625
51.4k
                    }
626
82.9k
                }
627
628
82.9k
                if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
629
0
                {
630
0
                    au1_avail_luma[0] = 0;
631
0
                    au1_avail_luma[4] = 0;
632
0
                    au1_avail_luma[6] = 0;
633
0
                }
634
635
82.9k
                if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
636
0
                {
637
0
                    au1_avail_luma[1] = 0;
638
0
                    au1_avail_luma[5] = 0;
639
0
                    au1_avail_luma[7] = 0;
640
0
                }
641
                //y==1 case
642
82.9k
                if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
643
0
                {
644
0
                    au1_avail_luma[2] = 0;
645
0
                    au1_avail_luma[4] = 0;
646
0
                    au1_avail_luma[5] = 0;
647
0
                }
648
82.9k
                if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
649
0
                {
650
0
                    au1_avail_luma[3] = 0;
651
0
                    au1_avail_luma[6] = 0;
652
0
                    au1_avail_luma[7] = 0;
653
0
                }
654
655
82.9k
                {
656
82.9k
                    au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
657
82.9k
                    u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
658
82.9k
                    ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
659
82.9k
                                                                      src_strd,
660
82.9k
                                                                      pu1_src_left_luma,
661
82.9k
                                                                      pu1_src_top_luma,
662
82.9k
                                                                      pu1_sao_src_luma_top_left_ctb,
663
82.9k
                                                                      au1_src_top_right,
664
82.9k
                                                                      &u1_sao_src_top_left_luma_bot_left,
665
82.9k
                                                                      au1_avail_luma,
666
82.9k
                                                                      ai1_offset_y,
667
82.9k
                                                                      sao_wd_luma,
668
82.9k
                                                                      sao_ht_luma);
669
82.9k
                }
670
82.9k
            }
671
672
4.48M
        }
673
7.00M
        else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
674
3.54M
        {
675
            /* Update left, top and top-left */
676
31.6M
            for(row = 0; row < sao_ht_luma; row++)
677
28.0M
            {
678
28.0M
                pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
679
28.0M
            }
680
3.54M
            pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
681
682
3.54M
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
683
3.54M
        }
684
685
11.4M
        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && ps_slice_hdr_top_left->i1_slice_sao_chroma_flag)
686
5.10M
        {
687
5.10M
            if(0 == ps_sao->b3_cb_type_idx)
688
4.97M
            {
689
44.5M
                for(row = 0; row < sao_ht_chroma; row++)
690
39.5M
                {
691
39.5M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * chroma_strd + (sao_wd_chroma - 2)];
692
39.5M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * chroma_strd + (sao_wd_chroma - 1)];
693
39.5M
                }
694
4.97M
                pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
695
4.97M
                pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
696
697
4.97M
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * chroma_strd], sao_wd_chroma);
698
699
4.97M
            }
700
701
136k
            else if(1 == ps_sao->b3_cb_type_idx)
702
68.6k
            {
703
68.6k
                ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
704
68.6k
                ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
705
68.6k
                ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
706
68.6k
                ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
707
708
68.6k
                ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
709
68.6k
                ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
710
68.6k
                ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
711
68.6k
                ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
712
713
68.6k
                if(chroma_yuv420sp_vu)
714
10.1k
                {
715
10.1k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
716
10.1k
                                                                                chroma_strd,
717
10.1k
                                                                                pu1_src_left_chroma,
718
10.1k
                                                                                pu1_src_top_chroma,
719
10.1k
                                                                                pu1_sao_src_chroma_top_left_ctb,
720
10.1k
                                                                                ps_sao->b5_cr_band_pos,
721
10.1k
                                                                                ps_sao->b5_cb_band_pos,
722
10.1k
                                                                                ai1_offset_cr,
723
10.1k
                                                                                ai1_offset_cb,
724
10.1k
                                                                                sao_wd_chroma,
725
10.1k
                                                                                sao_ht_chroma
726
10.1k
                                                                               );
727
10.1k
                }
728
58.4k
                else
729
58.4k
                {
730
58.4k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
731
58.4k
                                                                                chroma_strd,
732
58.4k
                                                                                pu1_src_left_chroma,
733
58.4k
                                                                                pu1_src_top_chroma,
734
58.4k
                                                                                pu1_sao_src_chroma_top_left_ctb,
735
58.4k
                                                                                ps_sao->b5_cb_band_pos,
736
58.4k
                                                                                ps_sao->b5_cr_band_pos,
737
58.4k
                                                                                ai1_offset_cb,
738
58.4k
                                                                                ai1_offset_cr,
739
58.4k
                                                                                sao_wd_chroma,
740
58.4k
                                                                                sao_ht_chroma
741
58.4k
                                                                               );
742
58.4k
                }
743
68.6k
            }
744
745
67.6k
            else // if(2 <= ps_sao->b3_cb_type_idx)
746
67.6k
            {
747
67.6k
                ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
748
67.6k
                ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
749
67.6k
                ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
750
67.6k
                ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
751
752
67.6k
                ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
753
67.6k
                ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
754
67.6k
                ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
755
67.6k
                ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
756
620k
                for(i = 0; i < 8; i++)
757
552k
                {
758
552k
                    au1_avail_chroma[i] = 255;
759
552k
                    au1_tile_slice_boundary[i] = 0;
760
552k
                    au4_idx_tl[i] = 0;
761
552k
                    au4_ilf_across_tile_slice_enable[i] = 1;
762
552k
                }
763
                /*In case of slices*/
764
67.6k
                {
765
67.6k
                    if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
766
45.3k
                    {
767
45.3k
                        if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
768
443
                        {
769
443
                            ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
770
443
                            ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
771
443
                        }
772
44.9k
                        else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
773
43.7k
                        {
774
43.7k
                            ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
775
43.7k
                            ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
776
43.7k
                        }
777
45.3k
                        ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
778
45.3k
                        ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
779
780
45.3k
                        ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
781
45.3k
                        ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
782
783
45.3k
                        ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
784
45.3k
                        ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
785
786
45.3k
                        ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
787
45.3k
                        ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
788
789
45.3k
                        if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
790
4.32k
                        {
791
792
4.32k
                            idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
793
4.32k
                            au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
794
4.32k
                            au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
795
4.32k
                            au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
796
4.32k
                            au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
797
4.32k
                            au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
798
799
4.32k
                            if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
800
11
                            {
801
11
                                if(ps_sao_ctxt->i4_ctb_x == 1)
802
0
                                {
803
0
                                    au4_idx_tl[6] = -1;
804
0
                                    au4_idx_tl[4] = -1;
805
0
                                }
806
11
                                else
807
11
                                {
808
11
                                    au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
809
11
                                }
810
11
                                if(ps_sao_ctxt->i4_ctb_y == 1)
811
9
                                {
812
9
                                    au4_idx_tl[5] = -1;
813
9
                                    au4_idx_tl[4] = -1;
814
9
                                }
815
2
                                else
816
2
                                {
817
2
                                    au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
818
2
                                    au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
819
2
                                }
820
11
                                au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
821
11
                            }
822
823
                            /* Verify that the neighbor ctbs don't cross pic boundary
824
                             * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
825
4.32k
                            if((0 == ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) * (chroma_pixel_strd / h_samp_factor)) - sao_wd_chroma))
826
0
                            {
827
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
828
0
                                au4_ilf_across_tile_slice_enable[6] = 0;
829
0
                            }
830
4.32k
                            else
831
4.32k
                            {
832
4.32k
                                au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
833
4.32k
                            }
834
4.32k
                            if((0 == ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) / v_samp_factor) - sao_ht_chroma))
835
9
                            {
836
9
                                au4_ilf_across_tile_slice_enable[5] = 0;
837
9
                                au4_ilf_across_tile_slice_enable[4] = 0;
838
9
                            }
839
4.31k
                            else
840
4.31k
                            {
841
4.31k
                                au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
842
4.31k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
843
4.31k
                            }
844
4.32k
                            au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
845
4.32k
                            au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
846
4.32k
                            au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
847
4.32k
                            au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
848
4.32k
                            au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
849
                            /*
850
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
851
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags
852
                             */
853
38.8k
                            for(i = 0; i < 8; i++)
854
34.5k
                            {
855
                                /*Sets the edges that lie on the slice/tile boundary*/
856
34.5k
                                if(au4_idx_tl[i] != idx_tl)
857
6.14k
                                {
858
6.14k
                                    au1_tile_slice_boundary[i] = 1;
859
6.14k
                                }
860
28.3k
                                else
861
28.3k
                                {
862
28.3k
                                    au4_ilf_across_tile_slice_enable[i] = 1;
863
28.3k
                                }
864
34.5k
                            }
865
866
                            /*Reset indices*/
867
38.8k
                            for(i = 0; i < 8; i++)
868
34.5k
                            {
869
34.5k
                                au4_idx_tl[i] = 0;
870
34.5k
                            }
871
4.32k
                        }
872
45.3k
                        if(ps_pps->i1_tiles_enabled_flag)
873
41.2k
                        {
874
                            /* Calculate availability flags at tile boundary */
875
41.2k
                            if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
876
20.8k
                            {
877
                                /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
878
20.8k
                                if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
879
19.6k
                                {
880
                                    /*Set the boundary arrays*/
881
                                    /*Calculate tile indices for neighbor pixels*/
882
19.6k
                                    idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
883
19.6k
                                    au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
884
19.6k
                                    au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
885
19.6k
                                    au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
886
19.6k
                                    au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
887
19.6k
                                    au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
888
889
19.6k
                                    if((0 == (1 << log2_ctb_size) - sao_wd_luma))
890
0
                                    {
891
0
                                        if(ps_sao_ctxt->i4_ctb_x == 1)
892
0
                                        {
893
0
                                            au4_idx_tl[6] = -1;
894
0
                                            au4_idx_tl[4] = -1;
895
0
                                        }
896
0
                                        else
897
0
                                        {
898
0
                                            au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
899
0
                                        }
900
0
                                        if(ps_sao_ctxt->i4_ctb_y == 1)
901
0
                                        {
902
0
                                            au4_idx_tl[5] = -1;
903
0
                                            au4_idx_tl[4] = -1;
904
0
                                        }
905
0
                                        else
906
0
                                        {
907
0
                                            au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
908
0
                                            au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
909
0
                                        }
910
0
                                        au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
911
0
                                    }
912
177k
                                    for(i = 0; i < 8; i++)
913
157k
                                    {
914
                                        /*Sets the edges that lie on the tile boundary*/
915
157k
                                        if(au4_idx_tl[i] != idx_tl)
916
61.9k
                                        {
917
61.9k
                                            au1_tile_slice_boundary[i] |= 1;
918
61.9k
                                            au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
919
61.9k
                                        }
920
157k
                                    }
921
19.6k
                                }
922
20.8k
                            }
923
41.2k
                        }
924
925
408k
                        for(i = 0; i < 8; i++)
926
362k
                        {
927
                            /*Sets the edges that lie on the slice/tile boundary*/
928
362k
                            if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
929
62.0k
                            {
930
62.0k
                                au1_avail_chroma[i] = 0;
931
62.0k
                            }
932
362k
                        }
933
45.3k
                    }
934
67.6k
                }
935
936
67.6k
                if(0 == ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) * (chroma_pixel_strd / h_samp_factor)) - sao_wd_chroma)
937
184
                {
938
184
                    au1_avail_chroma[0] = 0;
939
184
                    au1_avail_chroma[4] = 0;
940
184
                    au1_avail_chroma[6] = 0;
941
184
                }
942
67.6k
                if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
943
0
                {
944
0
                    au1_avail_chroma[1] = 0;
945
0
                    au1_avail_chroma[5] = 0;
946
0
                    au1_avail_chroma[7] = 0;
947
0
                }
948
949
67.6k
                if(0 == ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) / v_samp_factor) - sao_ht_chroma)
950
1.24k
                {
951
1.24k
                    au1_avail_chroma[2] = 0;
952
1.24k
                    au1_avail_chroma[4] = 0;
953
1.24k
                    au1_avail_chroma[5] = 0;
954
1.24k
                }
955
67.6k
                if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
956
0
                {
957
0
                    au1_avail_chroma[3] = 0;
958
0
                    au1_avail_chroma[6] = 0;
959
0
                    au1_avail_chroma[7] = 0;
960
0
                }
961
962
67.6k
                {
963
67.6k
                    au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
964
67.6k
                    au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
965
67.6k
                    au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
966
67.6k
                    au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
967
67.6k
                    if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
968
2.00k
                    {
969
2.00k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * chroma_strd - 2];
970
2.00k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * chroma_strd - 1];
971
2.00k
                    }
972
973
67.6k
                    if(chroma_yuv420sp_vu)
974
10.4k
                    {
975
10.4k
                        ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
976
10.4k
                                                                             chroma_strd,
977
10.4k
                                                                             pu1_src_left_chroma,
978
10.4k
                                                                             pu1_src_top_chroma,
979
10.4k
                                                                             pu1_sao_src_chroma_top_left_ctb,
980
10.4k
                                                                             au1_src_top_right,
981
10.4k
                                                                             au1_sao_src_top_left_chroma_bot_left,
982
10.4k
                                                                             au1_avail_chroma,
983
10.4k
                                                                             ai1_offset_cr,
984
10.4k
                                                                             ai1_offset_cb,
985
10.4k
                                                                             sao_wd_chroma,
986
10.4k
                                                                             sao_ht_chroma);
987
10.4k
                    }
988
57.2k
                    else
989
57.2k
                    {
990
57.2k
                        ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
991
57.2k
                                                                             chroma_strd,
992
57.2k
                                                                             pu1_src_left_chroma,
993
57.2k
                                                                             pu1_src_top_chroma,
994
57.2k
                                                                             pu1_sao_src_chroma_top_left_ctb,
995
57.2k
                                                                             au1_src_top_right,
996
57.2k
                                                                             au1_sao_src_top_left_chroma_bot_left,
997
57.2k
                                                                             au1_avail_chroma,
998
57.2k
                                                                             ai1_offset_cb,
999
57.2k
                                                                             ai1_offset_cr,
1000
57.2k
                                                                             sao_wd_chroma,
1001
57.2k
                                                                             sao_ht_chroma);
1002
57.2k
                    }
1003
67.6k
                }
1004
67.6k
            }
1005
5.10M
        }
1006
6.37M
        else if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && ((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)))
1007
3.11M
        {
1008
27.9M
            for(row = 0; row < sao_ht_chroma; row++)
1009
24.8M
            {
1010
24.8M
                pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * chroma_strd + (sao_wd_chroma - 2)];
1011
24.8M
                pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * chroma_strd + (sao_wd_chroma - 1)];
1012
24.8M
            }
1013
3.11M
            pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1014
3.11M
            pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1015
1016
3.11M
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * chroma_strd], sao_wd_chroma);
1017
3.11M
        }
1018
1019
11.4M
        pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
1020
11.4M
        pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * chroma_strd;
1021
11.4M
        ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
1022
11.4M
    }
1023
1024
1025
    /* Top CTB */
1026
12.5M
    if((ps_sao_ctxt->i4_ctb_y > 0))
1027
11.9M
    {
1028
11.9M
        WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
1029
11.9M
        WORD32 sao_wd_chroma = ctb_size * (chroma_pixel_strd / h_samp_factor) - 2 * SAO_SHIFT_CTB;
1030
11.9M
        WORD32 sao_ht_luma = SAO_SHIFT_CTB;
1031
11.9M
        WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
1032
1033
11.9M
        WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
1034
11.9M
        WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
1035
11.9M
        WORD32 au4_idx_t[8], idx_t;
1036
1037
11.9M
        WORD32 remaining_cols;
1038
1039
11.9M
        slice_header_t *ps_slice_hdr_top;
1040
11.9M
        {
1041
11.9M
            WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
1042
11.9M
                                        (ps_sao_ctxt->i4_ctb_x);
1043
11.9M
            ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx];
1044
11.9M
        }
1045
1046
11.9M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
1047
11.9M
        if(remaining_cols <= SAO_SHIFT_CTB)
1048
556k
        {
1049
556k
            sao_wd_luma += remaining_cols;
1050
556k
        }
1051
11.9M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples * (chroma_pixel_strd / h_samp_factor) - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) * (chroma_pixel_strd / h_samp_factor) + sao_wd_chroma);
1052
11.9M
        if(remaining_cols <= 2 * SAO_SHIFT_CTB)
1053
556k
        {
1054
556k
            sao_wd_chroma += remaining_cols;
1055
556k
        }
1056
1057
11.9M
        pu1_src_luma -= (sao_ht_luma * src_strd);
1058
11.9M
        pu1_src_chroma -= (sao_ht_chroma * chroma_strd);
1059
11.9M
        ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
1060
11.9M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1061
11.9M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) * (chroma_pixel_strd / h_samp_factor);
1062
11.9M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
1063
11.9M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) * (chroma_pixel_strd / v_samp_factor) - (2 * sao_ht_chroma);
1064
1065
11.9M
        if(0 != sao_wd_luma)
1066
11.9M
        {
1067
11.9M
            if(ps_slice_hdr_top->i1_slice_sao_luma_flag)
1068
4.56M
            {
1069
4.56M
                if(0 == ps_sao->b3_y_type_idx)
1070
4.37M
                {
1071
                    /* Update left, top and top-left */
1072
39.0M
                    for(row = 0; row < sao_ht_luma; row++)
1073
34.7M
                    {
1074
34.7M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1075
34.7M
                    }
1076
4.37M
                    pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1077
1078
4.37M
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1079
1080
4.37M
                }
1081
1082
195k
                else if(1 == ps_sao->b3_y_type_idx)
1083
104k
                {
1084
104k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1085
104k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1086
104k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1087
104k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1088
1089
104k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1090
104k
                                                                              src_strd,
1091
104k
                                                                              pu1_src_left_luma,
1092
104k
                                                                              pu1_src_top_luma,
1093
104k
                                                                              pu1_sao_src_luma_top_left_ctb,
1094
104k
                                                                              ps_sao->b5_y_band_pos,
1095
104k
                                                                              ai1_offset_y,
1096
104k
                                                                              sao_wd_luma,
1097
104k
                                                                              sao_ht_luma
1098
104k
                                                                             );
1099
104k
                }
1100
1101
91.2k
                else // if(2 <= ps_sao->b3_y_type_idx)
1102
91.2k
                {
1103
91.2k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1104
91.2k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1105
91.2k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1106
91.2k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1107
1108
91.2k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
1109
91.2k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
1110
91.2k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
1111
1112
827k
                    for(i = 0; i < 8; i++)
1113
736k
                    {
1114
1115
736k
                        au4_ilf_across_tile_slice_enable[i] = 1;
1116
736k
                    }
1117
                    /******************************************************************
1118
                     * Derive the Top CTB's neighbor pixels' slice indices.
1119
                     *
1120
                     *               T_T
1121
                     *          ____________
1122
                     *         |    |       |
1123
                     *         | T_L|  T    |T_R
1124
                     *         |    | ______|____
1125
                     *         |    |  T_D  |    |
1126
                     *         |    |       |    |
1127
                     *         |____|_______|    |
1128
                     *              |            |
1129
                     *              |            |
1130
                     *              |____________|
1131
                     *
1132
                     *****************************************************************/
1133
1134
                    /*In case of slices or tiles*/
1135
91.2k
                    {
1136
91.2k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1137
55.4k
                        {
1138
1139
55.4k
                            ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1140
55.4k
                            ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1141
1142
55.4k
                            ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1143
55.4k
                            ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1144
1145
55.4k
                            ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1146
55.4k
                            ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1147
1148
55.4k
                            ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1149
55.4k
                            ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1150
1151
55.4k
                            ctbx_t = ps_sao_ctxt->i4_ctb_x;
1152
55.4k
                            ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1153
1154
55.4k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1155
7.93k
                            {
1156
                                /*Calculate neighbor ctb slice indices*/
1157
7.93k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1158
877
                                {
1159
877
                                    au4_idx_t[0] = -1;
1160
877
                                    au4_idx_t[6] = -1;
1161
877
                                    au4_idx_t[4] = -1;
1162
877
                                }
1163
7.05k
                                else
1164
7.05k
                                {
1165
7.05k
                                    au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1166
7.05k
                                    au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1167
7.05k
                                }
1168
7.93k
                                idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1169
7.93k
                                au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1170
7.93k
                                au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1171
7.93k
                                au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1172
1173
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1174
7.93k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1175
877
                                {
1176
877
                                    au4_ilf_across_tile_slice_enable[4] = 0;
1177
877
                                    au4_ilf_across_tile_slice_enable[6] = 0;
1178
877
                                    au4_ilf_across_tile_slice_enable[0] = 0;
1179
877
                                }
1180
7.05k
                                else
1181
7.05k
                                {
1182
7.05k
                                    au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1183
7.05k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1184
7.05k
                                }
1185
1186
1187
1188
7.93k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1189
7.93k
                                au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1190
7.93k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1191
7.93k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1192
7.93k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1193
1194
7.93k
                                if(au4_idx_t[6] < idx_t)
1195
877
                                {
1196
877
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1197
877
                                }
1198
1199
                                /*
1200
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1201
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1202
                                 */
1203
1204
71.3k
                                for(i = 0; i < 8; i++)
1205
63.4k
                                {
1206
                                    /*Sets the edges that lie on the slice/tile boundary*/
1207
63.4k
                                    if(au4_idx_t[i] != idx_t)
1208
18.6k
                                    {
1209
18.6k
                                        au1_tile_slice_boundary[i] = 1;
1210
                                        /*Check for slice flag at such boundaries*/
1211
18.6k
                                    }
1212
44.7k
                                    else
1213
44.7k
                                    {
1214
44.7k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
1215
44.7k
                                    }
1216
63.4k
                                }
1217
                                /*Reset indices*/
1218
71.4k
                                for(i = 0; i < 8; i++)
1219
63.4k
                                {
1220
63.4k
                                    au4_idx_t[i] = 0;
1221
63.4k
                                }
1222
7.93k
                            }
1223
1224
55.4k
                            if(ps_pps->i1_tiles_enabled_flag)
1225
47.5k
                            {
1226
                                /* Calculate availability flags at tile boundary */
1227
47.5k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1228
23.5k
                                {
1229
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1230
23.5k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1231
21.8k
                                    {
1232
                                        /*Calculate neighbor ctb tile indices*/
1233
21.8k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
1234
4.08k
                                        {
1235
4.08k
                                            au4_idx_t[0] = -1;
1236
4.08k
                                            au4_idx_t[6] = -1;
1237
4.08k
                                            au4_idx_t[4] = -1;
1238
4.08k
                                        }
1239
17.7k
                                        else
1240
17.7k
                                        {
1241
17.7k
                                            au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1242
17.7k
                                            au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1243
17.7k
                                        }
1244
21.8k
                                        idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1245
21.8k
                                        au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1246
21.8k
                                        au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1247
21.8k
                                        au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1248
1249
196k
                                        for(i = 0; i < 8; i++)
1250
174k
                                        {
1251
                                            /*Sets the edges that lie on the tile boundary*/
1252
174k
                                            if(au4_idx_t[i] != idx_t)
1253
71.8k
                                            {
1254
71.8k
                                                au1_tile_slice_boundary[i] |= 1;
1255
71.8k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1256
71.8k
                                            }
1257
174k
                                        }
1258
21.8k
                                    }
1259
23.5k
                                }
1260
47.5k
                            }
1261
1262
499k
                            for(i = 0; i < 8; i++)
1263
443k
                            {
1264
                                /*Marks a neighbor unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
1265
443k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1266
73.6k
                                {
1267
73.6k
                                    au1_avail_luma[i] = 0;
1268
73.6k
                                }
1269
443k
                            }
1270
55.4k
                        }
1271
91.2k
                    }
1272
1273
1274
91.2k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
1275
13.4k
                    {
1276
13.4k
                        au1_avail_luma[0] = 0;
1277
13.4k
                        au1_avail_luma[4] = 0;
1278
13.4k
                        au1_avail_luma[6] = 0;
1279
13.4k
                    }
1280
1281
91.2k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
1282
8.99k
                    {
1283
8.99k
                        au1_avail_luma[1] = 0;
1284
8.99k
                        au1_avail_luma[5] = 0;
1285
8.99k
                        au1_avail_luma[7] = 0;
1286
8.99k
                    }
1287
1288
91.2k
                    if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
1289
0
                    {
1290
0
                        au1_avail_luma[2] = 0;
1291
0
                        au1_avail_luma[4] = 0;
1292
0
                        au1_avail_luma[5] = 0;
1293
0
                    }
1294
1295
91.2k
                    if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1296
0
                    {
1297
0
                        au1_avail_luma[3] = 0;
1298
0
                        au1_avail_luma[6] = 0;
1299
0
                        au1_avail_luma[7] = 0;
1300
0
                    }
1301
1302
91.2k
                    {
1303
91.2k
                        au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
1304
91.2k
                        u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
1305
91.2k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1306
91.2k
                                                                          src_strd,
1307
91.2k
                                                                          pu1_src_left_luma,
1308
91.2k
                                                                          pu1_src_top_luma,
1309
91.2k
                                                                          pu1_sao_src_luma_top_left_ctb,
1310
91.2k
                                                                          au1_src_top_right,
1311
91.2k
                                                                          &u1_sao_src_top_left_luma_bot_left,
1312
91.2k
                                                                          au1_avail_luma,
1313
91.2k
                                                                          ai1_offset_y,
1314
91.2k
                                                                          sao_wd_luma,
1315
91.2k
                                                                          sao_ht_luma);
1316
91.2k
                    }
1317
91.2k
                }
1318
4.56M
            }
1319
7.38M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1320
3.57M
            {
1321
                /* Update left, top and top-left */
1322
31.9M
                for(row = 0; row < sao_ht_luma; row++)
1323
28.3M
                {
1324
28.3M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1325
28.3M
                }
1326
3.57M
                pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1327
1328
3.57M
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1329
3.57M
            }
1330
11.9M
        }
1331
1332
11.9M
        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && 0 != sao_wd_chroma)
1333
4.64M
        {
1334
4.64M
            if(ps_slice_hdr_top->i1_slice_sao_chroma_flag)
1335
1.57M
            {
1336
1.57M
                if(0 == ps_sao->b3_cb_type_idx)
1337
1.42M
                {
1338
1339
12.8M
                    for(row = 0; row < sao_ht_chroma; row++)
1340
11.4M
                    {
1341
11.4M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * chroma_strd + (sao_wd_chroma - 2)];
1342
11.4M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * chroma_strd + (sao_wd_chroma - 1)];
1343
11.4M
                    }
1344
1.42M
                    pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1345
1.42M
                    pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1346
1347
1.42M
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * chroma_strd], sao_wd_chroma);
1348
1349
1.42M
                }
1350
1351
146k
                else if(1 == ps_sao->b3_cb_type_idx)
1352
71.2k
                {
1353
71.2k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1354
71.2k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1355
71.2k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1356
71.2k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1357
1358
71.2k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1359
71.2k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1360
71.2k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1361
71.2k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1362
1363
71.2k
                    if(chroma_yuv420sp_vu)
1364
10.4k
                    {
1365
10.4k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1366
10.4k
                                                                                    chroma_strd,
1367
10.4k
                                                                                    pu1_src_left_chroma,
1368
10.4k
                                                                                    pu1_src_top_chroma,
1369
10.4k
                                                                                    pu1_sao_src_chroma_top_left_ctb,
1370
10.4k
                                                                                    ps_sao->b5_cr_band_pos,
1371
10.4k
                                                                                    ps_sao->b5_cb_band_pos,
1372
10.4k
                                                                                    ai1_offset_cr,
1373
10.4k
                                                                                    ai1_offset_cb,
1374
10.4k
                                                                                    sao_wd_chroma,
1375
10.4k
                                                                                    sao_ht_chroma
1376
10.4k
                                                                                   );
1377
10.4k
                    }
1378
60.8k
                    else
1379
60.8k
                    {
1380
60.8k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1381
60.8k
                                                                                    chroma_strd,
1382
60.8k
                                                                                    pu1_src_left_chroma,
1383
60.8k
                                                                                    pu1_src_top_chroma,
1384
60.8k
                                                                                    pu1_sao_src_chroma_top_left_ctb,
1385
60.8k
                                                                                    ps_sao->b5_cb_band_pos,
1386
60.8k
                                                                                    ps_sao->b5_cr_band_pos,
1387
60.8k
                                                                                    ai1_offset_cb,
1388
60.8k
                                                                                    ai1_offset_cr,
1389
60.8k
                                                                                    sao_wd_chroma,
1390
60.8k
                                                                                    sao_ht_chroma
1391
60.8k
                                                                                   );
1392
60.8k
                    }
1393
71.2k
                }
1394
75.1k
                else // if(2 <= ps_sao->b3_cb_type_idx)
1395
75.1k
                {
1396
75.1k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1397
75.1k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1398
75.1k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1399
75.1k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1400
1401
75.1k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1402
75.1k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1403
75.1k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1404
75.1k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1405
1406
678k
                    for(i = 0; i < 8; i++)
1407
602k
                    {
1408
602k
                        au1_avail_chroma[i] = 255;
1409
602k
                        au1_tile_slice_boundary[i] = 0;
1410
602k
                        au4_idx_t[i] = 0;
1411
602k
                        au4_ilf_across_tile_slice_enable[i] = 1;
1412
602k
                    }
1413
1414
75.1k
                    {
1415
75.1k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1416
48.6k
                        {
1417
48.6k
                            ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1418
48.6k
                            ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1419
1420
48.6k
                            ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1421
48.6k
                            ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1422
1423
48.6k
                            ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1424
48.6k
                            ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1425
1426
48.6k
                            ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1427
48.6k
                            ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1428
1429
48.6k
                            ctbx_t = ps_sao_ctxt->i4_ctb_x;
1430
48.6k
                            ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1431
1432
48.6k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1433
4.83k
                            {
1434
4.83k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1435
741
                                {
1436
741
                                    au4_idx_t[0] = -1;
1437
741
                                    au4_idx_t[6] = -1;
1438
741
                                    au4_idx_t[4] = -1;
1439
741
                                }
1440
4.09k
                                else
1441
4.09k
                                {
1442
4.09k
                                    au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1443
4.09k
                                    au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1444
4.09k
                                }
1445
4.83k
                                idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1446
4.83k
                                au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1447
4.83k
                                au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1448
4.83k
                                au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1449
1450
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1451
1452
4.83k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1453
741
                                {
1454
741
                                    au4_ilf_across_tile_slice_enable[4] = 0;
1455
741
                                    au4_ilf_across_tile_slice_enable[6] = 0;
1456
741
                                    au4_ilf_across_tile_slice_enable[0] = 0;
1457
741
                                }
1458
4.09k
                                else
1459
4.09k
                                {
1460
4.09k
                                    au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1461
4.09k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1462
4.09k
                                }
1463
1464
4.83k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1465
4.83k
                                au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1466
4.83k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1467
4.83k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1468
4.83k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1469
1470
4.83k
                                if(idx_t > au4_idx_t[6])
1471
743
                                {
1472
743
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1473
743
                                }
1474
1475
                                /*
1476
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1477
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1478
                                 */
1479
43.5k
                                for(i = 0; i < 8; i++)
1480
38.6k
                                {
1481
                                    /*Sets the edges that lie on the slice/tile boundary*/
1482
38.6k
                                    if(au4_idx_t[i] != idx_t)
1483
8.18k
                                    {
1484
8.18k
                                        au1_tile_slice_boundary[i] = 1;
1485
8.18k
                                    }
1486
30.5k
                                    else
1487
30.5k
                                    {
1488
                                        /*Indicates that the neighbour belongs to same/dependent slice*/
1489
30.5k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
1490
30.5k
                                    }
1491
38.6k
                                }
1492
                                /*Reset indices*/
1493
43.5k
                                for(i = 0; i < 8; i++)
1494
38.6k
                                {
1495
38.6k
                                    au4_idx_t[i] = 0;
1496
38.6k
                                }
1497
4.83k
                            }
1498
48.6k
                            if(ps_pps->i1_tiles_enabled_flag)
1499
43.9k
                            {
1500
                                /* Calculate availability flags at tile boundary */
1501
43.9k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1502
21.3k
                                {
1503
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1504
21.3k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1505
20.1k
                                    {
1506
                                        /*Calculate neighbor ctb slice indices*/
1507
20.1k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
1508
3.30k
                                        {
1509
3.30k
                                            au4_idx_t[0] = -1;
1510
3.30k
                                            au4_idx_t[6] = -1;
1511
3.30k
                                            au4_idx_t[4] = -1;
1512
3.30k
                                        }
1513
16.8k
                                        else
1514
16.8k
                                        {
1515
16.8k
                                            au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1516
16.8k
                                            au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1517
16.8k
                                        }
1518
20.1k
                                        idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1519
20.1k
                                        au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1520
20.1k
                                        au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1521
20.1k
                                        au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1522
1523
181k
                                        for(i = 0; i < 8; i++)
1524
161k
                                        {
1525
                                            /*Sets the edges that lie on the tile boundary*/
1526
161k
                                            if(au4_idx_t[i] != idx_t)
1527
66.1k
                                            {
1528
66.1k
                                                au1_tile_slice_boundary[i] |= 1;
1529
66.1k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1530
66.1k
                                            }
1531
161k
                                        }
1532
20.1k
                                    }
1533
21.3k
                                }
1534
43.9k
                            }
1535
438k
                            for(i = 0; i < 8; i++)
1536
389k
                            {
1537
                                /*Sets the edges that lie on the slice/tile boundary*/
1538
389k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1539
68.1k
                                {
1540
68.1k
                                    au1_avail_chroma[i] = 0;
1541
68.1k
                                }
1542
389k
                            }
1543
1544
48.6k
                        }
1545
75.1k
                    }
1546
75.1k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
1547
10.6k
                    {
1548
10.6k
                        au1_avail_chroma[0] = 0;
1549
10.6k
                        au1_avail_chroma[4] = 0;
1550
10.6k
                        au1_avail_chroma[6] = 0;
1551
10.6k
                    }
1552
1553
75.1k
                    if(ps_sps->i2_pic_width_in_luma_samples * (chroma_pixel_strd / h_samp_factor) - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) * (chroma_pixel_strd / h_samp_factor)) <= sao_wd_chroma)
1554
8.37k
                    {
1555
8.37k
                        au1_avail_chroma[1] = 0;
1556
8.37k
                        au1_avail_chroma[5] = 0;
1557
8.37k
                        au1_avail_chroma[7] = 0;
1558
8.37k
                    }
1559
1560
75.1k
                    if(0 == ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) / v_samp_factor) - sao_ht_chroma)
1561
74
                    {
1562
74
                        au1_avail_chroma[2] = 0;
1563
74
                        au1_avail_chroma[4] = 0;
1564
74
                        au1_avail_chroma[5] = 0;
1565
74
                    }
1566
1567
75.1k
                    if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1568
0
                    {
1569
0
                        au1_avail_chroma[3] = 0;
1570
0
                        au1_avail_chroma[6] = 0;
1571
0
                        au1_avail_chroma[7] = 0;
1572
0
                    }
1573
1574
75.1k
                    {
1575
75.1k
                        au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
1576
75.1k
                        au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
1577
75.1k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * chroma_strd - 2];
1578
75.1k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * chroma_strd - 1];
1579
1580
75.1k
                        if(chroma_yuv420sp_vu)
1581
11.5k
                        {
1582
11.5k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1583
11.5k
                                                                                 chroma_strd,
1584
11.5k
                                                                                 pu1_src_left_chroma,
1585
11.5k
                                                                                 pu1_src_top_chroma,
1586
11.5k
                                                                                 pu1_sao_src_chroma_top_left_ctb,
1587
11.5k
                                                                                 au1_src_top_right,
1588
11.5k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
1589
11.5k
                                                                                 au1_avail_chroma,
1590
11.5k
                                                                                 ai1_offset_cr,
1591
11.5k
                                                                                 ai1_offset_cb,
1592
11.5k
                                                                                 sao_wd_chroma,
1593
11.5k
                                                                                 sao_ht_chroma);
1594
11.5k
                        }
1595
63.6k
                        else
1596
63.6k
                        {
1597
63.6k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1598
63.6k
                                                                                 chroma_strd,
1599
63.6k
                                                                                 pu1_src_left_chroma,
1600
63.6k
                                                                                 pu1_src_top_chroma,
1601
63.6k
                                                                                 pu1_sao_src_chroma_top_left_ctb,
1602
63.6k
                                                                                 au1_src_top_right,
1603
63.6k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
1604
63.6k
                                                                                 au1_avail_chroma,
1605
63.6k
                                                                                 ai1_offset_cb,
1606
63.6k
                                                                                 ai1_offset_cr,
1607
63.6k
                                                                                 sao_wd_chroma,
1608
63.6k
                                                                                 sao_ht_chroma);
1609
63.6k
                        }
1610
75.1k
                    }
1611
1612
75.1k
                }
1613
1.57M
            }
1614
3.06M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1615
1.36M
            {
1616
12.2M
                for(row = 0; row < sao_ht_chroma; row++)
1617
10.8M
                {
1618
10.8M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * chroma_strd + (sao_wd_chroma - 2)];
1619
10.8M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * chroma_strd + (sao_wd_chroma - 1)];
1620
10.8M
                }
1621
1.36M
                pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1622
1.36M
                pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1623
1624
1.36M
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * chroma_strd], sao_wd_chroma);
1625
1.36M
            }
1626
4.64M
        }
1627
1628
11.9M
        pu1_src_luma += sao_ht_luma * src_strd;
1629
11.9M
        pu1_src_chroma += sao_ht_chroma * chroma_strd;
1630
11.9M
        ps_sao += (ps_sps->i2_pic_wd_in_ctb);
1631
11.9M
    }
1632
1633
    /* Left CTB */
1634
12.5M
    if(ps_sao_ctxt->i4_ctb_x > 0)
1635
11.8M
    {
1636
11.8M
        WORD32 sao_wd_luma = SAO_SHIFT_CTB;
1637
11.8M
        WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
1638
11.8M
        WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
1639
11.8M
        WORD32 sao_ht_chroma = (ctb_size / v_samp_factor) - SAO_SHIFT_CTB;
1640
1641
11.8M
        WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
1642
11.8M
        WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
1643
11.8M
        WORD32 au4_idx_l[8], idx_l;
1644
1645
11.8M
        WORD32 remaining_rows;
1646
11.8M
        slice_header_t *ps_slice_hdr_left;
1647
11.8M
        {
1648
11.8M
            WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb +
1649
11.8M
                                        (ps_sao_ctxt->i4_ctb_x - 1);
1650
11.8M
            ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx];
1651
11.8M
        }
1652
1653
11.8M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
1654
11.8M
        if(remaining_rows <= SAO_SHIFT_CTB)
1655
480k
        {
1656
480k
            sao_ht_luma += remaining_rows;
1657
480k
        }
1658
11.8M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples / v_samp_factor - (((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) / v_samp_factor) + sao_ht_chroma);
1659
11.8M
        if(remaining_rows <= SAO_SHIFT_CTB)
1660
480k
        {
1661
480k
            sao_ht_chroma += remaining_rows;
1662
480k
        }
1663
1664
11.8M
        pu1_src_luma -= sao_wd_luma;
1665
11.8M
        pu1_src_chroma -= sao_wd_chroma;
1666
11.8M
        ps_sao -= 1;
1667
11.8M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
1668
11.8M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) * (chroma_pixel_strd / h_samp_factor) - sao_wd_chroma;
1669
11.8M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
1670
11.8M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) * (chroma_pixel_strd / v_samp_factor);
1671
1672
1673
11.8M
        if(0 != sao_ht_luma)
1674
11.8M
        {
1675
11.8M
            if(ps_slice_hdr_left->i1_slice_sao_luma_flag)
1676
4.62M
            {
1677
4.62M
                if(0 == ps_sao->b3_y_type_idx)
1678
4.42M
                {
1679
                    /* Update left, top and top-left */
1680
90.6M
                    for(row = 0; row < sao_ht_luma; row++)
1681
86.1M
                    {
1682
86.1M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1683
86.1M
                    }
1684
                    /*Update in next location*/
1685
4.42M
                    pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1686
1687
4.42M
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1688
1689
4.42M
                }
1690
1691
198k
                else if(1 == ps_sao->b3_y_type_idx)
1692
106k
                {
1693
106k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1694
106k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1695
106k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1696
106k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1697
1698
106k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1699
106k
                                                                              src_strd,
1700
106k
                                                                              pu1_src_left_luma,
1701
106k
                                                                              pu1_src_top_luma,
1702
106k
                                                                              pu1_sao_src_top_left_luma_curr_ctb,
1703
106k
                                                                              ps_sao->b5_y_band_pos,
1704
106k
                                                                              ai1_offset_y,
1705
106k
                                                                              sao_wd_luma,
1706
106k
                                                                              sao_ht_luma
1707
106k
                                                                             );
1708
106k
                }
1709
1710
91.9k
                else // if(2 <= ps_sao->b3_y_type_idx)
1711
91.9k
                {
1712
91.9k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1713
91.9k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1714
91.9k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1715
91.9k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1716
1717
833k
                    for(i = 0; i < 8; i++)
1718
741k
                    {
1719
741k
                        au1_avail_luma[i] = 255;
1720
741k
                        au1_tile_slice_boundary[i] = 0;
1721
741k
                        au4_idx_l[i] = 0;
1722
741k
                        au4_ilf_across_tile_slice_enable[i] = 1;
1723
741k
                    }
1724
                    /******************************************************************
1725
                     * Derive the Left CTB's neighbour pixels' slice indices.
1726
                     *
1727
                     *
1728
                     *          ____________
1729
                     *         |    |       |
1730
                     *         | L_T|       |
1731
                     *         |____|_______|____
1732
                     *         |    |       |    |
1733
                     *     L_L |  L |  L_R  |    |
1734
                     *         |____|_______|    |
1735
                     *              |            |
1736
                     *          L_D |            |
1737
                     *              |____________|
1738
                     *
1739
                     *****************************************************************/
1740
1741
                    /*In case of slices or tiles*/
1742
91.9k
                    {
1743
91.9k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1744
55.4k
                        {
1745
55.4k
                            ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
1746
55.4k
                            ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
1747
1748
55.4k
                            ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
1749
55.4k
                            ctby_l_l = ps_sao_ctxt->i4_ctb_y;
1750
1751
55.4k
                            ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
1752
55.4k
                            ctby_l_r = ps_sao_ctxt->i4_ctb_y;
1753
1754
55.4k
                            ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
1755
55.4k
                            ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
1756
1757
55.4k
                            ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
1758
55.4k
                            ctby_l = ps_sao_ctxt->i4_ctb_y;
1759
1760
55.4k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1761
7.13k
                            {
1762
7.13k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
1763
332
                                {
1764
332
                                    au4_idx_l[2] = -1;
1765
332
                                    au4_idx_l[4] = -1;
1766
332
                                    au4_idx_l[5] = -1;
1767
332
                                }
1768
6.80k
                                else
1769
6.80k
                                {
1770
6.80k
                                    au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
1771
6.80k
                                    au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
1772
6.80k
                                }
1773
7.13k
                                idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
1774
7.13k
                                au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
1775
7.13k
                                au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
1776
7.13k
                                au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
1777
1778
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1779
7.13k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
1780
332
                                {
1781
332
                                    au4_ilf_across_tile_slice_enable[2] = 0;
1782
332
                                    au4_ilf_across_tile_slice_enable[4] = 0;
1783
332
                                    au4_ilf_across_tile_slice_enable[5] = 0;
1784
332
                                }
1785
6.80k
                                else
1786
6.80k
                                {
1787
6.80k
                                    au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
1788
6.80k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
1789
1790
6.80k
                                }
1791
                                //TODO: ILF flag checks for [0] and [6] are missing.
1792
7.13k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1793
7.13k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1794
7.13k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1795
1796
7.13k
                                if(idx_l < au4_idx_l[5])
1797
122
                                {
1798
122
                                    au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1799
122
                                }
1800
1801
                                /*
1802
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1803
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1804
                                 */
1805
64.2k
                                for(i = 0; i < 8; i++)
1806
57.0k
                                {
1807
                                    /*Sets the edges that lie on the slice/tile boundary*/
1808
57.0k
                                    if(au4_idx_l[i] != idx_l)
1809
16.9k
                                    {
1810
16.9k
                                        au1_tile_slice_boundary[i] = 1;
1811
16.9k
                                    }
1812
40.1k
                                    else
1813
40.1k
                                    {
1814
40.1k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
1815
40.1k
                                    }
1816
57.0k
                                }
1817
                                /*Reset indices*/
1818
64.2k
                                for(i = 0; i < 8; i++)
1819
57.0k
                                {
1820
57.0k
                                    au4_idx_l[i] = 0;
1821
57.0k
                                }
1822
7.13k
                            }
1823
1824
55.4k
                            if(ps_pps->i1_tiles_enabled_flag)
1825
48.4k
                            {
1826
                                /* Calculate availability flags at tile boundary */
1827
48.4k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1828
23.2k
                                {
1829
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1830
23.2k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1831
21.9k
                                    {
1832
21.9k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
1833
3.14k
                                        {
1834
3.14k
                                            au4_idx_l[2] = -1;
1835
3.14k
                                            au4_idx_l[4] = -1;
1836
3.14k
                                            au4_idx_l[5] = -1;
1837
3.14k
                                        }
1838
18.7k
                                        else
1839
18.7k
                                        {
1840
18.7k
                                            au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
1841
18.7k
                                            au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
1842
18.7k
                                        }
1843
1844
21.9k
                                        idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
1845
21.9k
                                        au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
1846
21.9k
                                        au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
1847
21.9k
                                        au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
1848
1849
197k
                                        for(i = 0; i < 8; i++)
1850
175k
                                        {
1851
                                            /*Sets the edges that lie on the slice/tile boundary*/
1852
175k
                                            if(au4_idx_l[i] != idx_l)
1853
69.1k
                                            {
1854
69.1k
                                                au1_tile_slice_boundary[i] |= 1;
1855
69.1k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1856
69.1k
                                            }
1857
175k
                                        }
1858
21.9k
                                    }
1859
23.2k
                                }
1860
48.4k
                            }
1861
1862
498k
                            for(i = 0; i < 8; i++)
1863
443k
                            {
1864
                                /*Sets the edges that lie on the slice/tile boundary*/
1865
443k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1866
70.1k
                                {
1867
70.1k
                                    au1_avail_luma[i] = 0;
1868
70.1k
                                }
1869
443k
                            }
1870
55.4k
                        }
1871
91.9k
                    }
1872
91.9k
                    if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
1873
0
                    {
1874
0
                        au1_avail_luma[0] = 0;
1875
0
                        au1_avail_luma[4] = 0;
1876
0
                        au1_avail_luma[6] = 0;
1877
0
                    }
1878
91.9k
                    if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1879
0
                    {
1880
0
                        au1_avail_luma[1] = 0;
1881
0
                        au1_avail_luma[5] = 0;
1882
0
                        au1_avail_luma[7] = 0;
1883
0
                    }
1884
1885
91.9k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
1886
17.8k
                    {
1887
17.8k
                        au1_avail_luma[2] = 0;
1888
17.8k
                        au1_avail_luma[4] = 0;
1889
17.8k
                        au1_avail_luma[5] = 0;
1890
17.8k
                    }
1891
1892
91.9k
                    if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
1893
9.60k
                    {
1894
9.60k
                        au1_avail_luma[3] = 0;
1895
9.60k
                        au1_avail_luma[6] = 0;
1896
9.60k
                        au1_avail_luma[7] = 0;
1897
9.60k
                    }
1898
1899
91.9k
                    {
1900
91.9k
                        au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
1901
91.9k
                        u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
1902
91.9k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1903
91.9k
                                                                          src_strd,
1904
91.9k
                                                                          pu1_src_left_luma,
1905
91.9k
                                                                          pu1_src_top_luma,
1906
91.9k
                                                                          pu1_sao_src_top_left_luma_curr_ctb,
1907
91.9k
                                                                          au1_src_top_right,
1908
91.9k
                                                                          &u1_sao_src_top_left_luma_bot_left,
1909
91.9k
                                                                          au1_avail_luma,
1910
91.9k
                                                                          ai1_offset_y,
1911
91.9k
                                                                          sao_wd_luma,
1912
91.9k
                                                                          sao_ht_luma);
1913
91.9k
                    }
1914
1915
91.9k
                }
1916
4.62M
            }
1917
7.22M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1918
3.57M
            {
1919
                /* Update left, top and top-left */
1920
56.3M
                for(row = 0; row < sao_ht_luma; row++)
1921
52.7M
                {
1922
52.7M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1923
52.7M
                }
1924
                /*Update in next location*/
1925
3.57M
                pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1926
1927
3.57M
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1928
3.57M
            }
1929
11.8M
        }
1930
1931
11.8M
        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && 0 != sao_ht_chroma)
1932
4.56M
        {
1933
4.56M
            if(ps_slice_hdr_left->i1_slice_sao_chroma_flag)
1934
1.75M
            {
1935
1.75M
                if(0 == ps_sao->b3_cb_type_idx)
1936
1.60M
                {
1937
28.3M
                    for(row = 0; row < sao_ht_chroma; row++)
1938
26.7M
                    {
1939
26.7M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * chroma_strd + (sao_wd_chroma - 2)];
1940
26.7M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * chroma_strd + (sao_wd_chroma - 1)];
1941
26.7M
                    }
1942
1.60M
                    pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1943
1.60M
                    pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1944
1945
1.60M
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * chroma_strd], sao_wd_chroma);
1946
1.60M
                }
1947
1948
148k
                else if(1 == ps_sao->b3_cb_type_idx)
1949
74.1k
                {
1950
74.1k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1951
74.1k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1952
74.1k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1953
74.1k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1954
1955
74.1k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1956
74.1k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1957
74.1k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1958
74.1k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1959
1960
74.1k
                    if(chroma_yuv420sp_vu)
1961
11.0k
                    {
1962
11.0k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1963
11.0k
                                                                                    chroma_strd,
1964
11.0k
                                                                                    pu1_src_left_chroma,
1965
11.0k
                                                                                    pu1_src_top_chroma,
1966
11.0k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
1967
11.0k
                                                                                    ps_sao->b5_cr_band_pos,
1968
11.0k
                                                                                    ps_sao->b5_cb_band_pos,
1969
11.0k
                                                                                    ai1_offset_cr,
1970
11.0k
                                                                                    ai1_offset_cb,
1971
11.0k
                                                                                    sao_wd_chroma,
1972
11.0k
                                                                                    sao_ht_chroma
1973
11.0k
                                                                                   );
1974
11.0k
                    }
1975
63.1k
                    else
1976
63.1k
                    {
1977
63.1k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1978
63.1k
                                                                                    chroma_strd,
1979
63.1k
                                                                                    pu1_src_left_chroma,
1980
63.1k
                                                                                    pu1_src_top_chroma,
1981
63.1k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
1982
63.1k
                                                                                    ps_sao->b5_cb_band_pos,
1983
63.1k
                                                                                    ps_sao->b5_cr_band_pos,
1984
63.1k
                                                                                    ai1_offset_cb,
1985
63.1k
                                                                                    ai1_offset_cr,
1986
63.1k
                                                                                    sao_wd_chroma,
1987
63.1k
                                                                                    sao_ht_chroma
1988
63.1k
                                                                                   );
1989
63.1k
                    }
1990
74.1k
                }
1991
1992
74.6k
                else // if(2 <= ps_sao->b3_cb_type_idx)
1993
74.6k
                {
1994
74.6k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1995
74.6k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1996
74.6k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1997
74.6k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1998
1999
74.6k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2000
74.6k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2001
74.6k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2002
74.6k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2003
2004
672k
                    for(i = 0; i < 8; i++)
2005
597k
                    {
2006
597k
                        au1_avail_chroma[i] = 255;
2007
597k
                        au1_tile_slice_boundary[i] = 0;
2008
597k
                        au4_idx_l[i] = 0;
2009
597k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2010
597k
                    }
2011
                    /*In case of slices*/
2012
74.6k
                    {
2013
74.6k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2014
46.8k
                        {
2015
46.8k
                            ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2016
46.8k
                            ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2017
2018
46.8k
                            ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2019
46.8k
                            ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2020
2021
46.8k
                            ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2022
46.8k
                            ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2023
2024
46.8k
                            ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2025
46.8k
                            ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2026
2027
46.8k
                            ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2028
46.8k
                            ctby_l = ps_sao_ctxt->i4_ctb_y;
2029
2030
46.8k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2031
4.36k
                            {
2032
4.36k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2033
260
                                {
2034
260
                                    au4_idx_l[2] = -1;
2035
260
                                    au4_idx_l[4] = -1;
2036
260
                                    au4_idx_l[5] = -1;
2037
260
                                }
2038
4.10k
                                else
2039
4.10k
                                {
2040
4.10k
                                    au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2041
4.10k
                                    au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2042
4.10k
                                }
2043
4.36k
                                idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2044
4.36k
                                au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2045
4.36k
                                au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2046
4.36k
                                au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2047
2048
                                /* Verify that the neighbouring CTBs don't cross the picture boundary. */
2049
4.36k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2050
260
                                {
2051
260
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2052
260
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2053
260
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2054
260
                                }
2055
4.10k
                                else
2056
4.10k
                                {
2057
4.10k
                                    au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2058
4.10k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2059
4.10k
                                }
2060
2061
4.36k
                                if(au4_idx_l[5] > idx_l)
2062
122
                                {
2063
122
                                    au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2064
122
                                }
2065
2066
                                //  au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2067
4.36k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2068
4.36k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2069
4.36k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2070
                                /*
2071
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2072
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2073
                                 */
2074
39.3k
                                for(i = 0; i < 8; i++)
2075
34.9k
                                {
2076
                                    /*Sets the edges that lie on the slice/tile boundary*/
2077
34.9k
                                    if(au4_idx_l[i] != idx_l)
2078
8.04k
                                    {
2079
8.04k
                                        au1_tile_slice_boundary[i] = 1;
2080
8.04k
                                    }
2081
26.8k
                                    else
2082
26.8k
                                    {
2083
26.8k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2084
26.8k
                                    }
2085
34.9k
                                }
2086
                                /*Reset indices*/
2087
39.3k
                                for(i = 0; i < 8; i++)
2088
34.9k
                                {
2089
34.9k
                                    au4_idx_l[i] = 0;
2090
34.9k
                                }
2091
4.36k
                            }
2092
46.8k
                            if(ps_pps->i1_tiles_enabled_flag)
2093
42.6k
                            {
2094
                                /* Calculate availability flags at tile boundary */
2095
42.6k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2096
19.6k
                                {
2097
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2098
19.6k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2099
18.6k
                                    {
2100
18.6k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2101
5.01k
                                        {
2102
5.01k
                                            au4_idx_l[2] = -1;
2103
5.01k
                                            au4_idx_l[4] = -1;
2104
5.01k
                                            au4_idx_l[5] = -1;
2105
5.01k
                                        }
2106
13.6k
                                        else
2107
13.6k
                                        {
2108
13.6k
                                            au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2109
13.6k
                                            au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2110
13.6k
                                        }
2111
2112
18.6k
                                        idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2113
18.6k
                                        au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2114
18.6k
                                        au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2115
18.6k
                                        au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2116
2117
167k
                                        for(i = 0; i < 8; i++)
2118
149k
                                        {
2119
                                            /*Sets the edges that lie on the slice/tile boundary*/
2120
149k
                                            if(au4_idx_l[i] != idx_l)
2121
58.8k
                                            {
2122
58.8k
                                                au1_tile_slice_boundary[i] |= 1;
2123
58.8k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2124
58.8k
                                            }
2125
149k
                                        }
2126
18.6k
                                    }
2127
19.6k
                                }
2128
42.6k
                            }
2129
421k
                            for(i = 0; i < 8; i++)
2130
374k
                            {
2131
                                /* Mark a neighbour unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled */
2132
374k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2133
59.8k
                                {
2134
59.8k
                                    au1_avail_chroma[i] = 0;
2135
59.8k
                                }
2136
374k
                            }
2137
46.8k
                        }
2138
74.6k
                    }
2139
74.6k
                    if(0 == ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) * (chroma_pixel_strd / h_samp_factor)) - sao_wd_chroma)
2140
264
                    {
2141
264
                        au1_avail_chroma[0] = 0;
2142
264
                        au1_avail_chroma[4] = 0;
2143
264
                        au1_avail_chroma[6] = 0;
2144
264
                    }
2145
2146
74.6k
                    if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2147
0
                    {
2148
0
                        au1_avail_chroma[1] = 0;
2149
0
                        au1_avail_chroma[5] = 0;
2150
0
                        au1_avail_chroma[7] = 0;
2151
0
                    }
2152
2153
74.6k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2154
16.7k
                    {
2155
16.7k
                        au1_avail_chroma[2] = 0;
2156
16.7k
                        au1_avail_chroma[4] = 0;
2157
16.7k
                        au1_avail_chroma[5] = 0;
2158
16.7k
                    }
2159
2160
74.6k
                    if((ps_sps->i2_pic_height_in_luma_samples / v_samp_factor) - ((ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) / v_samp_factor) <= sao_ht_chroma)
2161
7.69k
                    {
2162
7.69k
                        au1_avail_chroma[3] = 0;
2163
7.69k
                        au1_avail_chroma[6] = 0;
2164
7.69k
                        au1_avail_chroma[7] = 0;
2165
7.69k
                    }
2166
2167
74.6k
                    {
2168
74.6k
                        au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
2169
74.6k
                        au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
2170
74.6k
                        au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
2171
74.6k
                        au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
2172
                        //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2173
                        //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2174
74.6k
                        if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
2175
199
                        {
2176
199
                            au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - chroma_strd];
2177
199
                            au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - chroma_strd + 1];
2178
199
                        }
2179
2180
2181
74.6k
                        if(chroma_yuv420sp_vu)
2182
12.1k
                        {
2183
12.1k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2184
12.1k
                                                                                 chroma_strd,
2185
12.1k
                                                                                 pu1_src_left_chroma,
2186
12.1k
                                                                                 pu1_src_top_chroma,
2187
12.1k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2188
12.1k
                                                                                 au1_src_top_right,
2189
12.1k
                                                                                 au1_src_bot_left,
2190
12.1k
                                                                                 au1_avail_chroma,
2191
12.1k
                                                                                 ai1_offset_cr,
2192
12.1k
                                                                                 ai1_offset_cb,
2193
12.1k
                                                                                 sao_wd_chroma,
2194
12.1k
                                                                                 sao_ht_chroma);
2195
12.1k
                        }
2196
62.5k
                        else
2197
62.5k
                        {
2198
62.5k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2199
62.5k
                                                                                 chroma_strd,
2200
62.5k
                                                                                 pu1_src_left_chroma,
2201
62.5k
                                                                                 pu1_src_top_chroma,
2202
62.5k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2203
62.5k
                                                                                 au1_src_top_right,
2204
62.5k
                                                                                 au1_src_bot_left,
2205
62.5k
                                                                                 au1_avail_chroma,
2206
62.5k
                                                                                 ai1_offset_cb,
2207
62.5k
                                                                                 ai1_offset_cr,
2208
62.5k
                                                                                 sao_wd_chroma,
2209
62.5k
                                                                                 sao_ht_chroma);
2210
62.5k
                        }
2211
74.6k
                    }
2212
2213
74.6k
                }
2214
1.75M
            }
2215
2.81M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2216
1.38M
            {
2217
22.5M
                for(row = 0; row < sao_ht_chroma; row++)
2218
21.1M
                {
2219
21.1M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * chroma_strd + (sao_wd_chroma - 2)];
2220
21.1M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * chroma_strd + (sao_wd_chroma - 1)];
2221
21.1M
                }
2222
1.38M
                pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2223
1.38M
                pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2224
2225
1.38M
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * chroma_strd], sao_wd_chroma);
2226
1.38M
            }
2227
2228
4.56M
        }
2229
11.8M
        pu1_src_luma += sao_wd_luma;
2230
11.8M
        pu1_src_chroma += sao_wd_chroma;
2231
11.8M
        ps_sao += 1;
2232
11.8M
    }
2233
2234
2235
    /* Current CTB */
2236
12.5M
    {
2237
12.5M
        WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
2238
12.5M
        WORD32 sao_wd_chroma = ctb_size * (chroma_pixel_strd / h_samp_factor) - SAO_SHIFT_CTB * 2;
2239
12.5M
        WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2240
12.5M
        WORD32 sao_ht_chroma = (ctb_size / v_samp_factor) - SAO_SHIFT_CTB;
2241
12.5M
        WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
2242
12.5M
        WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
2243
12.5M
        WORD32 au4_idx_c[8], idx_c;
2244
12.5M
        WORD32 chroma_strd = is_yuv444 ? src_strd * 2 : src_strd;
2245
2246
12.5M
        WORD32 remaining_rows;
2247
12.5M
        WORD32 remaining_cols;
2248
2249
12.5M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
2250
12.5M
        if(remaining_cols <= SAO_SHIFT_CTB)
2251
587k
        {
2252
587k
            sao_wd_luma += remaining_cols;
2253
587k
        }
2254
12.5M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples * (chroma_pixel_strd / h_samp_factor) - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) * (chroma_pixel_strd / h_samp_factor) + sao_wd_chroma);
2255
12.5M
        if(remaining_cols <= 2 * SAO_SHIFT_CTB)
2256
587k
        {
2257
587k
            sao_wd_chroma += remaining_cols;
2258
587k
        }
2259
2260
12.5M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2261
12.5M
        if(remaining_rows <= SAO_SHIFT_CTB)
2262
512k
        {
2263
512k
            sao_ht_luma += remaining_rows;
2264
512k
        }
2265
12.5M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples / v_samp_factor - (((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) / v_samp_factor) + sao_ht_chroma);
2266
12.5M
        if(remaining_rows <= SAO_SHIFT_CTB)
2267
512k
        {
2268
512k
            sao_ht_chroma += remaining_rows;
2269
512k
        }
2270
2271
12.5M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2272
12.5M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) * (chroma_pixel_strd / h_samp_factor);
2273
12.5M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2274
12.5M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) * (chroma_pixel_strd / v_samp_factor);
2275
2276
12.5M
        if((0 != sao_wd_luma) && (0 != sao_ht_luma))
2277
12.4M
        {
2278
12.4M
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2279
4.75M
            {
2280
4.75M
                if(0 == ps_sao->b3_y_type_idx)
2281
4.53M
                {
2282
                    /* Update left, top and top-left */
2283
95.4M
                    for(row = 0; row < sao_ht_luma; row++)
2284
90.8M
                    {
2285
90.8M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2286
90.8M
                    }
2287
4.53M
                    pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2288
2289
4.53M
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2290
2291
4.53M
                    pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2292
2293
4.53M
                }
2294
2295
220k
                else if(1 == ps_sao->b3_y_type_idx)
2296
116k
                {
2297
116k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2298
116k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2299
116k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2300
116k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2301
2302
116k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2303
116k
                                                                              src_strd,
2304
116k
                                                                              pu1_src_left_luma,
2305
116k
                                                                              pu1_src_top_luma,
2306
116k
                                                                              pu1_sao_src_top_left_luma_curr_ctb,
2307
116k
                                                                              ps_sao->b5_y_band_pos,
2308
116k
                                                                              ai1_offset_y,
2309
116k
                                                                              sao_wd_luma,
2310
116k
                                                                              sao_ht_luma
2311
116k
                                                                             );
2312
116k
                }
2313
2314
103k
                else // if(2 <= ps_sao->b3_y_type_idx)
2315
103k
                {
2316
103k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2317
103k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2318
103k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2319
103k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2320
2321
925k
                    for(i = 0; i < 8; i++)
2322
821k
                    {
2323
821k
                        au1_avail_luma[i] = 255;
2324
821k
                        au1_tile_slice_boundary[i] = 0;
2325
821k
                        au4_idx_c[i] = 0;
2326
821k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2327
821k
                    }
2328
                    /******************************************************************
2329
                     * Derive the current CTB's neighbour pixels' slice indices.
2330
                     *
2331
                     *
2332
                     *          ____________
2333
                     *         |    |       |
2334
                     *         |    | C_T   |
2335
                     *         |____|_______|____
2336
                     *         |    |       |    |
2337
                     *         | C_L|   C   | C_R|
2338
                     *         |____|_______|    |
2339
                     *              |  C_D       |
2340
                     *              |            |
2341
                     *              |____________|
2342
                     *
2343
                     *****************************************************************/
2344
2345
                    /*In case of slices*/
2346
103k
                    {
2347
103k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2348
59.6k
                        {
2349
59.6k
                            ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2350
59.6k
                            ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2351
2352
59.6k
                            ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2353
59.6k
                            ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2354
2355
59.6k
                            ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2356
59.6k
                            ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2357
2358
59.6k
                            ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
2359
59.6k
                            ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
2360
2361
59.6k
                            ctbx_c = ps_sao_ctxt->i4_ctb_x;
2362
59.6k
                            ctby_c = ps_sao_ctxt->i4_ctb_y;
2363
2364
59.6k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2365
7.26k
                            {
2366
7.26k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
2367
680
                                {
2368
680
                                    au4_idx_c[6] = -1;
2369
680
                                    au4_idx_c[0] = -1;
2370
680
                                    au4_idx_c[4] = -1;
2371
680
                                }
2372
6.58k
                                else
2373
6.58k
                                {
2374
6.58k
                                    au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2375
6.58k
                                }
2376
2377
7.26k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2378
281
                                {
2379
281
                                    au4_idx_c[2] = -1;
2380
281
                                    au4_idx_c[5] = -1;
2381
281
                                    au4_idx_c[4] = -1;
2382
281
                                }
2383
6.98k
                                else
2384
6.98k
                                {
2385
6.98k
                                    au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2386
6.98k
                                    au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2387
6.98k
                                }
2388
7.26k
                                idx_c   = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2389
7.26k
                                au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2390
7.26k
                                au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2391
2392
7.26k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
2393
680
                                {
2394
680
                                    au4_ilf_across_tile_slice_enable[6] = 0;
2395
680
                                    au4_ilf_across_tile_slice_enable[0] = 0;
2396
680
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2397
680
                                }
2398
6.58k
                                else
2399
6.58k
                                {
2400
6.58k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2401
6.58k
                                    au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
2402
6.58k
                                }
2403
7.26k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2404
281
                                {
2405
281
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2406
281
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2407
281
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2408
281
                                }
2409
6.98k
                                else
2410
6.98k
                                {
2411
6.98k
                                    au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2412
6.98k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2413
6.98k
                                }
2414
7.26k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2415
7.26k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2416
7.26k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2417
2418
7.26k
                                if(au4_idx_c[6] < idx_c)
2419
1.36k
                                {
2420
1.36k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2421
1.36k
                                }
2422
2423
                                /*
2424
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2425
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2426
                                 */
2427
65.3k
                                for(i = 0; i < 8; i++)
2428
58.0k
                                {
2429
                                    /*Sets the edges that lie on the slice/tile boundary*/
2430
58.0k
                                    if(au4_idx_c[i] != idx_c)
2431
18.2k
                                    {
2432
18.2k
                                        au1_tile_slice_boundary[i] = 1;
2433
18.2k
                                    }
2434
39.8k
                                    else
2435
39.8k
                                    {
2436
39.8k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2437
39.8k
                                    }
2438
58.0k
                                }
2439
                                /*Reset indices*/
2440
65.3k
                                for(i = 0; i < 8; i++)
2441
58.0k
                                {
2442
58.0k
                                    au4_idx_c[i] = 0;
2443
58.0k
                                }
2444
7.26k
                            }
2445
2446
59.6k
                            if(ps_pps->i1_tiles_enabled_flag)
2447
52.4k
                            {
2448
                                /* Calculate availability flags at tile boundary */
2449
52.4k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2450
25.1k
                                {
2451
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2452
25.1k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2453
23.6k
                                    {
2454
23.6k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
2455
3.32k
                                        {
2456
3.32k
                                            au4_idx_c[6] = -1;
2457
3.32k
                                            au4_idx_c[0] = -1;
2458
3.32k
                                            au4_idx_c[4] = -1;
2459
3.32k
                                        }
2460
20.3k
                                        else
2461
20.3k
                                        {
2462
20.3k
                                            au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2463
20.3k
                                        }
2464
2465
23.6k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2466
2.88k
                                        {
2467
2.88k
                                            au4_idx_c[2] = -1;
2468
2.88k
                                            au4_idx_c[5] = -1;
2469
2.88k
                                            au4_idx_c[4] = -1;
2470
2.88k
                                        }
2471
20.7k
                                        else
2472
20.7k
                                        {
2473
20.7k
                                            au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2474
20.7k
                                            au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2475
20.7k
                                        }
2476
23.6k
                                        idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2477
23.6k
                                        au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2478
23.6k
                                        au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2479
2480
212k
                                        for(i = 0; i < 8; i++)
2481
189k
                                        {
2482
                                            /*Sets the edges that lie on the slice/tile boundary*/
2483
189k
                                            if(au4_idx_c[i] != idx_c)
2484
77.6k
                                            {
2485
77.6k
                                                au1_tile_slice_boundary[i] |= 1;
2486
77.6k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2487
77.6k
                                            }
2488
189k
                                        }
2489
23.6k
                                    }
2490
25.1k
                                }
2491
52.4k
                            }
2492
2493
537k
                            for(i = 0; i < 8; i++)
2494
477k
                            {
2495
                                /* Mark a neighbour as unavailable when it lies across a slice/tile boundary and in-loop filtering across that boundary is disabled */
2496
477k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2497
79.1k
                                {
2498
79.1k
                                    au1_avail_luma[i] = 0;
2499
79.1k
                                }
2500
477k
                            }
2501
2502
59.6k
                        }
2503
103k
                    }
2504
103k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
2505
14.7k
                    {
2506
14.7k
                        au1_avail_luma[0] = 0;
2507
14.7k
                        au1_avail_luma[4] = 0;
2508
14.7k
                        au1_avail_luma[6] = 0;
2509
14.7k
                    }
2510
2511
103k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
2512
10.0k
                    {
2513
10.0k
                        au1_avail_luma[1] = 0;
2514
10.0k
                        au1_avail_luma[5] = 0;
2515
10.0k
                        au1_avail_luma[7] = 0;
2516
10.0k
                    }
2517
2518
103k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2519
19.8k
                    {
2520
19.8k
                        au1_avail_luma[2] = 0;
2521
19.8k
                        au1_avail_luma[4] = 0;
2522
19.8k
                        au1_avail_luma[5] = 0;
2523
19.8k
                    }
2524
2525
103k
                    if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2526
10.6k
                    {
2527
10.6k
                        au1_avail_luma[3] = 0;
2528
10.6k
                        au1_avail_luma[6] = 0;
2529
10.6k
                        au1_avail_luma[7] = 0;
2530
10.6k
                    }
2531
2532
103k
                    {
2533
103k
                        au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
2534
103k
                        u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
2535
2536
103k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2537
103k
                                                                          src_strd,
2538
103k
                                                                          pu1_src_left_luma,
2539
103k
                                                                          pu1_src_top_luma,
2540
103k
                                                                          pu1_sao_src_top_left_luma_curr_ctb,
2541
103k
                                                                          au1_src_top_right,
2542
103k
                                                                          &u1_sao_src_top_left_luma_bot_left,
2543
103k
                                                                          au1_avail_luma,
2544
103k
                                                                          ai1_offset_y,
2545
103k
                                                                          sao_wd_luma,
2546
103k
                                                                          sao_ht_luma);
2547
103k
                    }
2548
103k
                    pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2549
103k
                    pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
2550
103k
                }
2551
4.75M
            }
2552
7.66M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2553
3.63M
            {
2554
                /* Update left, top and top-left */
2555
58.3M
                for(row = 0; row < sao_ht_luma; row++)
2556
54.7M
                {
2557
54.7M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2558
54.7M
                }
2559
3.63M
                pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2560
2561
3.63M
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2562
2563
3.63M
                pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2564
3.63M
            }
2565
12.4M
        }
2566
2567
12.5M
        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && (0 != sao_wd_chroma) && (0 != sao_ht_chroma))
2568
4.85M
        {
2569
4.85M
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
2570
1.78M
            {
2571
1.78M
                if(0 == ps_sao->b3_cb_type_idx)
2572
1.62M
                {
2573
29.3M
                    for(row = 0; row < sao_ht_chroma; row++)
2574
27.7M
                    {
2575
27.7M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * chroma_strd + (sao_wd_chroma - 2)];
2576
27.7M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * chroma_strd + (sao_wd_chroma - 1)];
2577
27.7M
                    }
2578
1.62M
                    pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2579
1.62M
                    pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2580
2581
1.62M
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * chroma_strd], sao_wd_chroma);
2582
2583
1.62M
                    pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * chroma_strd + sao_wd_chroma];
2584
1.62M
                    pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * chroma_strd + sao_wd_chroma + 1];
2585
1.62M
                }
2586
2587
164k
                else if(1 == ps_sao->b3_cb_type_idx)
2588
80.3k
                {
2589
80.3k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2590
80.3k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2591
80.3k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2592
80.3k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2593
2594
80.3k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2595
80.3k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2596
80.3k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2597
80.3k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2598
2599
80.3k
                    if(chroma_yuv420sp_vu)
2600
12.1k
                    {
2601
12.1k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2602
12.1k
                                                                                    chroma_strd,
2603
12.1k
                                                                                    pu1_src_left_chroma,
2604
12.1k
                                                                                    pu1_src_top_chroma,
2605
12.1k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2606
12.1k
                                                                                    ps_sao->b5_cr_band_pos,
2607
12.1k
                                                                                    ps_sao->b5_cb_band_pos,
2608
12.1k
                                                                                    ai1_offset_cr,
2609
12.1k
                                                                                    ai1_offset_cb,
2610
12.1k
                                                                                    sao_wd_chroma,
2611
12.1k
                                                                                    sao_ht_chroma
2612
12.1k
                                                                                   );
2613
12.1k
                    }
2614
68.2k
                    else
2615
68.2k
                    {
2616
68.2k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2617
68.2k
                                                                                    chroma_strd,
2618
68.2k
                                                                                    pu1_src_left_chroma,
2619
68.2k
                                                                                    pu1_src_top_chroma,
2620
68.2k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2621
68.2k
                                                                                    ps_sao->b5_cb_band_pos,
2622
68.2k
                                                                                    ps_sao->b5_cr_band_pos,
2623
68.2k
                                                                                    ai1_offset_cb,
2624
68.2k
                                                                                    ai1_offset_cr,
2625
68.2k
                                                                                    sao_wd_chroma,
2626
68.2k
                                                                                    sao_ht_chroma
2627
68.2k
                                                                                   );
2628
68.2k
                    }
2629
80.3k
                }
2630
2631
84.3k
                else // if(2 <= ps_sao->b3_cb_type_idx)
2632
84.3k
                {
2633
84.3k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2634
84.3k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2635
84.3k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2636
84.3k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2637
2638
84.3k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2639
84.3k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2640
84.3k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2641
84.3k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2642
2643
760k
                    for(i = 0; i < 8; i++)
2644
675k
                    {
2645
675k
                        au1_avail_chroma[i] = 255;
2646
675k
                        au1_tile_slice_boundary[i] = 0;
2647
675k
                        au4_idx_c[i] = 0;
2648
675k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2649
675k
                    }
2650
84.3k
                    {
2651
84.3k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2652
51.8k
                        {
2653
51.8k
                            ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2654
51.8k
                            ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2655
2656
51.8k
                            ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2657
51.8k
                            ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2658
2659
51.8k
                            ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2660
51.8k
                            ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2661
2662
51.8k
                            ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
2663
51.8k
                            ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
2664
2665
51.8k
                            ctbx_c = ps_sao_ctxt->i4_ctb_x;
2666
51.8k
                            ctby_c = ps_sao_ctxt->i4_ctb_y;
2667
2668
51.8k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2669
4.79k
                            {
2670
4.79k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
2671
644
                                {
2672
644
                                    au4_idx_c[0] = -1;
2673
644
                                    au4_idx_c[4] = -1;
2674
644
                                    au4_idx_c[6] = -1;
2675
644
                                }
2676
4.15k
                                else
2677
4.15k
                                {
2678
4.15k
                                    au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2679
4.15k
                                }
2680
2681
4.79k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2682
212
                                {
2683
212
                                    au4_idx_c[2] = -1;
2684
212
                                    au4_idx_c[4] = -1;
2685
212
                                    au4_idx_c[5] = -1;
2686
212
                                }
2687
4.58k
                                else
2688
4.58k
                                {
2689
4.58k
                                    au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2690
4.58k
                                    au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2691
4.58k
                                }
2692
4.79k
                                idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2693
4.79k
                                au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2694
4.79k
                                au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2695
2696
4.79k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
2697
644
                                {
2698
644
                                    au4_ilf_across_tile_slice_enable[0] = 0;
2699
644
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2700
644
                                    au4_ilf_across_tile_slice_enable[6] = 0;
2701
644
                                }
2702
4.15k
                                else
2703
4.15k
                                {
2704
4.15k
                                    au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2705
4.15k
                                    au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2706
4.15k
                                }
2707
2708
4.79k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2709
212
                                {
2710
212
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2711
212
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2712
212
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2713
212
                                }
2714
4.58k
                                else
2715
4.58k
                                {
2716
4.58k
                                    au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2717
4.58k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2718
4.58k
                                }
2719
2720
4.79k
                                au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2721
4.79k
                                au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2722
4.79k
                                au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2723
2724
4.79k
                                if(idx_c > au4_idx_c[6])
2725
972
                                {
2726
972
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2727
972
                                }
2728
2729
                                /*
2730
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2731
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2732
                                 */
2733
43.1k
                                for(i = 0; i < 8; i++)
2734
38.3k
                                {
2735
                                    /*Sets the edges that lie on the slice/tile boundary*/
2736
38.3k
                                    if(au4_idx_c[i] != idx_c)
2737
9.42k
                                    {
2738
9.42k
                                        au1_tile_slice_boundary[i] = 1;
2739
9.42k
                                    }
2740
28.8k
                                    else
2741
28.8k
                                    {
2742
28.8k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2743
28.8k
                                    }
2744
38.3k
                                }
2745
                                /*Reset indices*/
2746
43.1k
                                for(i = 0; i < 8; i++)
2747
38.3k
                                {
2748
38.3k
                                    au4_idx_c[i] = 0;
2749
38.3k
                                }
2750
4.79k
                            }
2751
2752
51.8k
                            if(ps_pps->i1_tiles_enabled_flag)
2753
47.1k
                            {
2754
                                /* Calculate availability flags at tile boundary */
2755
47.1k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2756
21.6k
                                {
2757
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2758
21.6k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2759
20.6k
                                    {
2760
20.6k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
2761
2.61k
                                        {
2762
2.61k
                                            au4_idx_c[6] = -1;
2763
2.61k
                                            au4_idx_c[0] = -1;
2764
2.61k
                                            au4_idx_c[4] = -1;
2765
2.61k
                                        }
2766
18.0k
                                        else
2767
18.0k
                                        {
2768
18.0k
                                            au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2769
18.0k
                                        }
2770
2771
20.6k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2772
4.83k
                                        {
2773
4.83k
                                            au4_idx_c[2] = -1;
2774
4.83k
                                            au4_idx_c[5] = -1;
2775
4.83k
                                            au4_idx_c[4] = -1;
2776
4.83k
                                        }
2777
15.7k
                                        else
2778
15.7k
                                        {
2779
15.7k
                                            au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2780
15.7k
                                            au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2781
15.7k
                                        }
2782
20.6k
                                        idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2783
20.6k
                                        au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2784
20.6k
                                        au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2785
2786
185k
                                        for(i = 0; i < 8; i++)
2787
164k
                                        {
2788
                                            /*Sets the edges that lie on the slice/tile boundary*/
2789
164k
                                            if(au4_idx_c[i] != idx_c)
2790
65.4k
                                            {
2791
65.4k
                                                au1_tile_slice_boundary[i] |= 1;
2792
65.4k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2793
65.4k
                                            }
2794
164k
                                        }
2795
20.6k
                                    }
2796
21.6k
                                }
2797
47.1k
                            }
2798
2799
466k
                            for(i = 0; i < 8; i++)
2800
415k
                            {
2801
                                /* Mark a neighbour as unavailable when it lies across a slice/tile boundary and in-loop filtering across that boundary is disabled */
2802
415k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2803
67.3k
                                {
2804
67.3k
                                    au1_avail_chroma[i] = 0;
2805
67.3k
                                }
2806
415k
                            }
2807
51.8k
                        }
2808
84.3k
                    }
2809
2810
84.3k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
2811
12.3k
                    {
2812
12.3k
                        au1_avail_chroma[0] = 0;
2813
12.3k
                        au1_avail_chroma[4] = 0;
2814
12.3k
                        au1_avail_chroma[6] = 0;
2815
12.3k
                    }
2816
2817
84.3k
                    if(ps_sps->i2_pic_width_in_luma_samples * (chroma_pixel_strd / h_samp_factor) - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) * (chroma_pixel_strd / h_samp_factor)) <= sao_wd_chroma)
2818
10.1k
                    {
2819
10.1k
                        au1_avail_chroma[1] = 0;
2820
10.1k
                        au1_avail_chroma[5] = 0;
2821
10.1k
                        au1_avail_chroma[7] = 0;
2822
10.1k
                    }
2823
2824
84.3k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2825
18.8k
                    {
2826
18.8k
                        au1_avail_chroma[2] = 0;
2827
18.8k
                        au1_avail_chroma[4] = 0;
2828
18.8k
                        au1_avail_chroma[5] = 0;
2829
18.8k
                    }
2830
2831
84.3k
                    if(ps_sps->i2_pic_height_in_luma_samples / v_samp_factor - ((ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) / v_samp_factor) <= sao_ht_chroma)
2832
9.23k
                    {
2833
9.23k
                        au1_avail_chroma[3] = 0;
2834
9.23k
                        au1_avail_chroma[6] = 0;
2835
9.23k
                        au1_avail_chroma[7] = 0;
2836
9.23k
                    }
2837
2838
84.3k
                    {
2839
84.3k
                        au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - chroma_strd];
2840
84.3k
                        au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - chroma_strd + 1];
2841
2842
84.3k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * chroma_strd - 2];
2843
84.3k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * chroma_strd - 1];
2844
2845
84.3k
                        if(chroma_yuv420sp_vu)
2846
13.7k
                        {
2847
13.7k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2848
13.7k
                                                                                 chroma_strd,
2849
13.7k
                                                                                 pu1_src_left_chroma,
2850
13.7k
                                                                                 pu1_src_top_chroma,
2851
13.7k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2852
13.7k
                                                                                 au1_src_top_right,
2853
13.7k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
2854
13.7k
                                                                                 au1_avail_chroma,
2855
13.7k
                                                                                 ai1_offset_cr,
2856
13.7k
                                                                                 ai1_offset_cb,
2857
13.7k
                                                                                 sao_wd_chroma,
2858
13.7k
                                                                                 sao_ht_chroma);
2859
13.7k
                        }
2860
70.5k
                        else
2861
70.5k
                        {
2862
70.5k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2863
70.5k
                                                                                 chroma_strd,
2864
70.5k
                                                                                 pu1_src_left_chroma,
2865
70.5k
                                                                                 pu1_src_top_chroma,
2866
70.5k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2867
70.5k
                                                                                 au1_src_top_right,
2868
70.5k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
2869
70.5k
                                                                                 au1_avail_chroma,
2870
70.5k
                                                                                 ai1_offset_cb,
2871
70.5k
                                                                                 ai1_offset_cr,
2872
70.5k
                                                                                 sao_wd_chroma,
2873
70.5k
                                                                                 sao_ht_chroma);
2874
70.5k
                        }
2875
84.3k
                    }
2876
2877
84.3k
                }
2878
1.78M
                pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * chroma_strd + sao_wd_chroma];
2879
1.78M
                pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * chroma_strd + sao_wd_chroma + 1];
2880
2881
1.78M
                pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma) * chroma_strd + sao_wd_chroma - 2];
2882
1.78M
                pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma) * chroma_strd + sao_wd_chroma - 1];
2883
1.78M
            }
2884
3.06M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2885
1.45M
            {
2886
24.0M
                for(row = 0; row < sao_ht_chroma; row++)
2887
22.5M
                {
2888
22.5M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * chroma_strd + (sao_wd_chroma - 2)];
2889
22.5M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * chroma_strd + (sao_wd_chroma - 1)];
2890
22.5M
                }
2891
1.45M
                pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2892
1.45M
                pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2893
2894
1.45M
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * chroma_strd], sao_wd_chroma);
2895
2896
1.45M
                pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * chroma_strd + sao_wd_chroma];
2897
1.45M
                pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * chroma_strd + sao_wd_chroma + 1];
2898
1.45M
            }
2899
2900
4.85M
        }
2901
12.5M
    }
2902
2903
2904
2905
2906
/* Where the no-loop-filter flag bit is set for a block, copy the backed-up samples back into the source buffer */
2907
12.5M
    {
2908
        /* Luma */
2909
12.5M
        if(no_loop_filter_enabled_luma)
2910
29.9k
        {
2911
29.9k
            UWORD32 u4_no_loop_filter_flag;
2912
29.9k
            WORD32 loop_filter_bit_pos;
2913
29.9k
            WORD32 log2_min_cu = 3;
2914
29.9k
            WORD32 min_cu = (1 << log2_min_cu);
2915
29.9k
            UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
2916
29.9k
            WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
2917
29.9k
            WORD32 sao_blk_wd = ctb_size;
2918
29.9k
            WORD32 remaining_rows;
2919
29.9k
            WORD32 remaining_cols;
2920
2921
29.9k
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
2922
29.9k
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
2923
29.9k
            if(remaining_rows <= SAO_SHIFT_CTB)
2924
6.79k
                sao_blk_ht += remaining_rows;
2925
29.9k
            if(remaining_cols <= SAO_SHIFT_CTB)
2926
6.67k
                sao_blk_wd += remaining_cols;
2927
2928
29.9k
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
2929
29.9k
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
2930
2931
29.9k
            pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
2932
2933
29.9k
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
2934
29.9k
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
2935
29.9k
            if(ps_sao_ctxt->i4_ctb_x > 0)
2936
20.0k
                loop_filter_bit_pos -= 1;
2937
2938
29.9k
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
2939
29.9k
                            (loop_filter_bit_pos >> 3);
2940
2941
29.9k
            for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
2942
199k
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
2943
169k
            {
2944
169k
                WORD32 tmp_wd = sao_blk_wd;
2945
2946
169k
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
2947
169k
                                (loop_filter_bit_pos & 7);
2948
169k
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
2949
2950
169k
                if(u4_no_loop_filter_flag)
2951
102k
                {
2952
281k
                    while(tmp_wd > 0)
2953
179k
                    {
2954
179k
                        if(CTZ(u4_no_loop_filter_flag))
2955
68.1k
                        {
2956
68.1k
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
2957
68.1k
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
2958
68.1k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
2959
68.1k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
2960
68.1k
                        }
2961
111k
                        else
2962
111k
                        {
2963
995k
                            for(row = 0; row < min_cu; row++)
2964
884k
                            {
2965
30.7M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
2966
29.8M
                                {
2967
29.8M
                                    pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
2968
29.8M
                                }
2969
884k
                            }
2970
111k
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
2971
111k
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
2972
111k
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
2973
111k
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
2974
111k
                        }
2975
179k
                    }
2976
2977
102k
                    pu1_src_tmp_luma -= sao_blk_wd;
2978
102k
                    pu1_src_backup_luma -= sao_blk_wd;
2979
102k
                }
2980
2981
169k
                pu1_src_tmp_luma += (src_strd << log2_min_cu);
2982
169k
                pu1_src_backup_luma += (backup_strd << log2_min_cu);
2983
169k
            }
2984
29.9k
        }
2985
2986
        /* Chroma */
2987
12.5M
        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && no_loop_filter_enabled_chroma)
2988
29.6k
        {
2989
29.6k
            UWORD32 u4_no_loop_filter_flag;
2990
29.6k
            WORD32 loop_filter_bit_pos;
2991
29.6k
            WORD32 log2_min_cu = 3;
2992
29.6k
            WORD32 min_cu = (1 << log2_min_cu);
2993
29.6k
            UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
2994
29.6k
            WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
2995
29.6k
            WORD32 sao_blk_wd = ctb_size;
2996
29.6k
            WORD32 remaining_rows;
2997
29.6k
            WORD32 remaining_cols;
2998
2999
29.6k
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3000
29.6k
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3001
29.6k
            if(remaining_rows <= 2 * SAO_SHIFT_CTB)
3002
6.98k
                sao_blk_ht += remaining_rows;
3003
29.6k
            if(remaining_cols <= 2 * SAO_SHIFT_CTB)
3004
6.65k
                sao_blk_wd += remaining_cols;
3005
3006
29.6k
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
3007
29.6k
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * chroma_strd : 0;
3008
3009
29.6k
            pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
3010
3011
29.6k
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3012
29.6k
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3013
29.6k
            if(ps_sao_ctxt->i4_ctb_x > 0)
3014
19.7k
                loop_filter_bit_pos -= (is_yuv444 ? 1 : 2);
3015
3016
29.6k
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3017
29.6k
                            (loop_filter_bit_pos >> 3);
3018
3019
29.6k
            for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
3020
196k
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3021
166k
            {
3022
166k
                WORD32 tmp_wd = sao_blk_wd;
3023
3024
166k
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3025
166k
                                (loop_filter_bit_pos & 7);
3026
166k
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3027
3028
166k
                if(u4_no_loop_filter_flag)
3029
102k
                {
3030
280k
                    while(tmp_wd > 0)
3031
178k
                    {
3032
178k
                        if(CTZ(u4_no_loop_filter_flag))
3033
68.4k
                        {
3034
68.4k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd) * (chroma_pixel_strd / h_samp_factor);
3035
68.4k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd) * (chroma_pixel_strd / h_samp_factor);
3036
68.4k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3037
68.4k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3038
68.4k
                        }
3039
109k
                        else
3040
109k
                        {
3041
548k
                            for(row = 0; row < min_cu / v_samp_factor; row++)
3042
438k
                            {
3043
16.3M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd) * (chroma_pixel_strd / h_samp_factor); col++)
3044
15.8M
                                {
3045
15.8M
                                    pu1_src_tmp_chroma[row * chroma_strd + col] = pu1_src_backup_chroma[row * backup_strd * (chroma_pixel_strd / h_samp_factor) + col];
3046
15.8M
                                }
3047
438k
                            }
3048
3049
109k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd) * (chroma_pixel_strd / h_samp_factor);
3050
109k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd) * (chroma_pixel_strd / h_samp_factor);
3051
109k
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3052
109k
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3053
109k
                        }
3054
178k
                    }
3055
3056
102k
                    pu1_src_tmp_chroma -= sao_blk_wd * (chroma_pixel_strd / h_samp_factor);
3057
102k
                    pu1_src_backup_chroma -= sao_blk_wd * (chroma_pixel_strd / h_samp_factor);
3058
102k
                }
3059
3060
166k
                pu1_src_tmp_chroma += (((src_strd * chroma_pixel_strd) / (h_samp_factor * v_samp_factor)) << log2_min_cu);
3061
166k
                pu1_src_backup_chroma += (((backup_strd * chroma_pixel_strd) / (h_samp_factor * v_samp_factor)) << log2_min_cu);
3062
166k
            }
3063
29.6k
        }
3064
12.5M
    }
3065
3066
12.5M
}
3067