Coverage Report

Created: 2025-08-26 06:27

/src/libhevc/decoder/ihevcd_sao.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevcd_sao.c
22
 *
23
 * @brief
24
 *  Contains function definitions for sample adaptive offset process
25
 *
26
 * @author
27
 *  Srinivas T
28
 *
29
 * @par List of Functions:
30
 *
31
 * @remarks
32
 *  None
33
 *
34
 *******************************************************************************
35
 */
36
37
#include <stdio.h>
38
#include <stddef.h>
39
#include <stdlib.h>
40
#include <string.h>
41
#include <assert.h>
42
43
#include "ihevc_typedefs.h"
44
#include "iv.h"
45
#include "ivd.h"
46
#include "ihevcd_cxa.h"
47
#include "ithread.h"
48
49
#include "ihevc_defs.h"
50
#include "ihevc_debug.h"
51
#include "ihevc_defs.h"
52
#include "ihevc_structs.h"
53
#include "ihevc_macros.h"
54
#include "ihevc_platform_macros.h"
55
#include "ihevc_cabac_tables.h"
56
#include "ihevc_sao.h"
57
#include "ihevc_mem_fns.h"
58
59
#include "ihevc_error.h"
60
#include "ihevc_common_tables.h"
61
62
#include "ihevcd_trace.h"
63
#include "ihevcd_defs.h"
64
#include "ihevcd_function_selector.h"
65
#include "ihevcd_structs.h"
66
#include "ihevcd_error.h"
67
#include "ihevcd_nal.h"
68
#include "ihevcd_bitstream.h"
69
#include "ihevcd_job_queue.h"
70
#include "ihevcd_utils.h"
71
72
#include "ihevc_deblk.h"
73
#include "ihevc_deblk_tables.h"
74
#include "ihevcd_profile.h"
75
#include "ihevcd_sao.h"
76
#include "ihevcd_debug.h"
77
78
174M
#define SAO_SHIFT_CTB    8
79
80
/**
81
 * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions)
82
 */
83
void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
84
0
{
85
0
    codec_t *ps_codec = ps_sao_ctxt->ps_codec;
86
0
    UWORD8 *pu1_src_luma;
87
0
    UWORD8 *pu1_src_chroma;
88
0
    WORD32 src_strd;
89
0
    WORD32 ctb_size;
90
0
    WORD32 log2_ctb_size;
91
0
    sps_t *ps_sps;
92
0
    sao_t *ps_sao;
93
0
    WORD32 row, col;
94
0
    UWORD8 au1_avail_luma[8];
95
0
    UWORD8 au1_avail_chroma[8];
96
0
    WORD32 i;
97
0
    UWORD8 *pu1_src_top_luma;
98
0
    UWORD8 *pu1_src_top_chroma;
99
0
    UWORD8 *pu1_src_left_luma;
100
0
    UWORD8 *pu1_src_left_chroma;
101
0
    UWORD8 au1_src_top_right[2];
102
0
    UWORD8 au1_src_bot_left[2];
103
0
    UWORD8 *pu1_no_loop_filter_flag;
104
0
    WORD32 loop_filter_strd;
105
106
    /* Only first 5 values are used, but arrays are large
107
     enough so that SIMD functions can read 64 bits at a time */
108
0
    WORD8 ai1_offset_y[8] = {0};
109
0
    WORD8 ai1_offset_cb[8] = {0};
110
0
    WORD8 ai1_offset_cr[8] = {0};
111
112
0
    PROFILE_DISABLE_SAO();
113
114
0
    ps_sps = ps_sao_ctxt->ps_sps;
115
0
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
116
0
    ctb_size = (1 << log2_ctb_size);
117
0
    src_strd = ps_sao_ctxt->ps_codec->i4_strd;
118
0
    pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
119
0
    pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
120
121
0
    ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
122
0
    loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
123
124
    /* Current CTB */
125
0
    {
126
0
        WORD32 sao_wd_luma;
127
0
        WORD32 sao_wd_chroma;
128
0
        WORD32 sao_ht_luma;
129
0
        WORD32 sao_ht_chroma;
130
131
0
        WORD32 remaining_rows;
132
0
        WORD32 remaining_cols;
133
134
0
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
135
0
        sao_wd_luma = MIN(ctb_size, remaining_cols);
136
0
        sao_wd_chroma = MIN(ctb_size, remaining_cols);
137
138
0
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
139
0
        sao_ht_luma = MIN(ctb_size, remaining_rows);
140
0
        sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
141
142
0
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
143
0
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
144
0
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
145
0
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
146
147
0
        pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
148
0
                        ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
149
0
                        ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
150
151
0
        ai1_offset_y[1] = ps_sao->b4_y_offset_1;
152
0
        ai1_offset_y[2] = ps_sao->b4_y_offset_2;
153
0
        ai1_offset_y[3] = ps_sao->b4_y_offset_3;
154
0
        ai1_offset_y[4] = ps_sao->b4_y_offset_4;
155
156
0
        ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
157
0
        ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
158
0
        ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
159
0
        ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
160
161
0
        ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
162
0
        ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
163
0
        ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
164
0
        ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
165
166
0
        for(i = 0; i < 8; i++)
167
0
        {
168
0
            au1_avail_luma[i] = 255;
169
0
            au1_avail_chroma[i] = 255;
170
0
        }
171
172
173
0
        if(0 == ps_sao_ctxt->i4_ctb_x)
174
0
        {
175
0
            au1_avail_luma[0] = 0;
176
0
            au1_avail_luma[4] = 0;
177
0
            au1_avail_luma[6] = 0;
178
179
0
            au1_avail_chroma[0] = 0;
180
0
            au1_avail_chroma[4] = 0;
181
0
            au1_avail_chroma[6] = 0;
182
0
        }
183
184
0
        if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
185
0
        {
186
0
            au1_avail_luma[1] = 0;
187
0
            au1_avail_luma[5] = 0;
188
0
            au1_avail_luma[7] = 0;
189
190
0
            au1_avail_chroma[1] = 0;
191
0
            au1_avail_chroma[5] = 0;
192
0
            au1_avail_chroma[7] = 0;
193
0
        }
194
195
0
        if(0 == ps_sao_ctxt->i4_ctb_y)
196
0
        {
197
0
            au1_avail_luma[2] = 0;
198
0
            au1_avail_luma[4] = 0;
199
0
            au1_avail_luma[5] = 0;
200
201
0
            au1_avail_chroma[2] = 0;
202
0
            au1_avail_chroma[4] = 0;
203
0
            au1_avail_chroma[5] = 0;
204
0
        }
205
206
0
        if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
207
0
        {
208
0
            au1_avail_luma[3] = 0;
209
0
            au1_avail_luma[6] = 0;
210
0
            au1_avail_luma[7] = 0;
211
212
0
            au1_avail_chroma[3] = 0;
213
0
            au1_avail_chroma[6] = 0;
214
0
            au1_avail_chroma[7] = 0;
215
0
        }
216
217
218
0
        if(0 == ps_sao->b3_y_type_idx)
219
0
        {
220
            /* Update left, top and top-left */
221
0
            for(row = 0; row < sao_ht_luma; row++)
222
0
            {
223
0
                pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
224
0
            }
225
0
            ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
226
227
0
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
228
229
0
        }
230
0
        else
231
0
        {
232
0
            UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
233
0
            UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
234
0
            WORD32 tmp_strd = MAX_CTB_SIZE + 2;
235
0
            WORD32 no_loop_filter_enabled = 0;
236
237
            /* Check the loop filter flags and copy the original values for back up */
238
0
            {
239
0
                UWORD32 u4_no_loop_filter_flag;
240
0
                WORD32 min_cu = 8;
241
0
                UWORD8 *pu1_src_tmp = pu1_src_luma;
242
243
0
                for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
244
0
                {
245
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
246
0
                                    ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
247
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
248
249
0
                    if(u4_no_loop_filter_flag)
250
0
                    {
251
0
                        WORD32 tmp_wd = sao_wd_luma;
252
0
                        no_loop_filter_enabled = 1;
253
0
                        while(tmp_wd > 0)
254
0
                        {
255
0
                            if(CTZ(u4_no_loop_filter_flag))
256
0
                            {
257
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
258
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
259
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
260
0
                                tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
261
0
                            }
262
0
                            else
263
0
                            {
264
0
                                for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
265
0
                                {
266
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
267
0
                                    {
268
0
                                        pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
269
0
                                    }
270
0
                                }
271
272
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
273
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
274
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
275
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
276
0
                            }
277
0
                        }
278
279
0
                        pu1_src_tmp -= sao_wd_luma;
280
0
                    }
281
282
0
                    pu1_src_tmp += min_cu * src_strd;
283
0
                    pu1_src_copy += min_cu * tmp_strd;
284
0
                }
285
0
            }
286
287
0
            if(1 == ps_sao->b3_y_type_idx)
288
0
            {
289
0
                ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
290
0
                                                                          src_strd,
291
0
                                                                          pu1_src_left_luma,
292
0
                                                                          pu1_src_top_luma,
293
0
                                                                          ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
294
0
                                                                          ps_sao->b5_y_band_pos,
295
0
                                                                          ai1_offset_y,
296
0
                                                                          sao_wd_luma,
297
0
                                                                          sao_ht_luma);
298
0
            }
299
0
            else // if(2 <= ps_sao->b3_y_type_idx)
300
0
            {
301
0
                au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
302
0
                au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
303
0
                ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
304
0
                                                                  src_strd,
305
0
                                                                  pu1_src_left_luma,
306
0
                                                                  pu1_src_top_luma,
307
0
                                                                  ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
308
0
                                                                  au1_src_top_right,
309
0
                                                                  au1_src_bot_left,
310
0
                                                                  au1_avail_luma,
311
0
                                                                  ai1_offset_y,
312
0
                                                                  sao_wd_luma,
313
0
                                                                  sao_ht_luma);
314
0
            }
315
316
            /* Check the loop filter flags and copy the original values back if they are set */
317
0
            if(no_loop_filter_enabled)
318
0
            {
319
0
                UWORD32 u4_no_loop_filter_flag;
320
0
                WORD32 min_cu = 8;
321
0
                UWORD8 *pu1_src_tmp = pu1_src_luma;
322
323
0
                for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
324
0
                {
325
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
326
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
327
328
0
                    if(u4_no_loop_filter_flag)
329
0
                    {
330
0
                        WORD32 tmp_wd = sao_wd_luma;
331
0
                        while(tmp_wd > 0)
332
0
                        {
333
0
                            if(CTZ(u4_no_loop_filter_flag))
334
0
                            {
335
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
336
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
337
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
338
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
339
0
                            }
340
0
                            else
341
0
                            {
342
0
                                for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
343
0
                                {
344
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
345
0
                                    {
346
0
                                        pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
347
0
                                    }
348
0
                                }
349
350
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
351
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
352
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
353
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
354
0
                            }
355
0
                        }
356
357
0
                        pu1_src_tmp -= sao_wd_luma;
358
0
                    }
359
360
0
                    pu1_src_tmp += min_cu * src_strd;
361
0
                    pu1_src_copy += min_cu * tmp_strd;
362
0
                }
363
0
            }
364
365
0
        }
366
367
0
        if(0 == ps_sao->b3_cb_type_idx)
368
0
        {
369
0
            for(row = 0; row < sao_ht_chroma; row++)
370
0
            {
371
0
                pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
372
0
                pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
373
0
            }
374
0
            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
375
0
            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
376
377
0
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
378
0
        }
379
0
        else
380
0
        {
381
0
            UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
382
0
            UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
383
0
            WORD32 tmp_strd = MAX_CTB_SIZE + 4;
384
0
            WORD32 no_loop_filter_enabled = 0;
385
386
            /* Check the loop filter flags and copy the original values for back up */
387
0
            {
388
0
                UWORD32 u4_no_loop_filter_flag;
389
0
                WORD32 min_cu = 4;
390
0
                UWORD8 *pu1_src_tmp = pu1_src_chroma;
391
392
0
                for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
393
0
                {
394
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
395
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
396
397
0
                    if(u4_no_loop_filter_flag)
398
0
                    {
399
0
                        WORD32 tmp_wd = sao_wd_chroma;
400
0
                        no_loop_filter_enabled = 1;
401
0
                        while(tmp_wd > 0)
402
0
                        {
403
0
                            if(CTZ(u4_no_loop_filter_flag))
404
0
                            {
405
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
406
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
407
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
408
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
409
0
                            }
410
0
                            else
411
0
                            {
412
0
                                for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
413
0
                                {
414
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
415
0
                                    {
416
0
                                        pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
417
0
                                    }
418
0
                                }
419
420
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
421
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
422
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
423
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
424
0
                            }
425
0
                        }
426
427
0
                        pu1_src_tmp -= sao_wd_chroma;
428
0
                    }
429
430
0
                    pu1_src_tmp += min_cu * src_strd;
431
0
                    pu1_src_copy += min_cu * tmp_strd;
432
0
                }
433
0
            }
434
435
0
            if(1 == ps_sao->b3_cb_type_idx)
436
0
            {
437
0
                ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
438
0
                                                                            src_strd,
439
0
                                                                            pu1_src_left_chroma,
440
0
                                                                            pu1_src_top_chroma,
441
0
                                                                            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
442
0
                                                                            ps_sao->b5_cb_band_pos,
443
0
                                                                            ps_sao->b5_cr_band_pos,
444
0
                                                                            ai1_offset_cb,
445
0
                                                                            ai1_offset_cr,
446
0
                                                                            sao_wd_chroma,
447
0
                                                                            sao_ht_chroma
448
0
                                                                           );
449
0
            }
450
0
            else // if(2 <= ps_sao->b3_cb_type_idx)
451
0
            {
452
0
                au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
453
0
                au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
454
0
                au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
455
0
                au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
456
0
                ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
457
0
                                                                     src_strd,
458
0
                                                                     pu1_src_left_chroma,
459
0
                                                                     pu1_src_top_chroma,
460
0
                                                                     ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
461
0
                                                                     au1_src_top_right,
462
0
                                                                     au1_src_bot_left,
463
0
                                                                     au1_avail_chroma,
464
0
                                                                     ai1_offset_cb,
465
0
                                                                     ai1_offset_cr,
466
0
                                                                     sao_wd_chroma,
467
0
                                                                     sao_ht_chroma);
468
0
            }
469
470
            /* Check the loop filter flags and copy the original values back if they are set */
471
0
            if(no_loop_filter_enabled)
472
0
            {
473
0
                UWORD32 u4_no_loop_filter_flag;
474
0
                WORD32 min_cu = 4;
475
0
                UWORD8 *pu1_src_tmp = pu1_src_chroma;
476
477
0
                for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
478
0
                {
479
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
480
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
481
482
0
                    if(u4_no_loop_filter_flag)
483
0
                    {
484
0
                        WORD32 tmp_wd = sao_wd_chroma;
485
0
                        while(tmp_wd > 0)
486
0
                        {
487
0
                            if(CTZ(u4_no_loop_filter_flag))
488
0
                            {
489
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
490
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
491
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
492
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
493
0
                            }
494
0
                            else
495
0
                            {
496
0
                                for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
497
0
                                {
498
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
499
0
                                    {
500
0
                                        pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
501
0
                                    }
502
0
                                }
503
504
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
505
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
506
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
507
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
508
0
                            }
509
0
                        }
510
511
0
                        pu1_src_tmp -= sao_wd_chroma;
512
0
                    }
513
514
0
                    pu1_src_tmp += min_cu * src_strd;
515
0
                    pu1_src_copy += min_cu * tmp_strd;
516
0
                }
517
0
            }
518
519
0
        }
520
521
0
    }
522
0
}
523
524
void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
525
4.48M
{
526
4.48M
    codec_t *ps_codec = ps_sao_ctxt->ps_codec;
527
4.48M
    UWORD8 *pu1_src_luma;
528
4.48M
    UWORD8 *pu1_src_chroma;
529
4.48M
    WORD32 src_strd;
530
4.48M
    WORD32 ctb_size;
531
4.48M
    WORD32 log2_ctb_size;
532
4.48M
    sps_t *ps_sps;
533
4.48M
    sao_t *ps_sao;
534
4.48M
    pps_t *ps_pps;
535
4.48M
    slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
536
4.48M
    tile_t *ps_tile;
537
4.48M
    UWORD16 *pu1_slice_idx;
538
4.48M
    UWORD16 *pu1_tile_idx;
539
4.48M
    WORD32 row, col;
540
4.48M
    UWORD8 au1_avail_luma[8];
541
4.48M
    UWORD8 au1_avail_chroma[8];
542
4.48M
    UWORD8 au1_tile_slice_boundary[8];
543
4.48M
    UWORD8 au4_ilf_across_tile_slice_enable[8];
544
4.48M
    WORD32 i;
545
4.48M
    UWORD8 *pu1_src_top_luma;
546
4.48M
    UWORD8 *pu1_src_top_chroma;
547
4.48M
    UWORD8 *pu1_src_left_luma;
548
4.48M
    UWORD8 *pu1_src_left_chroma;
549
4.48M
    UWORD8 au1_src_top_right[2];
550
4.48M
    UWORD8 au1_src_bot_left[2];
551
4.48M
    UWORD8 *pu1_no_loop_filter_flag;
552
4.48M
    UWORD8 *pu1_src_backup_luma;
553
4.48M
    UWORD8 *pu1_src_backup_chroma;
554
4.48M
    WORD32 backup_strd;
555
4.48M
    WORD32 loop_filter_strd;
556
557
4.48M
    WORD32 no_loop_filter_enabled_luma = 0;
558
4.48M
    WORD32 no_loop_filter_enabled_chroma = 0;
559
4.48M
    UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
560
4.48M
    UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
561
4.48M
    UWORD8 *pu1_sao_src_luma_top_left_ctb;
562
4.48M
    UWORD8 *pu1_sao_src_chroma_top_left_ctb;
563
4.48M
    UWORD8 *pu1_sao_src_top_left_luma_top_right;
564
4.48M
    UWORD8 *pu1_sao_src_top_left_chroma_top_right;
565
4.48M
    UWORD8  u1_sao_src_top_left_luma_bot_left;
566
4.48M
    UWORD8  *pu1_sao_src_top_left_luma_bot_left;
567
4.48M
    UWORD8 *au1_sao_src_top_left_chroma_bot_left;
568
4.48M
    UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
569
    /* Only first 5 values are used, but arrays are large
570
     enough so that SIMD functions can read 64 bits at a time */
571
4.48M
    WORD8 ai1_offset_y[8] = {0};
572
4.48M
    WORD8 ai1_offset_cb[8] = {0};
573
4.48M
    WORD8 ai1_offset_cr[8] = {0};
574
4.48M
    WORD32  chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
575
576
4.48M
    PROFILE_DISABLE_SAO();
577
578
4.48M
    ps_sps = ps_sao_ctxt->ps_sps;
579
4.48M
    ps_pps = ps_sao_ctxt->ps_pps;
580
4.48M
    ps_tile = ps_sao_ctxt->ps_tile;
581
582
4.48M
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
583
4.48M
    ctb_size = (1 << log2_ctb_size);
584
4.48M
    src_strd = ps_sao_ctxt->ps_codec->i4_strd;
585
4.48M
    ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
586
4.48M
    ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
587
588
4.48M
    pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
589
4.48M
    pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
590
4.48M
    pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
591
4.48M
    pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
592
593
    /* Per-CTB-row storage of the left/top-left values - needed for column tiles */
594
4.48M
    pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
595
4.48M
    pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
596
4.48M
    pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
597
4.48M
    pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
598
4.48M
    u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
599
4.48M
    pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
600
4.48M
    au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
601
4.48M
    pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
602
4.48M
    pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
603
4.48M
    pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
604
605
4.48M
    ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
606
4.48M
    loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
607
4.48M
    backup_strd = 2 * MAX_CTB_SIZE;
608
609
4.48M
    DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
610
611
4.48M
    {
612
        /* Check the loop filter flags and copy the original values for back up */
613
        /* Luma */
614
615
        /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs
616
         * can belong to different slice with their own sao_enable flag */
617
4.48M
        {
618
4.48M
            UWORD32 u4_no_loop_filter_flag;
619
4.48M
            WORD32 loop_filter_bit_pos;
620
4.48M
            WORD32 log2_min_cu = 3;
621
4.48M
            WORD32 min_cu = (1 << log2_min_cu);
622
4.48M
            UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
623
4.48M
            WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
624
4.48M
            WORD32 sao_blk_wd = ctb_size;
625
4.48M
            WORD32 remaining_rows;
626
4.48M
            WORD32 remaining_cols;
627
628
4.48M
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
629
4.48M
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
630
4.48M
            if(remaining_rows <= SAO_SHIFT_CTB)
631
413k
                sao_blk_ht += remaining_rows;
632
4.48M
            if(remaining_cols <= SAO_SHIFT_CTB)
633
170k
                sao_blk_wd += remaining_cols;
634
635
4.48M
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
636
4.48M
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
637
638
4.48M
            pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
639
640
4.48M
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
641
4.48M
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
642
4.48M
            if(ps_sao_ctxt->i4_ctb_x > 0)
643
4.31M
                loop_filter_bit_pos -= 1;
644
645
4.48M
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
646
4.48M
                            (loop_filter_bit_pos >> 3);
647
648
4.48M
            for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
649
33.0M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
650
28.5M
            {
651
28.5M
                WORD32 tmp_wd = sao_blk_wd;
652
653
28.5M
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
654
28.5M
                                (loop_filter_bit_pos & 7);
655
28.5M
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
656
657
28.5M
                if(u4_no_loop_filter_flag)
658
1.16M
                {
659
1.16M
                    no_loop_filter_enabled_luma = 1;
660
2.43M
                    while(tmp_wd > 0)
661
1.27M
                    {
662
1.27M
                        if(CTZ(u4_no_loop_filter_flag))
663
97.5k
                        {
664
97.5k
                            pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
665
97.5k
                            pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
666
97.5k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
667
97.5k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
668
97.5k
                        }
669
1.17M
                        else
670
1.17M
                        {
671
10.3M
                            for(row = 0; row < min_cu; row++)
672
9.19M
                            {
673
449M
                                for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
674
440M
                                {
675
440M
                                    pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
676
440M
                                }
677
9.19M
                            }
678
1.17M
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
679
1.17M
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
680
1.17M
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
681
1.17M
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
682
1.17M
                        }
683
1.27M
                    }
684
685
1.16M
                    pu1_src_tmp_luma -= sao_blk_wd;
686
1.16M
                    pu1_src_backup_luma -= sao_blk_wd;
687
1.16M
                }
688
689
28.5M
                pu1_src_tmp_luma += (src_strd << log2_min_cu);
690
28.5M
                pu1_src_backup_luma += (backup_strd << log2_min_cu);
691
28.5M
            }
692
4.48M
        }
693
694
        /* Chroma */
695
696
4.48M
        {
697
4.48M
            UWORD32 u4_no_loop_filter_flag;
698
4.48M
            WORD32 loop_filter_bit_pos;
699
4.48M
            WORD32 log2_min_cu = 3;
700
4.48M
            WORD32 min_cu = (1 << log2_min_cu);
701
4.48M
            UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
702
4.48M
            WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
703
4.48M
            WORD32 sao_blk_wd = ctb_size;
704
4.48M
            WORD32 remaining_rows;
705
4.48M
            WORD32 remaining_cols;
706
707
4.48M
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
708
4.48M
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
709
4.48M
            if(remaining_rows <= 2 * SAO_SHIFT_CTB)
710
413k
                sao_blk_ht += remaining_rows;
711
4.48M
            if(remaining_cols <= 2 * SAO_SHIFT_CTB)
712
170k
                sao_blk_wd += remaining_cols;
713
714
4.48M
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
715
4.48M
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
716
717
4.48M
            pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
718
719
4.48M
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
720
4.48M
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
721
4.48M
            if(ps_sao_ctxt->i4_ctb_x > 0)
722
4.31M
                loop_filter_bit_pos -= 2;
723
724
4.48M
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
725
4.48M
                            (loop_filter_bit_pos >> 3);
726
727
4.48M
            for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
728
33.0M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
729
28.5M
            {
730
28.5M
                WORD32 tmp_wd = sao_blk_wd;
731
732
28.5M
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
733
28.5M
                                (loop_filter_bit_pos & 7);
734
28.5M
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
735
736
28.5M
                if(u4_no_loop_filter_flag)
737
1.16M
                {
738
1.16M
                    no_loop_filter_enabled_chroma = 1;
739
2.43M
                    while(tmp_wd > 0)
740
1.26M
                    {
741
1.26M
                        if(CTZ(u4_no_loop_filter_flag))
742
95.5k
                        {
743
95.5k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
744
95.5k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
745
95.5k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
746
95.5k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
747
95.5k
                        }
748
1.17M
                        else
749
1.17M
                        {
750
5.81M
                            for(row = 0; row < min_cu / 2; row++)
751
4.64M
                            {
752
265M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
753
260M
                                {
754
260M
                                    pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
755
260M
                                }
756
4.64M
                            }
757
758
1.17M
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
759
1.17M
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
760
1.17M
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
761
1.17M
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
762
1.17M
                        }
763
1.26M
                    }
764
765
1.16M
                    pu1_src_tmp_chroma -= sao_blk_wd;
766
1.16M
                    pu1_src_backup_chroma -= sao_blk_wd;
767
1.16M
                }
768
769
28.5M
                pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
770
28.5M
                pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
771
28.5M
            }
772
4.48M
        }
773
4.48M
    }
774
775
4.48M
    DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
776
777
    /* Top-left CTB */
778
4.48M
    if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
779
3.91M
    {
780
3.91M
        WORD32 sao_wd_luma = SAO_SHIFT_CTB;
781
3.91M
        WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
782
3.91M
        WORD32 sao_ht_luma = SAO_SHIFT_CTB;
783
3.91M
        WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
784
785
3.91M
        WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
786
3.91M
        WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
787
3.91M
        WORD32 au4_idx_tl[8], idx_tl;
788
789
3.91M
        slice_header_t *ps_slice_hdr_top_left;
790
3.91M
        {
791
3.91M
            WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
792
3.91M
                                        (ps_sao_ctxt->i4_ctb_x - 1);
793
3.91M
            ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx];
794
3.91M
        }
795
796
797
3.91M
        pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
798
3.91M
        pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
799
3.91M
        ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
800
3.91M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
801
3.91M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
802
3.91M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
803
3.91M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
804
805
3.91M
        if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag)
806
413k
        {
807
413k
            if(0 == ps_sao->b3_y_type_idx)
808
347k
            {
809
                /* Update left, top and top-left */
810
3.12M
                for(row = 0; row < sao_ht_luma; row++)
811
2.77M
                {
812
2.77M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
813
2.77M
                }
814
347k
                pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
815
816
347k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
817
818
819
347k
            }
820
821
65.9k
            else if(1 == ps_sao->b3_y_type_idx)
822
36.7k
            {
823
36.7k
                ai1_offset_y[1] = ps_sao->b4_y_offset_1;
824
36.7k
                ai1_offset_y[2] = ps_sao->b4_y_offset_2;
825
36.7k
                ai1_offset_y[3] = ps_sao->b4_y_offset_3;
826
36.7k
                ai1_offset_y[4] = ps_sao->b4_y_offset_4;
827
828
36.7k
                ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
829
36.7k
                                                                          src_strd,
830
36.7k
                                                                          pu1_src_left_luma,
831
36.7k
                                                                          pu1_src_top_luma,
832
36.7k
                                                                          pu1_sao_src_luma_top_left_ctb,
833
36.7k
                                                                          ps_sao->b5_y_band_pos,
834
36.7k
                                                                          ai1_offset_y,
835
36.7k
                                                                          sao_wd_luma,
836
36.7k
                                                                          sao_ht_luma
837
36.7k
                                                                         );
838
36.7k
            }
839
840
29.2k
            else // if(2 <= ps_sao->b3_y_type_idx)
841
29.2k
            {
842
29.2k
                ai1_offset_y[1] = ps_sao->b4_y_offset_1;
843
29.2k
                ai1_offset_y[2] = ps_sao->b4_y_offset_2;
844
29.2k
                ai1_offset_y[3] = ps_sao->b4_y_offset_3;
845
29.2k
                ai1_offset_y[4] = ps_sao->b4_y_offset_4;
846
847
263k
                for(i = 0; i < 8; i++)
848
233k
                {
849
233k
                    au1_avail_luma[i] = 255;
850
233k
                    au1_tile_slice_boundary[i] = 0;
851
233k
                    au4_idx_tl[i] = 0;
852
233k
                    au4_ilf_across_tile_slice_enable[i] = 1;
853
233k
                }
854
855
                /******************************************************************
856
                 * Derive the  Top-left CTB's neighbor pixel's slice indices.
857
                 *
858
                 *          TL_T
859
                 *       4  _2__5________
860
                 *     0   |    |       |
861
                 *    TL_L | TL | 1 TL_R|
862
                 *         |____|_______|____
863
                 *        6|TL_D|7      |    |
864
                 *         | 3  |       |    |
865
                 *         |____|_______|    |
866
                 *              |            |
867
                 *              |            |
868
                 *              |____________|
869
                 *
870
                 *****************************************************************/
871
872
                /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/
873
29.2k
                {
874
29.2k
                    if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
875
16.3k
                    {
876
16.3k
                        {
877
                            /*Assuming that sao shift is uniform along x and y directions*/
878
16.3k
                            if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
879
0
                            {
880
0
                                ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
881
0
                                ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
882
0
                            }
883
16.3k
                            else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
884
16.3k
                            {
885
16.3k
                                ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
886
16.3k
                                ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
887
16.3k
                            }
888
16.3k
                            ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
889
16.3k
                            ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
890
891
16.3k
                            ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
892
16.3k
                            ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
893
894
16.3k
                            ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
895
16.3k
                            ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
896
897
16.3k
                            ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
898
16.3k
                            ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
899
16.3k
                        }
900
901
16.3k
                        if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
902
6.89k
                        {
903
                            /*Calculate slice indices for neighbor pixels*/
904
6.89k
                            idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
905
6.89k
                            au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
906
6.89k
                            au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
907
6.89k
                            au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
908
6.89k
                            au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
909
6.89k
                            au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
910
911
6.89k
                            if((0 == (1 << log2_ctb_size) - sao_wd_luma))
912
0
                            {
913
0
                                if(ps_sao_ctxt->i4_ctb_x == 1)
914
0
                                {
915
0
                                    au4_idx_tl[6] = -1;
916
0
                                    au4_idx_tl[4] = -1;
917
0
                                }
918
0
                                else
919
0
                                {
920
0
                                    au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
921
0
                                }
922
0
                                if(ps_sao_ctxt->i4_ctb_y == 1)
923
0
                                {
924
0
                                    au4_idx_tl[5] = -1;
925
0
                                    au4_idx_tl[4] = -1;
926
0
                                }
927
0
                                else
928
0
                                {
929
0
                                    au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
930
0
                                    au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
931
0
                                }
932
0
                                au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
933
0
                            }
934
935
                            /* Verify that the neighbor CTBs do not cross the picture boundary.
936
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
937
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags.
938
                             * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
939
                             * the respective pixel's flags are checked
940
                             */
941
942
6.89k
                            if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
943
0
                            {
944
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
945
0
                                au4_ilf_across_tile_slice_enable[6] = 0;
946
0
                            }
947
6.89k
                            else
948
6.89k
                            {
949
6.89k
                                au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
950
6.89k
                            }
951
6.89k
                            if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
952
0
                            {
953
0
                                au4_ilf_across_tile_slice_enable[5] = 0;
954
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
955
0
                            }
956
6.89k
                            else
957
6.89k
                            {
958
6.89k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
959
6.89k
                                au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
960
6.89k
                            }
961
6.89k
                            au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
962
6.89k
                            au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
963
6.89k
                            au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
964
6.89k
                            au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
965
6.89k
                            au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
966
967
6.89k
                            if(au4_idx_tl[5] > idx_tl)
968
611
                            {
969
611
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
970
611
                            }
971
972
                            /*
973
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
974
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags.
975
                             * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
976
                             * the respective pixel's flags are checked
977
                             */
978
61.9k
                            for(i = 0; i < 8; i++)
979
55.0k
                            {
980
                                /*Sets the edges that lie on the slice/tile boundary*/
981
55.0k
                                if(au4_idx_tl[i] != idx_tl)
982
13.6k
                                {
983
13.6k
                                    au1_tile_slice_boundary[i] = 1;
984
13.6k
                                }
985
41.4k
                                else
986
41.4k
                                {
987
41.4k
                                    au4_ilf_across_tile_slice_enable[i] = 1;
988
41.4k
                                }
989
55.0k
                            }
990
991
6.89k
                            ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
992
6.89k
                        }
993
994
16.3k
                        if(ps_pps->i1_tiles_enabled_flag)
995
9.73k
                        {
996
                            /* Calculate availability flags at tile boundary */
997
9.73k
                            if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
998
4.60k
                            {
999
                                /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1000
4.60k
                                if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1001
3.81k
                                {
1002
                                    /*Set the boundary arrays*/
1003
                                    /*Calculate tile indices for neighbor pixels*/
1004
3.81k
                                    idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1005
3.81k
                                    au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1006
3.81k
                                    au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1007
3.81k
                                    au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1008
3.81k
                                    au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1009
3.81k
                                    au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1010
1011
3.81k
                                    if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1012
0
                                    {
1013
0
                                        if(ps_sao_ctxt->i4_ctb_x == 1)
1014
0
                                        {
1015
0
                                            au4_idx_tl[6] = -1;
1016
0
                                            au4_idx_tl[4] = -1;
1017
0
                                        }
1018
0
                                        else
1019
0
                                        {
1020
0
                                            au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1021
0
                                        }
1022
0
                                        if(ps_sao_ctxt->i4_ctb_y == 1)
1023
0
                                        {
1024
0
                                            au4_idx_tl[5] = -1;
1025
0
                                            au4_idx_tl[4] = -1;
1026
0
                                        }
1027
0
                                        else
1028
0
                                        {
1029
0
                                            au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1030
0
                                            au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1031
0
                                        }
1032
0
                                        au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1033
0
                                    }
1034
34.3k
                                    for(i = 0; i < 8; i++)
1035
30.5k
                                    {
1036
                                        /*Sets the edges that lie on the tile boundary*/
1037
30.5k
                                        if(au4_idx_tl[i] != idx_tl)
1038
12.5k
                                        {
1039
12.5k
                                            au1_tile_slice_boundary[i] |= 1;
1040
12.5k
                                            au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1041
12.5k
                                        }
1042
30.5k
                                    }
1043
3.81k
                                }
1044
4.60k
                            }
1045
9.73k
                        }
1046
1047
1048
                        /*Set availability flags based on tile and slice boundaries*/
1049
147k
                        for(i = 0; i < 8; i++)
1050
130k
                        {
1051
                            /*Sets the edges that lie on the slice/tile boundary*/
1052
130k
                            if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1053
12.7k
                            {
1054
12.7k
                                au1_avail_luma[i] = 0;
1055
12.7k
                            }
1056
130k
                        }
1057
16.3k
                    }
1058
29.2k
                }
1059
1060
29.2k
                if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
1061
0
                {
1062
0
                    au1_avail_luma[0] = 0;
1063
0
                    au1_avail_luma[4] = 0;
1064
0
                    au1_avail_luma[6] = 0;
1065
0
                }
1066
1067
29.2k
                if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1068
0
                {
1069
0
                    au1_avail_luma[1] = 0;
1070
0
                    au1_avail_luma[5] = 0;
1071
0
                    au1_avail_luma[7] = 0;
1072
0
                }
1073
                //y==1 case
1074
29.2k
                if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
1075
0
                {
1076
0
                    au1_avail_luma[2] = 0;
1077
0
                    au1_avail_luma[4] = 0;
1078
0
                    au1_avail_luma[5] = 0;
1079
0
                }
1080
29.2k
                if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1081
0
                {
1082
0
                    au1_avail_luma[3] = 0;
1083
0
                    au1_avail_luma[6] = 0;
1084
0
                    au1_avail_luma[7] = 0;
1085
0
                }
1086
1087
29.2k
                {
1088
29.2k
                    au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
1089
29.2k
                    u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
1090
29.2k
                    ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1091
29.2k
                                                                      src_strd,
1092
29.2k
                                                                      pu1_src_left_luma,
1093
29.2k
                                                                      pu1_src_top_luma,
1094
29.2k
                                                                      pu1_sao_src_luma_top_left_ctb,
1095
29.2k
                                                                      au1_src_top_right,
1096
29.2k
                                                                      &u1_sao_src_top_left_luma_bot_left,
1097
29.2k
                                                                      au1_avail_luma,
1098
29.2k
                                                                      ai1_offset_y,
1099
29.2k
                                                                      sao_wd_luma,
1100
29.2k
                                                                      sao_ht_luma);
1101
29.2k
                }
1102
29.2k
            }
1103
1104
413k
        }
1105
3.50M
        else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1106
755k
        {
1107
            /* Update left, top and top-left */
1108
6.79M
            for(row = 0; row < sao_ht_luma; row++)
1109
6.03M
            {
1110
6.03M
                pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1111
6.03M
            }
1112
755k
            pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1113
1114
755k
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1115
755k
        }
1116
1117
3.91M
        if(ps_slice_hdr_top_left->i1_slice_sao_chroma_flag)
1118
327k
        {
1119
327k
            if(0 == ps_sao->b3_cb_type_idx)
1120
282k
            {
1121
2.54M
                for(row = 0; row < sao_ht_chroma; row++)
1122
2.26M
                {
1123
2.26M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1124
2.26M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1125
2.26M
                }
1126
282k
                pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1127
282k
                pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1128
1129
282k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1130
1131
282k
            }
1132
1133
44.8k
            else if(1 == ps_sao->b3_cb_type_idx)
1134
20.8k
            {
1135
20.8k
                ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1136
20.8k
                ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1137
20.8k
                ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1138
20.8k
                ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1139
1140
20.8k
                ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1141
20.8k
                ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1142
20.8k
                ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1143
20.8k
                ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1144
1145
20.8k
                if(chroma_yuv420sp_vu)
1146
5.79k
                {
1147
5.79k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1148
5.79k
                                                                                src_strd,
1149
5.79k
                                                                                pu1_src_left_chroma,
1150
5.79k
                                                                                pu1_src_top_chroma,
1151
5.79k
                                                                                pu1_sao_src_chroma_top_left_ctb,
1152
5.79k
                                                                                ps_sao->b5_cr_band_pos,
1153
5.79k
                                                                                ps_sao->b5_cb_band_pos,
1154
5.79k
                                                                                ai1_offset_cr,
1155
5.79k
                                                                                ai1_offset_cb,
1156
5.79k
                                                                                sao_wd_chroma,
1157
5.79k
                                                                                sao_ht_chroma
1158
5.79k
                                                                               );
1159
5.79k
                }
1160
15.0k
                else
1161
15.0k
                {
1162
15.0k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1163
15.0k
                                                                                src_strd,
1164
15.0k
                                                                                pu1_src_left_chroma,
1165
15.0k
                                                                                pu1_src_top_chroma,
1166
15.0k
                                                                                pu1_sao_src_chroma_top_left_ctb,
1167
15.0k
                                                                                ps_sao->b5_cb_band_pos,
1168
15.0k
                                                                                ps_sao->b5_cr_band_pos,
1169
15.0k
                                                                                ai1_offset_cb,
1170
15.0k
                                                                                ai1_offset_cr,
1171
15.0k
                                                                                sao_wd_chroma,
1172
15.0k
                                                                                sao_ht_chroma
1173
15.0k
                                                                               );
1174
15.0k
                }
1175
20.8k
            }
1176
1177
24.0k
            else // if(2 <= ps_sao->b3_cb_type_idx)
1178
24.0k
            {
1179
24.0k
                ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1180
24.0k
                ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1181
24.0k
                ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1182
24.0k
                ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1183
1184
24.0k
                ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1185
24.0k
                ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1186
24.0k
                ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1187
24.0k
                ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1188
216k
                for(i = 0; i < 8; i++)
1189
192k
                {
1190
192k
                    au1_avail_chroma[i] = 255;
1191
192k
                    au1_tile_slice_boundary[i] = 0;
1192
192k
                    au4_idx_tl[i] = 0;
1193
192k
                    au4_ilf_across_tile_slice_enable[i] = 1;
1194
192k
                }
1195
                /*In case of slices*/
1196
24.0k
                {
1197
24.0k
                    if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1198
13.3k
                    {
1199
13.3k
                        if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
1200
1.63k
                        {
1201
1.63k
                            ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
1202
1.63k
                            ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
1203
1.63k
                        }
1204
11.7k
                        else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
1205
11.0k
                        {
1206
11.0k
                            ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
1207
11.0k
                            ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
1208
11.0k
                        }
1209
13.3k
                        ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
1210
13.3k
                        ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
1211
1212
13.3k
                        ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
1213
13.3k
                        ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
1214
1215
13.3k
                        ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
1216
13.3k
                        ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
1217
1218
13.3k
                        ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
1219
13.3k
                        ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
1220
1221
13.3k
                        if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1222
4.24k
                        {
1223
1224
4.24k
                            idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1225
4.24k
                            au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1226
4.24k
                            au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1227
4.24k
                            au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1228
4.24k
                            au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1229
4.24k
                            au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1230
1231
4.24k
                            if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
1232
2.18k
                            {
1233
2.18k
                                if(ps_sao_ctxt->i4_ctb_x == 1)
1234
540
                                {
1235
540
                                    au4_idx_tl[6] = -1;
1236
540
                                    au4_idx_tl[4] = -1;
1237
540
                                }
1238
1.64k
                                else
1239
1.64k
                                {
1240
1.64k
                                    au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1241
1.64k
                                }
1242
2.18k
                                if(ps_sao_ctxt->i4_ctb_y == 1)
1243
17
                                {
1244
17
                                    au4_idx_tl[5] = -1;
1245
17
                                    au4_idx_tl[4] = -1;
1246
17
                                }
1247
2.17k
                                else
1248
2.17k
                                {
1249
2.17k
                                    au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1250
2.17k
                                    au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1251
2.17k
                                }
1252
2.18k
                                au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1253
2.18k
                            }
1254
1255
                            /* Verify that the neighbor ctbs don't cross pic boundary
1256
                             * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
1257
4.24k
                            if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
1258
540
                            {
1259
540
                                au4_ilf_across_tile_slice_enable[4] = 0;
1260
540
                                au4_ilf_across_tile_slice_enable[6] = 0;
1261
540
                            }
1262
3.70k
                            else
1263
3.70k
                            {
1264
3.70k
                                au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1265
3.70k
                            }
1266
4.24k
                            if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
1267
17
                            {
1268
17
                                au4_ilf_across_tile_slice_enable[5] = 0;
1269
17
                                au4_ilf_across_tile_slice_enable[4] = 0;
1270
17
                            }
1271
4.22k
                            else
1272
4.22k
                            {
1273
4.22k
                                au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1274
4.22k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1275
4.22k
                            }
1276
4.24k
                            au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1277
4.24k
                            au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1278
4.24k
                            au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1279
4.24k
                            au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1280
4.24k
                            au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1281
                            /*
1282
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1283
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags
1284
                             */
1285
38.1k
                            for(i = 0; i < 8; i++)
1286
33.9k
                            {
1287
                                /*Sets the edges that lie on the slice/tile boundary*/
1288
33.9k
                                if(au4_idx_tl[i] != idx_tl)
1289
6.46k
                                {
1290
6.46k
                                    au1_tile_slice_boundary[i] = 1;
1291
6.46k
                                }
1292
27.4k
                                else
1293
27.4k
                                {
1294
27.4k
                                    au4_ilf_across_tile_slice_enable[i] = 1;
1295
27.4k
                                }
1296
33.9k
                            }
1297
1298
                            /*Reset indices*/
1299
38.1k
                            for(i = 0; i < 8; i++)
1300
33.9k
                            {
1301
33.9k
                                au4_idx_tl[i] = 0;
1302
33.9k
                            }
1303
4.24k
                        }
1304
13.3k
                        if(ps_pps->i1_tiles_enabled_flag)
1305
9.24k
                        {
1306
                            /* Calculate availability flags at tile boundary */
1307
9.24k
                            if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1308
3.86k
                            {
1309
                                /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1310
3.86k
                                if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1311
3.28k
                                {
1312
                                    /*Set the boundary arrays*/
1313
                                    /*Calculate tile indices for neighbor pixels*/
1314
3.28k
                                    idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1315
3.28k
                                    au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1316
3.28k
                                    au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1317
3.28k
                                    au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1318
3.28k
                                    au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1319
3.28k
                                    au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1320
1321
3.28k
                                    if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1322
0
                                    {
1323
0
                                        if(ps_sao_ctxt->i4_ctb_x == 1)
1324
0
                                        {
1325
0
                                            au4_idx_tl[6] = -1;
1326
0
                                            au4_idx_tl[4] = -1;
1327
0
                                        }
1328
0
                                        else
1329
0
                                        {
1330
0
                                            au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1331
0
                                        }
1332
0
                                        if(ps_sao_ctxt->i4_ctb_y == 1)
1333
0
                                        {
1334
0
                                            au4_idx_tl[5] = -1;
1335
0
                                            au4_idx_tl[4] = -1;
1336
0
                                        }
1337
0
                                        else
1338
0
                                        {
1339
0
                                            au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1340
0
                                            au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1341
0
                                        }
1342
0
                                        au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1343
0
                                    }
1344
29.5k
                                    for(i = 0; i < 8; i++)
1345
26.2k
                                    {
1346
                                        /*Sets the edges that lie on the tile boundary*/
1347
26.2k
                                        if(au4_idx_tl[i] != idx_tl)
1348
10.3k
                                        {
1349
10.3k
                                            au1_tile_slice_boundary[i] |= 1;
1350
10.3k
                                            au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1351
10.3k
                                        }
1352
26.2k
                                    }
1353
3.28k
                                }
1354
3.86k
                            }
1355
9.24k
                        }
1356
1357
120k
                        for(i = 0; i < 8; i++)
1358
106k
                        {
1359
                            /*Sets the edges that lie on the slice/tile boundary*/
1360
106k
                            if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1361
11.2k
                            {
1362
11.2k
                                au1_avail_chroma[i] = 0;
1363
11.2k
                            }
1364
106k
                        }
1365
13.3k
                    }
1366
24.0k
                }
1367
1368
24.0k
                if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
1369
641
                {
1370
641
                    au1_avail_chroma[0] = 0;
1371
641
                    au1_avail_chroma[4] = 0;
1372
641
                    au1_avail_chroma[6] = 0;
1373
641
                }
1374
24.0k
                if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1375
0
                {
1376
0
                    au1_avail_chroma[1] = 0;
1377
0
                    au1_avail_chroma[5] = 0;
1378
0
                    au1_avail_chroma[7] = 0;
1379
0
                }
1380
1381
24.0k
                if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1382
331
                {
1383
331
                    au1_avail_chroma[2] = 0;
1384
331
                    au1_avail_chroma[4] = 0;
1385
331
                    au1_avail_chroma[5] = 0;
1386
331
                }
1387
24.0k
                if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1388
0
                {
1389
0
                    au1_avail_chroma[3] = 0;
1390
0
                    au1_avail_chroma[6] = 0;
1391
0
                    au1_avail_chroma[7] = 0;
1392
0
                }
1393
1394
24.0k
                {
1395
24.0k
                    au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
1396
24.0k
                    au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
1397
24.0k
                    au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
1398
24.0k
                    au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
1399
24.0k
                    if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
1400
2.38k
                    {
1401
2.38k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1402
2.38k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1403
2.38k
                    }
1404
1405
24.0k
                    if(chroma_yuv420sp_vu)
1406
6.31k
                    {
1407
6.31k
                        ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1408
6.31k
                                                                             src_strd,
1409
6.31k
                                                                             pu1_src_left_chroma,
1410
6.31k
                                                                             pu1_src_top_chroma,
1411
6.31k
                                                                             pu1_sao_src_chroma_top_left_ctb,
1412
6.31k
                                                                             au1_src_top_right,
1413
6.31k
                                                                             au1_sao_src_top_left_chroma_bot_left,
1414
6.31k
                                                                             au1_avail_chroma,
1415
6.31k
                                                                             ai1_offset_cr,
1416
6.31k
                                                                             ai1_offset_cb,
1417
6.31k
                                                                             sao_wd_chroma,
1418
6.31k
                                                                             sao_ht_chroma);
1419
6.31k
                    }
1420
17.7k
                    else
1421
17.7k
                    {
1422
17.7k
                        ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1423
17.7k
                                                                             src_strd,
1424
17.7k
                                                                             pu1_src_left_chroma,
1425
17.7k
                                                                             pu1_src_top_chroma,
1426
17.7k
                                                                             pu1_sao_src_chroma_top_left_ctb,
1427
17.7k
                                                                             au1_src_top_right,
1428
17.7k
                                                                             au1_sao_src_top_left_chroma_bot_left,
1429
17.7k
                                                                             au1_avail_chroma,
1430
17.7k
                                                                             ai1_offset_cb,
1431
17.7k
                                                                             ai1_offset_cr,
1432
17.7k
                                                                             sao_wd_chroma,
1433
17.7k
                                                                             sao_ht_chroma);
1434
17.7k
                    }
1435
24.0k
                }
1436
24.0k
            }
1437
327k
        }
1438
3.58M
        else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1439
839k
        {
1440
7.55M
            for(row = 0; row < sao_ht_chroma; row++)
1441
6.71M
            {
1442
6.71M
                pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1443
6.71M
                pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1444
6.71M
            }
1445
839k
            pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1446
839k
            pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1447
1448
839k
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1449
839k
        }
1450
1451
3.91M
        pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
1452
3.91M
        pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
1453
3.91M
        ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
1454
3.91M
    }
1455
1456
1457
    /* Top CTB */
1458
4.48M
    if((ps_sao_ctxt->i4_ctb_y > 0))
1459
4.06M
    {
1460
4.06M
        WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
1461
4.06M
        WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
1462
4.06M
        WORD32 sao_ht_luma = SAO_SHIFT_CTB;
1463
4.06M
        WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
1464
1465
4.06M
        WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
1466
4.06M
        WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
1467
4.06M
        WORD32 au4_idx_t[8], idx_t;
1468
1469
4.06M
        WORD32 remaining_cols;
1470
1471
4.06M
        slice_header_t *ps_slice_hdr_top;
1472
4.06M
        {
1473
4.06M
            WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
1474
4.06M
                                        (ps_sao_ctxt->i4_ctb_x);
1475
4.06M
            ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx];
1476
4.06M
        }
1477
1478
4.06M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
1479
4.06M
        if(remaining_cols <= SAO_SHIFT_CTB)
1480
156k
        {
1481
156k
            sao_wd_luma += remaining_cols;
1482
156k
        }
1483
4.06M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
1484
4.06M
        if(remaining_cols <= 2 * SAO_SHIFT_CTB)
1485
156k
        {
1486
156k
            sao_wd_chroma += remaining_cols;
1487
156k
        }
1488
1489
4.06M
        pu1_src_luma -= (sao_ht_luma * src_strd);
1490
4.06M
        pu1_src_chroma -= (sao_ht_chroma * src_strd);
1491
4.06M
        ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
1492
4.06M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1493
4.06M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1494
4.06M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
1495
4.06M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
1496
1497
4.06M
        if(0 != sao_wd_luma)
1498
4.06M
        {
1499
4.06M
            if(ps_slice_hdr_top->i1_slice_sao_luma_flag)
1500
445k
            {
1501
445k
                if(0 == ps_sao->b3_y_type_idx)
1502
373k
                {
1503
                    /* Update left, top and top-left */
1504
3.35M
                    for(row = 0; row < sao_ht_luma; row++)
1505
2.98M
                    {
1506
2.98M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1507
2.98M
                    }
1508
373k
                    pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1509
1510
373k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1511
1512
373k
                }
1513
1514
72.6k
                else if(1 == ps_sao->b3_y_type_idx)
1515
40.1k
                {
1516
40.1k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1517
40.1k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1518
40.1k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1519
40.1k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1520
1521
40.1k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1522
40.1k
                                                                              src_strd,
1523
40.1k
                                                                              pu1_src_left_luma,
1524
40.1k
                                                                              pu1_src_top_luma,
1525
40.1k
                                                                              pu1_sao_src_luma_top_left_ctb,
1526
40.1k
                                                                              ps_sao->b5_y_band_pos,
1527
40.1k
                                                                              ai1_offset_y,
1528
40.1k
                                                                              sao_wd_luma,
1529
40.1k
                                                                              sao_ht_luma
1530
40.1k
                                                                             );
1531
40.1k
                }
1532
1533
32.4k
                else // if(2 <= ps_sao->b3_y_type_idx)
1534
32.4k
                {
1535
32.4k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1536
32.4k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1537
32.4k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1538
32.4k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1539
1540
32.4k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
1541
32.4k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
1542
32.4k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
1543
1544
292k
                    for(i = 0; i < 8; i++)
1545
259k
                    {
1546
1547
259k
                        au4_ilf_across_tile_slice_enable[i] = 1;
1548
259k
                    }
1549
                    /******************************************************************
1550
                     * Derive the  Top-left CTB's neighbor pixel's slice indices.
1551
                     *
1552
                     *               T_T
1553
                     *          ____________
1554
                     *         |    |       |
1555
                     *         | T_L|  T    |T_R
1556
                     *         |    | ______|____
1557
                     *         |    |  T_D  |    |
1558
                     *         |    |       |    |
1559
                     *         |____|_______|    |
1560
                     *              |            |
1561
                     *              |            |
1562
                     *              |____________|
1563
                     *
1564
                     *****************************************************************/
1565
1566
                    /*In case of slices*/
1567
32.4k
                    {
1568
32.4k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1569
17.6k
                        {
1570
1571
17.6k
                            ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1572
17.6k
                            ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1573
1574
17.6k
                            ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1575
17.6k
                            ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1576
1577
17.6k
                            ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1578
17.6k
                            ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1579
1580
17.6k
                            ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1581
17.6k
                            ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1582
1583
17.6k
                            ctbx_t = ps_sao_ctxt->i4_ctb_x;
1584
17.6k
                            ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1585
1586
17.6k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1587
7.42k
                            {
1588
                                /*Calculate neighbor ctb slice indices*/
1589
7.42k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1590
1.01k
                                {
1591
1.01k
                                    au4_idx_t[0] = -1;
1592
1.01k
                                    au4_idx_t[6] = -1;
1593
1.01k
                                    au4_idx_t[4] = -1;
1594
1.01k
                                }
1595
6.40k
                                else
1596
6.40k
                                {
1597
6.40k
                                    au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1598
6.40k
                                    au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1599
6.40k
                                }
1600
7.42k
                                idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1601
7.42k
                                au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1602
7.42k
                                au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1603
7.42k
                                au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1604
1605
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1606
7.42k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1607
1.01k
                                {
1608
1.01k
                                    au4_ilf_across_tile_slice_enable[4] = 0;
1609
1.01k
                                    au4_ilf_across_tile_slice_enable[6] = 0;
1610
1.01k
                                    au4_ilf_across_tile_slice_enable[0] = 0;
1611
1.01k
                                }
1612
6.40k
                                else
1613
6.40k
                                {
1614
6.40k
                                    au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1615
6.40k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1616
6.40k
                                }
1617
1618
1619
1620
7.42k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1621
7.42k
                                au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1622
7.42k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1623
7.42k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1624
7.42k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1625
1626
7.42k
                                if(au4_idx_t[6] < idx_t)
1627
1.01k
                                {
1628
1.01k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1629
1.01k
                                }
1630
1631
                                /*
1632
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1633
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1634
                                 */
1635
1636
66.7k
                                for(i = 0; i < 8; i++)
1637
59.3k
                                {
1638
                                    /*Sets the edges that lie on the slice/tile boundary*/
1639
59.3k
                                    if(au4_idx_t[i] != idx_t)
1640
16.3k
                                    {
1641
16.3k
                                        au1_tile_slice_boundary[i] = 1;
1642
                                        /*Check for slice flag at such boundaries*/
1643
16.3k
                                    }
1644
43.0k
                                    else
1645
43.0k
                                    {
1646
43.0k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
1647
43.0k
                                    }
1648
59.3k
                                }
1649
                                /*Reset indices*/
1650
66.7k
                                for(i = 0; i < 8; i++)
1651
59.3k
                                {
1652
59.3k
                                    au4_idx_t[i] = 0;
1653
59.3k
                                }
1654
7.42k
                            }
1655
1656
17.6k
                            if(ps_pps->i1_tiles_enabled_flag)
1657
10.4k
                            {
1658
                                /* Calculate availability flags at slice boundary */
1659
10.4k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1660
5.02k
                                {
1661
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1662
5.02k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1663
4.21k
                                    {
1664
                                        /*Calculate neighbor ctb slice indices*/
1665
4.21k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
1666
606
                                        {
1667
606
                                            au4_idx_t[0] = -1;
1668
606
                                            au4_idx_t[6] = -1;
1669
606
                                            au4_idx_t[4] = -1;
1670
606
                                        }
1671
3.60k
                                        else
1672
3.60k
                                        {
1673
3.60k
                                            au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1674
3.60k
                                            au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1675
3.60k
                                        }
1676
4.21k
                                        idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1677
4.21k
                                        au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1678
4.21k
                                        au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1679
4.21k
                                        au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1680
1681
37.9k
                                        for(i = 0; i < 8; i++)
1682
33.7k
                                        {
1683
                                            /*Sets the edges that lie on the tile boundary*/
1684
33.7k
                                            if(au4_idx_t[i] != idx_t)
1685
14.6k
                                            {
1686
14.6k
                                                au1_tile_slice_boundary[i] |= 1;
1687
14.6k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1688
14.6k
                                            }
1689
33.7k
                                        }
1690
4.21k
                                    }
1691
5.02k
                                }
1692
10.4k
                            }
1693
1694
158k
                            for(i = 0; i < 8; i++)
1695
141k
                            {
1696
                                /*Mark a neighbor as unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
1697
141k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1698
16.9k
                                {
1699
16.9k
                                    au1_avail_luma[i] = 0;
1700
16.9k
                                }
1701
141k
                            }
1702
17.6k
                        }
1703
32.4k
                    }
1704
1705
1706
32.4k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
1707
4.24k
                    {
1708
4.24k
                        au1_avail_luma[0] = 0;
1709
4.24k
                        au1_avail_luma[4] = 0;
1710
4.24k
                        au1_avail_luma[6] = 0;
1711
4.24k
                    }
1712
1713
32.4k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
1714
3.25k
                    {
1715
3.25k
                        au1_avail_luma[1] = 0;
1716
3.25k
                        au1_avail_luma[5] = 0;
1717
3.25k
                        au1_avail_luma[7] = 0;
1718
3.25k
                    }
1719
1720
32.4k
                    if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
1721
0
                    {
1722
0
                        au1_avail_luma[2] = 0;
1723
0
                        au1_avail_luma[4] = 0;
1724
0
                        au1_avail_luma[5] = 0;
1725
0
                    }
1726
1727
32.4k
                    if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1728
0
                    {
1729
0
                        au1_avail_luma[3] = 0;
1730
0
                        au1_avail_luma[6] = 0;
1731
0
                        au1_avail_luma[7] = 0;
1732
0
                    }
1733
1734
32.4k
                    {
1735
32.4k
                        au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
1736
32.4k
                        u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
1737
32.4k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1738
32.4k
                                                                          src_strd,
1739
32.4k
                                                                          pu1_src_left_luma,
1740
32.4k
                                                                          pu1_src_top_luma,
1741
32.4k
                                                                          pu1_sao_src_luma_top_left_ctb,
1742
32.4k
                                                                          au1_src_top_right,
1743
32.4k
                                                                          &u1_sao_src_top_left_luma_bot_left,
1744
32.4k
                                                                          au1_avail_luma,
1745
32.4k
                                                                          ai1_offset_y,
1746
32.4k
                                                                          sao_wd_luma,
1747
32.4k
                                                                          sao_ht_luma);
1748
32.4k
                    }
1749
32.4k
                }
1750
445k
            }
1751
3.62M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1752
777k
            {
1753
                /* Update left, top and top-left */
1754
6.99M
                for(row = 0; row < sao_ht_luma; row++)
1755
6.21M
                {
1756
6.21M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1757
6.21M
                }
1758
777k
                pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1759
1760
777k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1761
777k
            }
1762
4.06M
        }
1763
1764
4.06M
        if(0 != sao_wd_chroma)
1765
3.58M
        {
1766
3.58M
            if(ps_slice_hdr_top->i1_slice_sao_chroma_flag)
1767
284k
            {
1768
284k
                if(0 == ps_sao->b3_cb_type_idx)
1769
239k
                {
1770
1771
2.15M
                    for(row = 0; row < sao_ht_chroma; row++)
1772
1.91M
                    {
1773
1.91M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1774
1.91M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1775
1.91M
                    }
1776
239k
                    pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1777
239k
                    pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1778
1779
239k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1780
1781
239k
                }
1782
1783
44.8k
                else if(1 == ps_sao->b3_cb_type_idx)
1784
20.3k
                {
1785
20.3k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1786
20.3k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1787
20.3k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1788
20.3k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1789
1790
20.3k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1791
20.3k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1792
20.3k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1793
20.3k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1794
1795
20.3k
                    if(chroma_yuv420sp_vu)
1796
5.76k
                    {
1797
5.76k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1798
5.76k
                                                                                    src_strd,
1799
5.76k
                                                                                    pu1_src_left_chroma,
1800
5.76k
                                                                                    pu1_src_top_chroma,
1801
5.76k
                                                                                    pu1_sao_src_chroma_top_left_ctb,
1802
5.76k
                                                                                    ps_sao->b5_cr_band_pos,
1803
5.76k
                                                                                    ps_sao->b5_cb_band_pos,
1804
5.76k
                                                                                    ai1_offset_cr,
1805
5.76k
                                                                                    ai1_offset_cb,
1806
5.76k
                                                                                    sao_wd_chroma,
1807
5.76k
                                                                                    sao_ht_chroma
1808
5.76k
                                                                                   );
1809
5.76k
                    }
1810
14.6k
                    else
1811
14.6k
                    {
1812
14.6k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1813
14.6k
                                                                                    src_strd,
1814
14.6k
                                                                                    pu1_src_left_chroma,
1815
14.6k
                                                                                    pu1_src_top_chroma,
1816
14.6k
                                                                                    pu1_sao_src_chroma_top_left_ctb,
1817
14.6k
                                                                                    ps_sao->b5_cb_band_pos,
1818
14.6k
                                                                                    ps_sao->b5_cr_band_pos,
1819
14.6k
                                                                                    ai1_offset_cb,
1820
14.6k
                                                                                    ai1_offset_cr,
1821
14.6k
                                                                                    sao_wd_chroma,
1822
14.6k
                                                                                    sao_ht_chroma
1823
14.6k
                                                                                   );
1824
14.6k
                    }
1825
20.3k
                }
1826
24.4k
                else // if(2 <= ps_sao->b3_cb_type_idx)
1827
24.4k
                {
1828
24.4k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1829
24.4k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1830
24.4k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1831
24.4k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1832
1833
24.4k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1834
24.4k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1835
24.4k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1836
24.4k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1837
1838
220k
                    for(i = 0; i < 8; i++)
1839
195k
                    {
1840
195k
                        au1_avail_chroma[i] = 255;
1841
195k
                        au1_tile_slice_boundary[i] = 0;
1842
195k
                        au4_idx_t[i] = 0;
1843
195k
                        au4_ilf_across_tile_slice_enable[i] = 1;
1844
195k
                    }
1845
1846
24.4k
                    {
1847
24.4k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1848
12.2k
                        {
1849
12.2k
                            ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1850
12.2k
                            ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1851
1852
12.2k
                            ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1853
12.2k
                            ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1854
1855
12.2k
                            ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1856
12.2k
                            ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1857
1858
12.2k
                            ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1859
12.2k
                            ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1860
1861
12.2k
                            ctbx_t = ps_sao_ctxt->i4_ctb_x;
1862
12.2k
                            ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1863
1864
12.2k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1865
2.48k
                            {
1866
2.48k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1867
273
                                {
1868
273
                                    au4_idx_t[0] = -1;
1869
273
                                    au4_idx_t[6] = -1;
1870
273
                                    au4_idx_t[4] = -1;
1871
273
                                }
1872
2.20k
                                else
1873
2.20k
                                {
1874
2.20k
                                    au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1875
2.20k
                                    au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1876
2.20k
                                }
1877
2.48k
                                idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1878
2.48k
                                au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1879
2.48k
                                au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1880
2.48k
                                au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1881
1882
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1883
1884
2.48k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1885
273
                                {
1886
273
                                    au4_ilf_across_tile_slice_enable[4] = 0;
1887
273
                                    au4_ilf_across_tile_slice_enable[6] = 0;
1888
273
                                    au4_ilf_across_tile_slice_enable[0] = 0;
1889
273
                                }
1890
2.20k
                                else
1891
2.20k
                                {
1892
2.20k
                                    au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1893
2.20k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1894
2.20k
                                }
1895
1896
2.48k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1897
2.48k
                                au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1898
2.48k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1899
2.48k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1900
2.48k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1901
1902
2.48k
                                if(idx_t > au4_idx_t[6])
1903
273
                                {
1904
273
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1905
273
                                }
1906
1907
                                /*
1908
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1909
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1910
                                 */
1911
22.3k
                                for(i = 0; i < 8; i++)
1912
19.8k
                                {
1913
                                    /*Sets the edges that lie on the slice/tile boundary*/
1914
19.8k
                                    if(au4_idx_t[i] != idx_t)
1915
5.52k
                                    {
1916
5.52k
                                        au1_tile_slice_boundary[i] = 1;
1917
5.52k
                                    }
1918
14.3k
                                    else
1919
14.3k
                                    {
1920
                                        /*Indicates that the neighbour belongs to same/dependent slice*/
1921
14.3k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
1922
14.3k
                                    }
1923
19.8k
                                }
1924
                                /*Reset indices*/
1925
22.3k
                                for(i = 0; i < 8; i++)
1926
19.8k
                                {
1927
19.8k
                                    au4_idx_t[i] = 0;
1928
19.8k
                                }
1929
2.48k
                            }
1930
12.2k
                            if(ps_pps->i1_tiles_enabled_flag)
1931
9.80k
                            {
1932
                                /* Calculate availability flags at slice boundary */
1933
9.80k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1934
3.74k
                                {
1935
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1936
3.74k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1937
3.22k
                                    {
1938
                                        /*Calculate neighbor ctb slice indices*/
1939
3.22k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
1940
489
                                        {
1941
489
                                            au4_idx_t[0] = -1;
1942
489
                                            au4_idx_t[6] = -1;
1943
489
                                            au4_idx_t[4] = -1;
1944
489
                                        }
1945
2.73k
                                        else
1946
2.73k
                                        {
1947
2.73k
                                            au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1948
2.73k
                                            au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1949
2.73k
                                        }
1950
3.22k
                                        idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1951
3.22k
                                        au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1952
3.22k
                                        au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1953
3.22k
                                        au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1954
1955
29.0k
                                        for(i = 0; i < 8; i++)
1956
25.7k
                                        {
1957
                                            /*Sets the edges that lie on the tile boundary*/
1958
25.7k
                                            if(au4_idx_t[i] != idx_t)
1959
10.7k
                                            {
1960
10.7k
                                                au1_tile_slice_boundary[i] |= 1;
1961
10.7k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1962
10.7k
                                            }
1963
25.7k
                                        }
1964
3.22k
                                    }
1965
3.74k
                                }
1966
9.80k
                            }
1967
109k
                            for(i = 0; i < 8; i++)
1968
97.6k
                            {
1969
                                /*Sets the edges that lie on the slice/tile boundary*/
1970
97.6k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1971
11.2k
                                {
1972
11.2k
                                    au1_avail_chroma[i] = 0;
1973
11.2k
                                }
1974
97.6k
                            }
1975
1976
12.2k
                        }
1977
24.4k
                    }
1978
24.4k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
1979
3.25k
                    {
1980
3.25k
                        au1_avail_chroma[0] = 0;
1981
3.25k
                        au1_avail_chroma[4] = 0;
1982
3.25k
                        au1_avail_chroma[6] = 0;
1983
3.25k
                    }
1984
1985
24.4k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
1986
2.99k
                    {
1987
2.99k
                        au1_avail_chroma[1] = 0;
1988
2.99k
                        au1_avail_chroma[5] = 0;
1989
2.99k
                        au1_avail_chroma[7] = 0;
1990
2.99k
                    }
1991
1992
24.4k
                    if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1993
51
                    {
1994
51
                        au1_avail_chroma[2] = 0;
1995
51
                        au1_avail_chroma[4] = 0;
1996
51
                        au1_avail_chroma[5] = 0;
1997
51
                    }
1998
1999
24.4k
                    if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
2000
0
                    {
2001
0
                        au1_avail_chroma[3] = 0;
2002
0
                        au1_avail_chroma[6] = 0;
2003
0
                        au1_avail_chroma[7] = 0;
2004
0
                    }
2005
2006
24.4k
                    {
2007
24.4k
                        au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
2008
24.4k
                        au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
2009
24.4k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2010
24.4k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2011
2012
24.4k
                        if(chroma_yuv420sp_vu)
2013
6.46k
                        {
2014
6.46k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2015
6.46k
                                                                                 src_strd,
2016
6.46k
                                                                                 pu1_src_left_chroma,
2017
6.46k
                                                                                 pu1_src_top_chroma,
2018
6.46k
                                                                                 pu1_sao_src_chroma_top_left_ctb,
2019
6.46k
                                                                                 au1_src_top_right,
2020
6.46k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
2021
6.46k
                                                                                 au1_avail_chroma,
2022
6.46k
                                                                                 ai1_offset_cr,
2023
6.46k
                                                                                 ai1_offset_cb,
2024
6.46k
                                                                                 sao_wd_chroma,
2025
6.46k
                                                                                 sao_ht_chroma);
2026
6.46k
                        }
2027
17.9k
                        else
2028
17.9k
                        {
2029
17.9k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2030
17.9k
                                                                                 src_strd,
2031
17.9k
                                                                                 pu1_src_left_chroma,
2032
17.9k
                                                                                 pu1_src_top_chroma,
2033
17.9k
                                                                                 pu1_sao_src_chroma_top_left_ctb,
2034
17.9k
                                                                                 au1_src_top_right,
2035
17.9k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
2036
17.9k
                                                                                 au1_avail_chroma,
2037
17.9k
                                                                                 ai1_offset_cb,
2038
17.9k
                                                                                 ai1_offset_cr,
2039
17.9k
                                                                                 sao_wd_chroma,
2040
17.9k
                                                                                 sao_ht_chroma);
2041
17.9k
                        }
2042
24.4k
                    }
2043
2044
24.4k
                }
2045
284k
            }
2046
3.30M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2047
458k
            {
2048
4.12M
                for(row = 0; row < sao_ht_chroma; row++)
2049
3.66M
                {
2050
3.66M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2051
3.66M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2052
3.66M
                }
2053
458k
                pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2054
458k
                pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2055
2056
458k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2057
458k
            }
2058
3.58M
        }
2059
2060
4.06M
        pu1_src_luma += sao_ht_luma * src_strd;
2061
4.06M
        pu1_src_chroma += sao_ht_chroma * src_strd;
2062
4.06M
        ps_sao += (ps_sps->i2_pic_wd_in_ctb);
2063
4.06M
    }
2064
2065
    /* Left CTB */
2066
4.48M
    if(ps_sao_ctxt->i4_ctb_x > 0)
2067
4.31M
    {
2068
4.31M
        WORD32 sao_wd_luma = SAO_SHIFT_CTB;
2069
4.31M
        WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
2070
4.31M
        WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2071
4.31M
        WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2072
2073
4.31M
        WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
2074
4.31M
        WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
2075
4.31M
        WORD32 au4_idx_l[8], idx_l;
2076
2077
4.31M
        WORD32 remaining_rows;
2078
4.31M
        slice_header_t *ps_slice_hdr_left;
2079
4.31M
        {
2080
4.31M
            WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb +
2081
4.31M
                                        (ps_sao_ctxt->i4_ctb_x - 1);
2082
4.31M
            ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx];
2083
4.31M
        }
2084
2085
4.31M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2086
4.31M
        if(remaining_rows <= SAO_SHIFT_CTB)
2087
399k
        {
2088
399k
            sao_ht_luma += remaining_rows;
2089
399k
        }
2090
4.31M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2091
4.31M
        if(remaining_rows <= SAO_SHIFT_CTB)
2092
399k
        {
2093
399k
            sao_ht_chroma += remaining_rows;
2094
399k
        }
2095
2096
4.31M
        pu1_src_luma -= sao_wd_luma;
2097
4.31M
        pu1_src_chroma -= sao_wd_chroma;
2098
4.31M
        ps_sao -= 1;
2099
4.31M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
2100
4.31M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
2101
4.31M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2102
4.31M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2103
2104
2105
4.31M
        if(0 != sao_ht_luma)
2106
4.31M
        {
2107
4.31M
            if(ps_slice_hdr_left->i1_slice_sao_luma_flag)
2108
457k
            {
2109
457k
                if(0 == ps_sao->b3_y_type_idx)
2110
384k
                {
2111
                    /* Update left, top and top-left */
2112
13.5M
                    for(row = 0; row < sao_ht_luma; row++)
2113
13.1M
                    {
2114
13.1M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2115
13.1M
                    }
2116
                    /*Update in next location*/
2117
384k
                    pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2118
2119
384k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2120
2121
384k
                }
2122
2123
73.7k
                else if(1 == ps_sao->b3_y_type_idx)
2124
40.9k
                {
2125
40.9k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2126
40.9k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2127
40.9k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2128
40.9k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2129
2130
40.9k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2131
40.9k
                                                                              src_strd,
2132
40.9k
                                                                              pu1_src_left_luma,
2133
40.9k
                                                                              pu1_src_top_luma,
2134
40.9k
                                                                              pu1_sao_src_top_left_luma_curr_ctb,
2135
40.9k
                                                                              ps_sao->b5_y_band_pos,
2136
40.9k
                                                                              ai1_offset_y,
2137
40.9k
                                                                              sao_wd_luma,
2138
40.9k
                                                                              sao_ht_luma
2139
40.9k
                                                                             );
2140
40.9k
                }
2141
2142
32.8k
                else // if(2 <= ps_sao->b3_y_type_idx)
2143
32.8k
                {
2144
32.8k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2145
32.8k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2146
32.8k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2147
32.8k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2148
2149
295k
                    for(i = 0; i < 8; i++)
2150
262k
                    {
2151
262k
                        au1_avail_luma[i] = 255;
2152
262k
                        au1_tile_slice_boundary[i] = 0;
2153
262k
                        au4_idx_l[i] = 0;
2154
262k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2155
262k
                    }
2156
                    /******************************************************************
2157
                     * Derive the Left CTB's neighbour pixel's slice indices.
2158
                     *
2159
                     *
2160
                     *          ____________
2161
                     *         |    |       |
2162
                     *         | L_T|       |
2163
                     *         |____|_______|____
2164
                     *         |    |       |    |
2165
                     *     L_L |  L |  L_R  |    |
2166
                     *         |____|_______|    |
2167
                     *              |            |
2168
                     *          L_D |            |
2169
                     *              |____________|
2170
                     *
2171
                     *****************************************************************/
2172
2173
                    /*In case of slices or tiles*/
2174
32.8k
                    {
2175
32.8k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2176
16.9k
                        {
2177
16.9k
                            ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2178
16.9k
                            ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2179
2180
16.9k
                            ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2181
16.9k
                            ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2182
2183
16.9k
                            ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2184
16.9k
                            ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2185
2186
16.9k
                            ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2187
16.9k
                            ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2188
2189
16.9k
                            ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2190
16.9k
                            ctby_l = ps_sao_ctxt->i4_ctb_y;
2191
2192
16.9k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2193
6.54k
                            {
2194
6.54k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2195
409
                                {
2196
409
                                    au4_idx_l[2] = -1;
2197
409
                                    au4_idx_l[4] = -1;
2198
409
                                    au4_idx_l[5] = -1;
2199
409
                                }
2200
6.14k
                                else
2201
6.14k
                                {
2202
6.14k
                                    au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2203
6.14k
                                    au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2204
6.14k
                                }
2205
6.54k
                                idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2206
6.54k
                                au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2207
6.54k
                                au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2208
6.54k
                                au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2209
2210
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
2211
6.54k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2212
409
                                {
2213
409
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2214
409
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2215
409
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2216
409
                                }
2217
6.14k
                                else
2218
6.14k
                                {
2219
6.14k
                                    au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2220
6.14k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2221
2222
6.14k
                                }
2223
                                //TODO: ILF flag checks for [0] and [6] are missing.
2224
6.54k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2225
6.54k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2226
6.54k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2227
2228
6.54k
                                if(idx_l < au4_idx_l[5])
2229
68
                                {
2230
68
                                    au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2231
68
                                }
2232
2233
                                /*
2234
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2235
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2236
                                 */
2237
58.9k
                                for(i = 0; i < 8; i++)
2238
52.3k
                                {
2239
                                    /*Sets the edges that lie on the slice/tile boundary*/
2240
52.3k
                                    if(au4_idx_l[i] != idx_l)
2241
14.3k
                                    {
2242
14.3k
                                        au1_tile_slice_boundary[i] = 1;
2243
14.3k
                                    }
2244
38.0k
                                    else
2245
38.0k
                                    {
2246
38.0k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2247
38.0k
                                    }
2248
52.3k
                                }
2249
                                /*Reset indices*/
2250
58.9k
                                for(i = 0; i < 8; i++)
2251
52.3k
                                {
2252
52.3k
                                    au4_idx_l[i] = 0;
2253
52.3k
                                }
2254
6.54k
                            }
2255
2256
16.9k
                            if(ps_pps->i1_tiles_enabled_flag)
2257
10.5k
                            {
2258
                                /* Calculate availability flags at tile boundary */
2259
10.5k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2260
4.71k
                                {
2261
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2262
4.71k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2263
3.99k
                                    {
2264
3.99k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2265
1.12k
                                        {
2266
1.12k
                                            au4_idx_l[2] = -1;
2267
1.12k
                                            au4_idx_l[4] = -1;
2268
1.12k
                                            au4_idx_l[5] = -1;
2269
1.12k
                                        }
2270
2.86k
                                        else
2271
2.86k
                                        {
2272
2.86k
                                            au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2273
2.86k
                                            au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2274
2.86k
                                        }
2275
2276
3.99k
                                        idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2277
3.99k
                                        au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2278
3.99k
                                        au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2279
3.99k
                                        au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2280
2281
35.9k
                                        for(i = 0; i < 8; i++)
2282
31.9k
                                        {
2283
                                            /*Sets the edges that lie on the slice/tile boundary*/
2284
31.9k
                                            if(au4_idx_l[i] != idx_l)
2285
13.2k
                                            {
2286
13.2k
                                                au1_tile_slice_boundary[i] |= 1;
2287
13.2k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
2288
13.2k
                                            }
2289
31.9k
                                        }
2290
3.99k
                                    }
2291
4.71k
                                }
2292
10.5k
                            }
2293
2294
152k
                            for(i = 0; i < 8; i++)
2295
135k
                            {
2296
                                /*Sets the edges that lie on the slice/tile boundary*/
2297
135k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2298
14.5k
                                {
2299
14.5k
                                    au1_avail_luma[i] = 0;
2300
14.5k
                                }
2301
135k
                            }
2302
16.9k
                        }
2303
32.8k
                    }
2304
32.8k
                    if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
2305
0
                    {
2306
0
                        au1_avail_luma[0] = 0;
2307
0
                        au1_avail_luma[4] = 0;
2308
0
                        au1_avail_luma[6] = 0;
2309
0
                    }
2310
32.8k
                    if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2311
0
                    {
2312
0
                        au1_avail_luma[1] = 0;
2313
0
                        au1_avail_luma[5] = 0;
2314
0
                        au1_avail_luma[7] = 0;
2315
0
                    }
2316
2317
32.8k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2318
7.27k
                    {
2319
7.27k
                        au1_avail_luma[2] = 0;
2320
7.27k
                        au1_avail_luma[4] = 0;
2321
7.27k
                        au1_avail_luma[5] = 0;
2322
7.27k
                    }
2323
2324
32.8k
                    if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2325
3.59k
                    {
2326
3.59k
                        au1_avail_luma[3] = 0;
2327
3.59k
                        au1_avail_luma[6] = 0;
2328
3.59k
                        au1_avail_luma[7] = 0;
2329
3.59k
                    }
2330
2331
32.8k
                    {
2332
32.8k
                        au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
2333
32.8k
                        u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
2334
32.8k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2335
32.8k
                                                                          src_strd,
2336
32.8k
                                                                          pu1_src_left_luma,
2337
32.8k
                                                                          pu1_src_top_luma,
2338
32.8k
                                                                          pu1_sao_src_top_left_luma_curr_ctb,
2339
32.8k
                                                                          au1_src_top_right,
2340
32.8k
                                                                          &u1_sao_src_top_left_luma_bot_left,
2341
32.8k
                                                                          au1_avail_luma,
2342
32.8k
                                                                          ai1_offset_y,
2343
32.8k
                                                                          sao_wd_luma,
2344
32.8k
                                                                          sao_ht_luma);
2345
32.8k
                    }
2346
2347
32.8k
                }
2348
457k
            }
2349
3.85M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2350
789k
            {
2351
                /* Update left, top and top-left */
2352
16.8M
                for(row = 0; row < sao_ht_luma; row++)
2353
16.0M
                {
2354
16.0M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2355
16.0M
                }
2356
                /*Update in next location*/
2357
789k
                pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2358
2359
789k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2360
789k
            }
2361
4.31M
        }
2362
2363
4.31M
        if(0 != sao_ht_chroma)
2364
3.82M
        {
2365
3.82M
            if(ps_slice_hdr_left->i1_slice_sao_chroma_flag)
2366
293k
            {
2367
293k
                if(0 == ps_sao->b3_cb_type_idx)
2368
248k
                {
2369
4.99M
                    for(row = 0; row < sao_ht_chroma; row++)
2370
4.74M
                    {
2371
4.74M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2372
4.74M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2373
4.74M
                    }
2374
248k
                    pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2375
248k
                    pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2376
2377
248k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2378
248k
                }
2379
2380
44.8k
                else if(1 == ps_sao->b3_cb_type_idx)
2381
20.5k
                {
2382
20.5k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2383
20.5k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2384
20.5k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2385
20.5k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2386
2387
20.5k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2388
20.5k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2389
20.5k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2390
20.5k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2391
2392
20.5k
                    if(chroma_yuv420sp_vu)
2393
5.79k
                    {
2394
5.79k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2395
5.79k
                                                                                    src_strd,
2396
5.79k
                                                                                    pu1_src_left_chroma,
2397
5.79k
                                                                                    pu1_src_top_chroma,
2398
5.79k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2399
5.79k
                                                                                    ps_sao->b5_cr_band_pos,
2400
5.79k
                                                                                    ps_sao->b5_cb_band_pos,
2401
5.79k
                                                                                    ai1_offset_cr,
2402
5.79k
                                                                                    ai1_offset_cb,
2403
5.79k
                                                                                    sao_wd_chroma,
2404
5.79k
                                                                                    sao_ht_chroma
2405
5.79k
                                                                                   );
2406
5.79k
                    }
2407
14.7k
                    else
2408
14.7k
                    {
2409
14.7k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2410
14.7k
                                                                                    src_strd,
2411
14.7k
                                                                                    pu1_src_left_chroma,
2412
14.7k
                                                                                    pu1_src_top_chroma,
2413
14.7k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2414
14.7k
                                                                                    ps_sao->b5_cb_band_pos,
2415
14.7k
                                                                                    ps_sao->b5_cr_band_pos,
2416
14.7k
                                                                                    ai1_offset_cb,
2417
14.7k
                                                                                    ai1_offset_cr,
2418
14.7k
                                                                                    sao_wd_chroma,
2419
14.7k
                                                                                    sao_ht_chroma
2420
14.7k
                                                                                   );
2421
14.7k
                    }
2422
20.5k
                }
2423
2424
24.3k
                else // if(2 <= ps_sao->b3_cb_type_idx)
2425
24.3k
                {
2426
24.3k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2427
24.3k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2428
24.3k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2429
24.3k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2430
2431
24.3k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2432
24.3k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2433
24.3k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2434
24.3k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2435
2436
219k
                    for(i = 0; i < 8; i++)
2437
195k
                    {
2438
195k
                        au1_avail_chroma[i] = 255;
2439
195k
                        au1_tile_slice_boundary[i] = 0;
2440
195k
                        au4_idx_l[i] = 0;
2441
195k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2442
195k
                    }
2443
                    /*In case of slices or tiles*/
2444
24.3k
                    {
2445
24.3k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2446
11.8k
                        {
2447
11.8k
                            ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2448
11.8k
                            ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2449
2450
11.8k
                            ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2451
11.8k
                            ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2452
2453
11.8k
                            ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2454
11.8k
                            ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2455
2456
11.8k
                            ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2457
11.8k
                            ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2458
2459
11.8k
                            ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2460
11.8k
                            ctby_l = ps_sao_ctxt->i4_ctb_y;
2461
2462
11.8k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2463
2.11k
                            {
2464
2.11k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2465
126
                                {
2466
126
                                    au4_idx_l[2] = -1;
2467
126
                                    au4_idx_l[4] = -1;
2468
126
                                    au4_idx_l[5] = -1;
2469
126
                                }
2470
1.98k
                                else
2471
1.98k
                                {
2472
1.98k
                                    au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2473
1.98k
                                    au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2474
1.98k
                                }
2475
2.11k
                                idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2476
2.11k
                                au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2477
2.11k
                                au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2478
2.11k
                                au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2479
2480
                                /* Verify that the neighbouring CTBs don't cross the picture boundary. */
2481
2.11k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2482
126
                                {
2483
126
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2484
126
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2485
126
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2486
126
                                }
2487
1.98k
                                else
2488
1.98k
                                {
2489
1.98k
                                    au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2490
1.98k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2491
1.98k
                                }
2492
2493
2.11k
                                if(au4_idx_l[5] > idx_l)
2494
68
                                {
2495
68
                                    au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2496
68
                                }
2497
2498
                                //  au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2499
2.11k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2500
2.11k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2501
2.11k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2502
                                /*
2503
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2504
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2505
                                 */
2506
19.0k
                                for(i = 0; i < 8; i++)
2507
16.9k
                                {
2508
                                    /*Sets the edges that lie on the slice/tile boundary*/
2509
16.9k
                                    if(au4_idx_l[i] != idx_l)
2510
5.13k
                                    {
2511
5.13k
                                        au1_tile_slice_boundary[i] = 1;
2512
5.13k
                                    }
2513
11.7k
                                    else
2514
11.7k
                                    {
2515
11.7k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2516
11.7k
                                    }
2517
16.9k
                                }
2518
                                /*Reset indices*/
2519
19.0k
                                for(i = 0; i < 8; i++)
2520
16.9k
                                {
2521
16.9k
                                    au4_idx_l[i] = 0;
2522
16.9k
                                }
2523
2.11k
                            }
2524
11.8k
                            if(ps_pps->i1_tiles_enabled_flag)
2525
9.85k
                            {
2526
                                /* Calculate availability flags at tile boundary */
2527
9.85k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2528
4.77k
                                {
2529
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2530
4.77k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2531
4.13k
                                    {
2532
4.13k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2533
1.49k
                                        {
2534
1.49k
                                            au4_idx_l[2] = -1;
2535
1.49k
                                            au4_idx_l[4] = -1;
2536
1.49k
                                            au4_idx_l[5] = -1;
2537
1.49k
                                        }
2538
2.63k
                                        else
2539
2.63k
                                        {
2540
2.63k
                                            au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2541
2.63k
                                            au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2542
2.63k
                                        }
2543
2544
4.13k
                                        idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2545
4.13k
                                        au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2546
4.13k
                                        au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2547
4.13k
                                        au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2548
2549
37.2k
                                        for(i = 0; i < 8; i++)
2550
33.0k
                                        {
2551
                                            /*Sets the edges that lie on the slice/tile boundary*/
2552
33.0k
                                            if(au4_idx_l[i] != idx_l)
2553
13.1k
                                            {
2554
13.1k
                                                au1_tile_slice_boundary[i] |= 1;
2555
13.1k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2556
13.1k
                                            }
2557
33.0k
                                        }
2558
4.13k
                                    }
2559
4.77k
                                }
2560
9.85k
                            }
2561
106k
                            for(i = 0; i < 8; i++)
2562
94.6k
                            {
2563
                                /*Sets the edges that lie on the slice/tile boundary*/
2564
94.6k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2565
13.5k
                                {
2566
13.5k
                                    au1_avail_chroma[i] = 0;
2567
13.5k
                                }
2568
94.6k
                            }
2569
11.8k
                        }
2570
24.3k
                    }
2571
24.3k
                    if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
2572
47
                    {
2573
47
                        au1_avail_chroma[0] = 0;
2574
47
                        au1_avail_chroma[4] = 0;
2575
47
                        au1_avail_chroma[6] = 0;
2576
47
                    }
2577
2578
24.3k
                    if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2579
0
                    {
2580
0
                        au1_avail_chroma[1] = 0;
2581
0
                        au1_avail_chroma[5] = 0;
2582
0
                        au1_avail_chroma[7] = 0;
2583
0
                    }
2584
2585
24.3k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2586
5.43k
                    {
2587
5.43k
                        au1_avail_chroma[2] = 0;
2588
5.43k
                        au1_avail_chroma[4] = 0;
2589
5.43k
                        au1_avail_chroma[5] = 0;
2590
5.43k
                    }
2591
2592
24.3k
                    if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
2593
2.89k
                    {
2594
2.89k
                        au1_avail_chroma[3] = 0;
2595
2.89k
                        au1_avail_chroma[6] = 0;
2596
2.89k
                        au1_avail_chroma[7] = 0;
2597
2.89k
                    }
2598
2599
24.3k
                    {
2600
24.3k
                        au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
2601
24.3k
                        au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
2602
24.3k
                        au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
2603
24.3k
                        au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
2604
                        //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2605
                        //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2606
24.3k
                        if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
2607
146
                        {
2608
146
                            au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
2609
146
                            au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
2610
146
                        }
2611
2612
2613
24.3k
                        if(chroma_yuv420sp_vu)
2614
6.46k
                        {
2615
6.46k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2616
6.46k
                                                                                 src_strd,
2617
6.46k
                                                                                 pu1_src_left_chroma,
2618
6.46k
                                                                                 pu1_src_top_chroma,
2619
6.46k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2620
6.46k
                                                                                 au1_src_top_right,
2621
6.46k
                                                                                 au1_src_bot_left,
2622
6.46k
                                                                                 au1_avail_chroma,
2623
6.46k
                                                                                 ai1_offset_cr,
2624
6.46k
                                                                                 ai1_offset_cb,
2625
6.46k
                                                                                 sao_wd_chroma,
2626
6.46k
                                                                                 sao_ht_chroma);
2627
6.46k
                        }
2628
17.9k
                        else
2629
17.9k
                        {
2630
17.9k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2631
17.9k
                                                                                 src_strd,
2632
17.9k
                                                                                 pu1_src_left_chroma,
2633
17.9k
                                                                                 pu1_src_top_chroma,
2634
17.9k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2635
17.9k
                                                                                 au1_src_top_right,
2636
17.9k
                                                                                 au1_src_bot_left,
2637
17.9k
                                                                                 au1_avail_chroma,
2638
17.9k
                                                                                 ai1_offset_cb,
2639
17.9k
                                                                                 ai1_offset_cr,
2640
17.9k
                                                                                 sao_wd_chroma,
2641
17.9k
                                                                                 sao_ht_chroma);
2642
17.9k
                        }
2643
24.3k
                    }
2644
2645
24.3k
                }
2646
293k
            }
2647
3.53M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2648
467k
            {
2649
6.25M
                for(row = 0; row < sao_ht_chroma; row++)
2650
5.78M
                {
2651
5.78M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2652
5.78M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2653
5.78M
                }
2654
467k
                pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2655
467k
                pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2656
2657
467k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2658
467k
            }
2659
2660
3.82M
        }
2661
4.31M
        pu1_src_luma += sao_wd_luma;
2662
4.31M
        pu1_src_chroma += sao_wd_chroma;
2663
4.31M
        ps_sao += 1;
2664
4.31M
    }
2665
2666
2667
    /* Current CTB */
2668
4.48M
    {
2669
4.48M
        WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
2670
4.48M
        WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
2671
4.48M
        WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2672
4.48M
        WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2673
4.48M
        WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
2674
4.48M
        WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
2675
4.48M
        WORD32 au4_idx_c[8], idx_c;
2676
2677
4.48M
        WORD32 remaining_rows;
2678
4.48M
        WORD32 remaining_cols;
2679
2680
4.48M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
2681
4.48M
        if(remaining_cols <= SAO_SHIFT_CTB)
2682
170k
        {
2683
170k
            sao_wd_luma += remaining_cols;
2684
170k
        }
2685
4.48M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
2686
4.48M
        if(remaining_cols <= 2 * SAO_SHIFT_CTB)
2687
170k
        {
2688
170k
            sao_wd_chroma += remaining_cols;
2689
170k
        }
2690
2691
4.48M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2692
4.48M
        if(remaining_rows <= SAO_SHIFT_CTB)
2693
413k
        {
2694
413k
            sao_ht_luma += remaining_rows;
2695
413k
        }
2696
4.48M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2697
4.48M
        if(remaining_rows <= SAO_SHIFT_CTB)
2698
413k
        {
2699
413k
            sao_ht_chroma += remaining_rows;
2700
413k
        }
2701
2702
4.48M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2703
4.48M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2704
4.48M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2705
4.48M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2706
2707
4.48M
        if((0 != sao_wd_luma) && (0 != sao_ht_luma))
2708
4.48M
        {
2709
4.48M
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2710
495k
            {
2711
495k
                if(0 == ps_sao->b3_y_type_idx)
2712
413k
                {
2713
                    /* Update left, top and top-left */
2714
14.6M
                    for(row = 0; row < sao_ht_luma; row++)
2715
14.2M
                    {
2716
14.2M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2717
14.2M
                    }
2718
413k
                    pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2719
2720
413k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2721
2722
413k
                    pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2723
2724
413k
                }
2725
2726
81.8k
                else if(1 == ps_sao->b3_y_type_idx)
2727
45.4k
                {
2728
45.4k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2729
45.4k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2730
45.4k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2731
45.4k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2732
2733
45.4k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2734
45.4k
                                                                              src_strd,
2735
45.4k
                                                                              pu1_src_left_luma,
2736
45.4k
                                                                              pu1_src_top_luma,
2737
45.4k
                                                                              pu1_sao_src_top_left_luma_curr_ctb,
2738
45.4k
                                                                              ps_sao->b5_y_band_pos,
2739
45.4k
                                                                              ai1_offset_y,
2740
45.4k
                                                                              sao_wd_luma,
2741
45.4k
                                                                              sao_ht_luma
2742
45.4k
                                                                             );
2743
45.4k
                }
2744
2745
36.4k
                else // if(2 <= ps_sao->b3_y_type_idx)
2746
36.4k
                {
2747
36.4k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2748
36.4k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2749
36.4k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2750
36.4k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2751
2752
327k
                    for(i = 0; i < 8; i++)
2753
291k
                    {
2754
291k
                        au1_avail_luma[i] = 255;
2755
291k
                        au1_tile_slice_boundary[i] = 0;
2756
291k
                        au4_idx_c[i] = 0;
2757
291k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2758
291k
                    }
2759
                    /******************************************************************
2760
                     * Derive the current CTB's neighbour pixels' slice indices.
2761
                     *
2762
                     *
2763
                     *          ____________
2764
                     *         |    |       |
2765
                     *         |    | C_T   |
2766
                     *         |____|_______|____
2767
                     *         |    |       |    |
2768
                     *         | C_L|   C   | C_R|
2769
                     *         |____|_______|    |
2770
                     *              |  C_D       |
2771
                     *              |            |
2772
                     *              |____________|
2773
                     *
2774
                     *****************************************************************/
2775
2776
                    /*In case of slices*/
2777
36.4k
                    {
2778
36.4k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2779
18.1k
                        {
2780
18.1k
                            ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2781
18.1k
                            ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2782
2783
18.1k
                            ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2784
18.1k
                            ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2785
2786
18.1k
                            ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2787
18.1k
                            ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2788
2789
18.1k
                            ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
2790
18.1k
                            ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
2791
2792
18.1k
                            ctbx_c = ps_sao_ctxt->i4_ctb_x;
2793
18.1k
                            ctby_c = ps_sao_ctxt->i4_ctb_y;
2794
2795
18.1k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2796
6.96k
                            {
2797
6.96k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
2798
844
                                {
2799
844
                                    au4_idx_c[6] = -1;
2800
844
                                    au4_idx_c[0] = -1;
2801
844
                                    au4_idx_c[4] = -1;
2802
844
                                }
2803
6.12k
                                else
2804
6.12k
                                {
2805
6.12k
                                    au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2806
6.12k
                                }
2807
2808
6.96k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2809
347
                                {
2810
347
                                    au4_idx_c[2] = -1;
2811
347
                                    au4_idx_c[5] = -1;
2812
347
                                    au4_idx_c[4] = -1;
2813
347
                                }
2814
6.62k
                                else
2815
6.62k
                                {
2816
6.62k
                                    au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2817
6.62k
                                    au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2818
6.62k
                                }
2819
6.96k
                                idx_c   = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2820
6.96k
                                au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2821
6.96k
                                au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2822
2823
6.96k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
2824
844
                                {
2825
844
                                    au4_ilf_across_tile_slice_enable[6] = 0;
2826
844
                                    au4_ilf_across_tile_slice_enable[0] = 0;
2827
844
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2828
844
                                }
2829
6.12k
                                else
2830
6.12k
                                {
2831
6.12k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2832
6.12k
                                    au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
2833
6.12k
                                }
2834
6.96k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2835
347
                                {
2836
347
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2837
347
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2838
347
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2839
347
                                }
2840
6.62k
                                else
2841
6.62k
                                {
2842
6.62k
                                    au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2843
6.62k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2844
6.62k
                                }
2845
6.96k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2846
6.96k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2847
6.96k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2848
2849
6.96k
                                if(au4_idx_c[6] < idx_c)
2850
1.36k
                                {
2851
1.36k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2852
1.36k
                                }
2853
2854
                                /*
2855
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2856
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2857
                                 */
2858
62.7k
                                for(i = 0; i < 8; i++)
2859
55.7k
                                {
2860
                                    /*Sets the edges that lie on the slice/tile boundary*/
2861
55.7k
                                    if(au4_idx_c[i] != idx_c)
2862
16.0k
                                    {
2863
16.0k
                                        au1_tile_slice_boundary[i] = 1;
2864
16.0k
                                    }
2865
39.6k
                                    else
2866
39.6k
                                    {
2867
39.6k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2868
39.6k
                                    }
2869
55.7k
                                }
2870
                                /*Reset indices*/
2871
62.7k
                                for(i = 0; i < 8; i++)
2872
55.7k
                                {
2873
55.7k
                                    au4_idx_c[i] = 0;
2874
55.7k
                                }
2875
6.96k
                            }
2876
2877
18.1k
                            if(ps_pps->i1_tiles_enabled_flag)
2878
11.3k
                            {
2879
                                /* Calculate availability flags at tile boundary */
2880
11.3k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2881
5.05k
                                {
2882
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2883
5.05k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2884
4.36k
                                    {
2885
4.36k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
2886
468
                                        {
2887
468
                                            au4_idx_c[6] = -1;
2888
468
                                            au4_idx_c[0] = -1;
2889
468
                                            au4_idx_c[4] = -1;
2890
468
                                        }
2891
3.89k
                                        else
2892
3.89k
                                        {
2893
3.89k
                                            au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2894
3.89k
                                        }
2895
2896
4.36k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2897
1.15k
                                        {
2898
1.15k
                                            au4_idx_c[2] = -1;
2899
1.15k
                                            au4_idx_c[5] = -1;
2900
1.15k
                                            au4_idx_c[4] = -1;
2901
1.15k
                                        }
2902
3.21k
                                        else
2903
3.21k
                                        {
2904
3.21k
                                            au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2905
3.21k
                                            au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2906
3.21k
                                        }
2907
4.36k
                                        idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2908
4.36k
                                        au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2909
4.36k
                                        au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2910
2911
39.2k
                                        for(i = 0; i < 8; i++)
2912
34.9k
                                        {
2913
                                            /*Sets the edges that lie on the slice/tile boundary*/
2914
34.9k
                                            if(au4_idx_c[i] != idx_c)
2915
14.9k
                                            {
2916
14.9k
                                                au1_tile_slice_boundary[i] |= 1;
2917
14.9k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2918
14.9k
                                            }
2919
34.9k
                                        }
2920
4.36k
                                    }
2921
5.05k
                                }
2922
11.3k
                            }
2923
2924
163k
                            for(i = 0; i < 8; i++)
2925
145k
                            {
2926
                                /*Sets the edges that lie on the slice/tile boundary*/
2927
145k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2928
17.1k
                                {
2929
17.1k
                                    au1_avail_luma[i] = 0;
2930
17.1k
                                }
2931
145k
                            }
2932
2933
18.1k
                        }
2934
36.4k
                    }
2935
36.4k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
2936
4.65k
                    {
2937
4.65k
                        au1_avail_luma[0] = 0;
2938
4.65k
                        au1_avail_luma[4] = 0;
2939
4.65k
                        au1_avail_luma[6] = 0;
2940
4.65k
                    }
2941
2942
36.4k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
2943
3.61k
                    {
2944
3.61k
                        au1_avail_luma[1] = 0;
2945
3.61k
                        au1_avail_luma[5] = 0;
2946
3.61k
                        au1_avail_luma[7] = 0;
2947
3.61k
                    }
2948
2949
36.4k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2950
8.23k
                    {
2951
8.23k
                        au1_avail_luma[2] = 0;
2952
8.23k
                        au1_avail_luma[4] = 0;
2953
8.23k
                        au1_avail_luma[5] = 0;
2954
8.23k
                    }
2955
2956
36.4k
                    if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2957
3.94k
                    {
2958
3.94k
                        au1_avail_luma[3] = 0;
2959
3.94k
                        au1_avail_luma[6] = 0;
2960
3.94k
                        au1_avail_luma[7] = 0;
2961
3.94k
                    }
2962
2963
36.4k
                    {
2964
36.4k
                        au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
2965
36.4k
                        u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
2966
2967
36.4k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2968
36.4k
                                                                          src_strd,
2969
36.4k
                                                                          pu1_src_left_luma,
2970
36.4k
                                                                          pu1_src_top_luma,
2971
36.4k
                                                                          pu1_sao_src_top_left_luma_curr_ctb,
2972
36.4k
                                                                          au1_src_top_right,
2973
36.4k
                                                                          &u1_sao_src_top_left_luma_bot_left,
2974
36.4k
                                                                          au1_avail_luma,
2975
36.4k
                                                                          ai1_offset_y,
2976
36.4k
                                                                          sao_wd_luma,
2977
36.4k
                                                                          sao_ht_luma);
2978
36.4k
                    }
2979
36.4k
                    pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2980
36.4k
                    pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
2981
36.4k
                }
2982
495k
            }
2983
3.98M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2984
813k
            {
2985
                /* Update left, top and top-left */
2986
17.6M
                for(row = 0; row < sao_ht_luma; row++)
2987
16.8M
                {
2988
16.8M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2989
16.8M
                }
2990
813k
                pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2991
2992
813k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2993
2994
813k
                pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2995
813k
            }
2996
4.48M
        }
2997
2998
4.48M
        if((0 != sao_wd_chroma) && (0 != sao_ht_chroma))
2999
3.97M
        {
3000
3.97M
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
3001
317k
            {
3002
317k
                if(0 == ps_sao->b3_cb_type_idx)
3003
268k
                {
3004
5.33M
                    for(row = 0; row < sao_ht_chroma; row++)
3005
5.06M
                    {
3006
5.06M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3007
5.06M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3008
5.06M
                    }
3009
268k
                    pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3010
268k
                    pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3011
3012
268k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3013
3014
268k
                    pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3015
268k
                    pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3016
268k
                }
3017
3018
49.4k
                else if(1 == ps_sao->b3_cb_type_idx)
3019
22.2k
                {
3020
22.2k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3021
22.2k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3022
22.2k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3023
22.2k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3024
3025
22.2k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3026
22.2k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3027
22.2k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3028
22.2k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3029
3030
22.2k
                    if(chroma_yuv420sp_vu)
3031
6.20k
                    {
3032
6.20k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3033
6.20k
                                                                                    src_strd,
3034
6.20k
                                                                                    pu1_src_left_chroma,
3035
6.20k
                                                                                    pu1_src_top_chroma,
3036
6.20k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
3037
6.20k
                                                                                    ps_sao->b5_cr_band_pos,
3038
6.20k
                                                                                    ps_sao->b5_cb_band_pos,
3039
6.20k
                                                                                    ai1_offset_cr,
3040
6.20k
                                                                                    ai1_offset_cb,
3041
6.20k
                                                                                    sao_wd_chroma,
3042
6.20k
                                                                                    sao_ht_chroma
3043
6.20k
                                                                                   );
3044
6.20k
                    }
3045
16.0k
                    else
3046
16.0k
                    {
3047
16.0k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3048
16.0k
                                                                                    src_strd,
3049
16.0k
                                                                                    pu1_src_left_chroma,
3050
16.0k
                                                                                    pu1_src_top_chroma,
3051
16.0k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
3052
16.0k
                                                                                    ps_sao->b5_cb_band_pos,
3053
16.0k
                                                                                    ps_sao->b5_cr_band_pos,
3054
16.0k
                                                                                    ai1_offset_cb,
3055
16.0k
                                                                                    ai1_offset_cr,
3056
16.0k
                                                                                    sao_wd_chroma,
3057
16.0k
                                                                                    sao_ht_chroma
3058
16.0k
                                                                                   );
3059
16.0k
                    }
3060
22.2k
                }
3061
3062
27.1k
                else // if(2 <= ps_sao->b3_cb_type_idx)
3063
27.1k
                {
3064
27.1k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3065
27.1k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3066
27.1k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3067
27.1k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3068
3069
27.1k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3070
27.1k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3071
27.1k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3072
27.1k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3073
3074
244k
                    for(i = 0; i < 8; i++)
3075
217k
                    {
3076
217k
                        au1_avail_chroma[i] = 255;
3077
217k
                        au1_tile_slice_boundary[i] = 0;
3078
217k
                        au4_idx_c[i] = 0;
3079
217k
                        au4_ilf_across_tile_slice_enable[i] = 1;
3080
217k
                    }
3081
27.1k
                    {
3082
27.1k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3083
12.6k
                        {
3084
12.6k
                            ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
3085
12.6k
                            ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
3086
3087
12.6k
                            ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
3088
12.6k
                            ctby_c_l = ps_sao_ctxt->i4_ctb_y;
3089
3090
12.6k
                            ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
3091
12.6k
                            ctby_c_r = ps_sao_ctxt->i4_ctb_y;
3092
3093
12.6k
                            ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
3094
12.6k
                            ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
3095
3096
12.6k
                            ctbx_c = ps_sao_ctxt->i4_ctb_x;
3097
12.6k
                            ctby_c = ps_sao_ctxt->i4_ctb_y;
3098
3099
12.6k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
3100
2.06k
                            {
3101
2.06k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
3102
230
                                {
3103
230
                                    au4_idx_c[0] = -1;
3104
230
                                    au4_idx_c[4] = -1;
3105
230
                                    au4_idx_c[6] = -1;
3106
230
                                }
3107
1.83k
                                else
3108
1.83k
                                {
3109
1.83k
                                    au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3110
1.83k
                                }
3111
3112
2.06k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
3113
124
                                {
3114
124
                                    au4_idx_c[2] = -1;
3115
124
                                    au4_idx_c[4] = -1;
3116
124
                                    au4_idx_c[5] = -1;
3117
124
                                }
3118
1.94k
                                else
3119
1.94k
                                {
3120
1.94k
                                    au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3121
1.94k
                                    au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3122
1.94k
                                }
3123
2.06k
                                idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3124
2.06k
                                au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3125
2.06k
                                au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3126
3127
2.06k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
3128
230
                                {
3129
230
                                    au4_ilf_across_tile_slice_enable[0] = 0;
3130
230
                                    au4_ilf_across_tile_slice_enable[4] = 0;
3131
230
                                    au4_ilf_across_tile_slice_enable[6] = 0;
3132
230
                                }
3133
1.83k
                                else
3134
1.83k
                                {
3135
1.83k
                                    au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
3136
1.83k
                                    au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3137
1.83k
                                }
3138
3139
2.06k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
3140
124
                                {
3141
124
                                    au4_ilf_across_tile_slice_enable[2] = 0;
3142
124
                                    au4_ilf_across_tile_slice_enable[4] = 0;
3143
124
                                    au4_ilf_across_tile_slice_enable[5] = 0;
3144
124
                                }
3145
1.94k
                                else
3146
1.94k
                                {
3147
1.94k
                                    au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3148
1.94k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
3149
1.94k
                                }
3150
3151
2.06k
                                au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
3152
2.06k
                                au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
3153
2.06k
                                au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
3154
3155
2.06k
                                if(idx_c > au4_idx_c[6])
3156
401
                                {
3157
401
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3158
401
                                }
3159
3160
                                /*
3161
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
3162
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
3163
                                 */
3164
18.6k
                                for(i = 0; i < 8; i++)
3165
16.5k
                                {
3166
                                    /*Sets the edges that lie on the slice/tile boundary*/
3167
16.5k
                                    if(au4_idx_c[i] != idx_c)
3168
5.67k
                                    {
3169
5.67k
                                        au1_tile_slice_boundary[i] = 1;
3170
5.67k
                                    }
3171
10.8k
                                    else
3172
10.8k
                                    {
3173
10.8k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
3174
10.8k
                                    }
3175
16.5k
                                }
3176
                                /*Reset indices*/
3177
18.6k
                                for(i = 0; i < 8; i++)
3178
16.5k
                                {
3179
16.5k
                                    au4_idx_c[i] = 0;
3180
16.5k
                                }
3181
2.06k
                            }
3182
3183
12.6k
                            if(ps_pps->i1_tiles_enabled_flag)
3184
10.5k
                            {
3185
                                /* Calculate availability flags at tile boundary */
3186
10.5k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
3187
4.70k
                                {
3188
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
3189
4.70k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
3190
4.13k
                                    {
3191
4.13k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
3192
405
                                        {
3193
405
                                            au4_idx_c[6] = -1;
3194
405
                                            au4_idx_c[0] = -1;
3195
405
                                            au4_idx_c[4] = -1;
3196
405
                                        }
3197
3.72k
                                        else
3198
3.72k
                                        {
3199
3.72k
                                            au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3200
3.72k
                                        }
3201
3202
4.13k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
3203
1.39k
                                        {
3204
1.39k
                                            au4_idx_c[2] = -1;
3205
1.39k
                                            au4_idx_c[5] = -1;
3206
1.39k
                                            au4_idx_c[4] = -1;
3207
1.39k
                                        }
3208
2.74k
                                        else
3209
2.74k
                                        {
3210
2.74k
                                            au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3211
2.74k
                                            au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3212
2.74k
                                        }
3213
4.13k
                                        idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3214
4.13k
                                        au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3215
4.13k
                                        au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3216
3217
37.1k
                                        for(i = 0; i < 8; i++)
3218
33.0k
                                        {
3219
                                            /*Sets the edges that lie on the slice/tile boundary*/
3220
33.0k
                                            if(au4_idx_c[i] != idx_c)
3221
13.2k
                                            {
3222
13.2k
                                                au1_tile_slice_boundary[i] |= 1;
3223
13.2k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
3224
13.2k
                                            }
3225
33.0k
                                        }
3226
4.13k
                                    }
3227
4.70k
                                }
3228
10.5k
                            }
3229
3230
113k
                            for(i = 0; i < 8; i++)
3231
100k
                            {
3232
                                /* Mark a neighbour unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled */
3233
100k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
3234
13.9k
                                {
3235
13.9k
                                    au1_avail_chroma[i] = 0;
3236
13.9k
                                }
3237
100k
                            }
3238
12.6k
                        }
3239
27.1k
                    }
3240
3241
27.1k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
3242
3.64k
                    {
3243
3.64k
                        au1_avail_chroma[0] = 0;
3244
3.64k
                        au1_avail_chroma[4] = 0;
3245
3.64k
                        au1_avail_chroma[6] = 0;
3246
3.64k
                    }
3247
3248
27.1k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
3249
2.99k
                    {
3250
2.99k
                        au1_avail_chroma[1] = 0;
3251
2.99k
                        au1_avail_chroma[5] = 0;
3252
2.99k
                        au1_avail_chroma[7] = 0;
3253
2.99k
                    }
3254
3255
27.1k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
3256
6.26k
                    {
3257
6.26k
                        au1_avail_chroma[2] = 0;
3258
6.26k
                        au1_avail_chroma[4] = 0;
3259
6.26k
                        au1_avail_chroma[5] = 0;
3260
6.26k
                    }
3261
3262
27.1k
                    if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
3263
3.40k
                    {
3264
3.40k
                        au1_avail_chroma[3] = 0;
3265
3.40k
                        au1_avail_chroma[6] = 0;
3266
3.40k
                        au1_avail_chroma[7] = 0;
3267
3.40k
                    }
3268
3269
27.1k
                    {
3270
27.1k
                        au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
3271
27.1k
                        au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
3272
3273
27.1k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
3274
27.1k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
3275
3276
27.1k
                        if(chroma_yuv420sp_vu)
3277
7.03k
                        {
3278
7.03k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3279
7.03k
                                                                                 src_strd,
3280
7.03k
                                                                                 pu1_src_left_chroma,
3281
7.03k
                                                                                 pu1_src_top_chroma,
3282
7.03k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
3283
7.03k
                                                                                 au1_src_top_right,
3284
7.03k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
3285
7.03k
                                                                                 au1_avail_chroma,
3286
7.03k
                                                                                 ai1_offset_cr,
3287
7.03k
                                                                                 ai1_offset_cb,
3288
7.03k
                                                                                 sao_wd_chroma,
3289
7.03k
                                                                                 sao_ht_chroma);
3290
7.03k
                        }
3291
20.1k
                        else
3292
20.1k
                        {
3293
20.1k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3294
20.1k
                                                                                 src_strd,
3295
20.1k
                                                                                 pu1_src_left_chroma,
3296
20.1k
                                                                                 pu1_src_top_chroma,
3297
20.1k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
3298
20.1k
                                                                                 au1_src_top_right,
3299
20.1k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
3300
20.1k
                                                                                 au1_avail_chroma,
3301
20.1k
                                                                                 ai1_offset_cb,
3302
20.1k
                                                                                 ai1_offset_cr,
3303
20.1k
                                                                                 sao_wd_chroma,
3304
20.1k
                                                                                 sao_ht_chroma);
3305
20.1k
                        }
3306
27.1k
                    }
3307
3308
27.1k
                }
3309
317k
                pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3310
317k
                pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3311
3312
317k
                pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
3313
317k
                pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
3314
317k
            }
3315
3.65M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3316
482k
            {
3317
6.52M
                for(row = 0; row < sao_ht_chroma; row++)
3318
6.04M
                {
3319
6.04M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3320
6.04M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3321
6.04M
                }
3322
482k
                pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3323
482k
                pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3324
3325
482k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3326
3327
482k
                pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3328
482k
                pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3329
482k
            }
3330
3331
3.97M
        }
3332
4.48M
    }
3333
3334
3335
3336
3337
/* For blocks whose no-loop-filter flag is set, copy the backed up values back over the source samples */
3338
4.48M
    {
3339
        /* Luma */
3340
4.48M
        if(no_loop_filter_enabled_luma)
3341
162k
        {
3342
162k
            UWORD32 u4_no_loop_filter_flag;
3343
162k
            WORD32 loop_filter_bit_pos;
3344
162k
            WORD32 log2_min_cu = 3;
3345
162k
            WORD32 min_cu = (1 << log2_min_cu);
3346
162k
            UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
3347
162k
            WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
3348
162k
            WORD32 sao_blk_wd = ctb_size;
3349
162k
            WORD32 remaining_rows;
3350
162k
            WORD32 remaining_cols;
3351
3352
162k
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3353
162k
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3354
162k
            if(remaining_rows <= SAO_SHIFT_CTB)
3355
17.9k
                sao_blk_ht += remaining_rows;
3356
162k
            if(remaining_cols <= SAO_SHIFT_CTB)
3357
4.27k
                sao_blk_wd += remaining_cols;
3358
3359
162k
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
3360
162k
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3361
3362
162k
            pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
3363
3364
162k
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3365
162k
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3366
162k
            if(ps_sao_ctxt->i4_ctb_x > 0)
3367
156k
                loop_filter_bit_pos -= 1;
3368
3369
162k
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3370
162k
                            (loop_filter_bit_pos >> 3);
3371
3372
162k
            for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
3373
1.38M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3374
1.21M
            {
3375
1.21M
                WORD32 tmp_wd = sao_blk_wd;
3376
3377
1.21M
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3378
1.21M
                                (loop_filter_bit_pos & 7);
3379
1.21M
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3380
3381
1.21M
                if(u4_no_loop_filter_flag)
3382
1.16M
                {
3383
2.43M
                    while(tmp_wd > 0)
3384
1.27M
                    {
3385
1.27M
                        if(CTZ(u4_no_loop_filter_flag))
3386
97.5k
                        {
3387
97.5k
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3388
97.5k
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3389
97.5k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3390
97.5k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3391
97.5k
                        }
3392
1.17M
                        else
3393
1.17M
                        {
3394
10.3M
                            for(row = 0; row < min_cu; row++)
3395
9.20M
                            {
3396
493M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3397
484M
                                {
3398
484M
                                    pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
3399
484M
                                }
3400
9.20M
                            }
3401
1.17M
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3402
1.17M
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3403
1.17M
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3404
1.17M
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3405
1.17M
                        }
3406
1.27M
                    }
3407
3408
1.16M
                    pu1_src_tmp_luma -= sao_blk_wd;
3409
1.16M
                    pu1_src_backup_luma -= sao_blk_wd;
3410
1.16M
                }
3411
3412
1.21M
                pu1_src_tmp_luma += (src_strd << log2_min_cu);
3413
1.21M
                pu1_src_backup_luma += (backup_strd << log2_min_cu);
3414
1.21M
            }
3415
162k
        }
3416
3417
        /* Chroma */
3418
4.48M
        if(no_loop_filter_enabled_chroma)
3419
162k
        {
3420
162k
            UWORD32 u4_no_loop_filter_flag;
3421
162k
            WORD32 loop_filter_bit_pos;
3422
162k
            WORD32 log2_min_cu = 3;
3423
162k
            WORD32 min_cu = (1 << log2_min_cu);
3424
162k
            UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
3425
162k
            WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
3426
162k
            WORD32 sao_blk_wd = ctb_size;
3427
162k
            WORD32 remaining_rows;
3428
162k
            WORD32 remaining_cols;
3429
3430
162k
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3431
162k
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3432
162k
            if(remaining_rows <= 2 * SAO_SHIFT_CTB)
3433
18.0k
                sao_blk_ht += remaining_rows;
3434
162k
            if(remaining_cols <= 2 * SAO_SHIFT_CTB)
3435
4.24k
                sao_blk_wd += remaining_cols;
3436
3437
162k
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
3438
162k
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3439
3440
162k
            pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
3441
3442
162k
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3443
162k
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3444
162k
            if(ps_sao_ctxt->i4_ctb_x > 0)
3445
156k
                loop_filter_bit_pos -= 2;
3446
3447
162k
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3448
162k
                            (loop_filter_bit_pos >> 3);
3449
3450
162k
            for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
3451
1.37M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3452
1.21M
            {
3453
1.21M
                WORD32 tmp_wd = sao_blk_wd;
3454
3455
1.21M
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3456
1.21M
                                (loop_filter_bit_pos & 7);
3457
1.21M
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3458
3459
1.21M
                if(u4_no_loop_filter_flag)
3460
1.16M
                {
3461
2.43M
                    while(tmp_wd > 0)
3462
1.26M
                    {
3463
1.26M
                        if(CTZ(u4_no_loop_filter_flag))
3464
95.5k
                        {
3465
95.5k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3466
95.5k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3467
95.5k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3468
95.5k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3469
95.5k
                        }
3470
1.17M
                        else
3471
1.17M
                        {
3472
5.82M
                            for(row = 0; row < min_cu / 2; row++)
3473
4.65M
                            {
3474
269M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3475
264M
                                {
3476
264M
                                    pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
3477
264M
                                }
3478
4.65M
                            }
3479
3480
1.17M
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3481
1.17M
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3482
1.17M
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3483
1.17M
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3484
1.17M
                        }
3485
1.26M
                    }
3486
3487
1.16M
                    pu1_src_tmp_chroma -= sao_blk_wd;
3488
1.16M
                    pu1_src_backup_chroma -= sao_blk_wd;
3489
1.16M
                }
3490
3491
1.21M
                pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
3492
1.21M
                pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
3493
1.21M
            }
3494
162k
        }
3495
4.48M
    }
3496
3497
4.48M
}
3498