Coverage Report

Created: 2025-07-12 07:16

/src/libhevc/decoder/ihevcd_sao.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevcd_sao.c
22
 *
23
 * @brief
24
 *  Contains function definitions for sample adaptive offset process
25
 *
26
 * @author
27
 *  Srinivas T
28
 *
29
 * @par List of Functions:
30
 *
31
 * @remarks
32
 *  None
33
 *
34
 *******************************************************************************
35
 */
36
37
#include <stdio.h>
38
#include <stddef.h>
39
#include <stdlib.h>
40
#include <string.h>
41
#include <assert.h>
42
43
#include "ihevc_typedefs.h"
44
#include "iv.h"
45
#include "ivd.h"
46
#include "ihevcd_cxa.h"
47
#include "ithread.h"
48
49
#include "ihevc_defs.h"
50
#include "ihevc_debug.h"
51
#include "ihevc_defs.h"
52
#include "ihevc_structs.h"
53
#include "ihevc_macros.h"
54
#include "ihevc_platform_macros.h"
55
#include "ihevc_cabac_tables.h"
56
#include "ihevc_sao.h"
57
#include "ihevc_mem_fns.h"
58
59
#include "ihevc_error.h"
60
#include "ihevc_common_tables.h"
61
62
#include "ihevcd_trace.h"
63
#include "ihevcd_defs.h"
64
#include "ihevcd_function_selector.h"
65
#include "ihevcd_structs.h"
66
#include "ihevcd_error.h"
67
#include "ihevcd_nal.h"
68
#include "ihevcd_bitstream.h"
69
#include "ihevcd_job_queue.h"
70
#include "ihevcd_utils.h"
71
72
#include "ihevc_deblk.h"
73
#include "ihevc_deblk_tables.h"
74
#include "ihevcd_profile.h"
75
#include "ihevcd_sao.h"
76
#include "ihevcd_debug.h"
77
78
85.3M
#define SAO_SHIFT_CTB    8
79
80
/**
81
 * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions)
82
 */
83
void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
84
0
{
85
0
    codec_t *ps_codec = ps_sao_ctxt->ps_codec;
86
0
    UWORD8 *pu1_src_luma;
87
0
    UWORD8 *pu1_src_chroma;
88
0
    WORD32 src_strd;
89
0
    WORD32 ctb_size;
90
0
    WORD32 log2_ctb_size;
91
0
    sps_t *ps_sps;
92
0
    sao_t *ps_sao;
93
0
    WORD32 row, col;
94
0
    UWORD8 au1_avail_luma[8];
95
0
    UWORD8 au1_avail_chroma[8];
96
0
    WORD32 i;
97
0
    UWORD8 *pu1_src_top_luma;
98
0
    UWORD8 *pu1_src_top_chroma;
99
0
    UWORD8 *pu1_src_left_luma;
100
0
    UWORD8 *pu1_src_left_chroma;
101
0
    UWORD8 au1_src_top_right[2];
102
0
    UWORD8 au1_src_bot_left[2];
103
0
    UWORD8 *pu1_no_loop_filter_flag;
104
0
    WORD32 loop_filter_strd;
105
106
    /* Only first 5 values are used, but arrays are large
107
     enough so that SIMD functions can read 64 bits at a time */
108
0
    WORD8 ai1_offset_y[8] = {0};
109
0
    WORD8 ai1_offset_cb[8] = {0};
110
0
    WORD8 ai1_offset_cr[8] = {0};
111
112
0
    PROFILE_DISABLE_SAO();
113
114
0
    ps_sps = ps_sao_ctxt->ps_sps;
115
0
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
116
0
    ctb_size = (1 << log2_ctb_size);
117
0
    src_strd = ps_sao_ctxt->ps_codec->i4_strd;
118
0
    pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
119
0
    pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
120
121
0
    ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
122
0
    loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
123
124
    /* Current CTB */
125
0
    {
126
0
        WORD32 sao_wd_luma;
127
0
        WORD32 sao_wd_chroma;
128
0
        WORD32 sao_ht_luma;
129
0
        WORD32 sao_ht_chroma;
130
131
0
        WORD32 remaining_rows;
132
0
        WORD32 remaining_cols;
133
134
0
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
135
0
        sao_wd_luma = MIN(ctb_size, remaining_cols);
136
0
        sao_wd_chroma = MIN(ctb_size, remaining_cols);
137
138
0
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
139
0
        sao_ht_luma = MIN(ctb_size, remaining_rows);
140
0
        sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
141
142
0
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
143
0
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
144
0
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
145
0
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
146
147
0
        pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
148
0
                        ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
149
0
                        ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
150
151
0
        ai1_offset_y[1] = ps_sao->b4_y_offset_1;
152
0
        ai1_offset_y[2] = ps_sao->b4_y_offset_2;
153
0
        ai1_offset_y[3] = ps_sao->b4_y_offset_3;
154
0
        ai1_offset_y[4] = ps_sao->b4_y_offset_4;
155
156
0
        ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
157
0
        ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
158
0
        ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
159
0
        ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
160
161
0
        ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
162
0
        ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
163
0
        ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
164
0
        ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
165
166
0
        for(i = 0; i < 8; i++)
167
0
        {
168
0
            au1_avail_luma[i] = 255;
169
0
            au1_avail_chroma[i] = 255;
170
0
        }
171
172
173
0
        if(0 == ps_sao_ctxt->i4_ctb_x)
174
0
        {
175
0
            au1_avail_luma[0] = 0;
176
0
            au1_avail_luma[4] = 0;
177
0
            au1_avail_luma[6] = 0;
178
179
0
            au1_avail_chroma[0] = 0;
180
0
            au1_avail_chroma[4] = 0;
181
0
            au1_avail_chroma[6] = 0;
182
0
        }
183
184
0
        if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
185
0
        {
186
0
            au1_avail_luma[1] = 0;
187
0
            au1_avail_luma[5] = 0;
188
0
            au1_avail_luma[7] = 0;
189
190
0
            au1_avail_chroma[1] = 0;
191
0
            au1_avail_chroma[5] = 0;
192
0
            au1_avail_chroma[7] = 0;
193
0
        }
194
195
0
        if(0 == ps_sao_ctxt->i4_ctb_y)
196
0
        {
197
0
            au1_avail_luma[2] = 0;
198
0
            au1_avail_luma[4] = 0;
199
0
            au1_avail_luma[5] = 0;
200
201
0
            au1_avail_chroma[2] = 0;
202
0
            au1_avail_chroma[4] = 0;
203
0
            au1_avail_chroma[5] = 0;
204
0
        }
205
206
0
        if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
207
0
        {
208
0
            au1_avail_luma[3] = 0;
209
0
            au1_avail_luma[6] = 0;
210
0
            au1_avail_luma[7] = 0;
211
212
0
            au1_avail_chroma[3] = 0;
213
0
            au1_avail_chroma[6] = 0;
214
0
            au1_avail_chroma[7] = 0;
215
0
        }
216
217
218
0
        if(0 == ps_sao->b3_y_type_idx)
219
0
        {
220
            /* Update left, top and top-left */
221
0
            for(row = 0; row < sao_ht_luma; row++)
222
0
            {
223
0
                pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
224
0
            }
225
0
            ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
226
227
0
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
228
229
0
        }
230
0
        else
231
0
        {
232
0
            UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
233
0
            UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
234
0
            WORD32 tmp_strd = MAX_CTB_SIZE + 2;
235
0
            WORD32 no_loop_filter_enabled = 0;
236
237
            /* Check the loop filter flags and copy the original values for back up */
238
0
            {
239
0
                UWORD32 u4_no_loop_filter_flag;
240
0
                WORD32 min_cu = 8;
241
0
                UWORD8 *pu1_src_tmp = pu1_src_luma;
242
243
0
                for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
244
0
                {
245
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
246
0
                                    ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
247
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
248
249
0
                    if(u4_no_loop_filter_flag)
250
0
                    {
251
0
                        WORD32 tmp_wd = sao_wd_luma;
252
0
                        no_loop_filter_enabled = 1;
253
0
                        while(tmp_wd > 0)
254
0
                        {
255
0
                            if(CTZ(u4_no_loop_filter_flag))
256
0
                            {
257
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
258
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
259
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
260
0
                                tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
261
0
                            }
262
0
                            else
263
0
                            {
264
0
                                for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
265
0
                                {
266
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
267
0
                                    {
268
0
                                        pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
269
0
                                    }
270
0
                                }
271
272
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
273
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
274
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
275
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
276
0
                            }
277
0
                        }
278
279
0
                        pu1_src_tmp -= sao_wd_luma;
280
0
                    }
281
282
0
                    pu1_src_tmp += min_cu * src_strd;
283
0
                    pu1_src_copy += min_cu * tmp_strd;
284
0
                }
285
0
            }
286
287
0
            if(1 == ps_sao->b3_y_type_idx)
288
0
            {
289
0
                ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
290
0
                                                                          src_strd,
291
0
                                                                          pu1_src_left_luma,
292
0
                                                                          pu1_src_top_luma,
293
0
                                                                          ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
294
0
                                                                          ps_sao->b5_y_band_pos,
295
0
                                                                          ai1_offset_y,
296
0
                                                                          sao_wd_luma,
297
0
                                                                          sao_ht_luma);
298
0
            }
299
0
            else // if(2 <= ps_sao->b3_y_type_idx)
300
0
            {
301
0
                au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
302
0
                au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
303
0
                ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
304
0
                                                                  src_strd,
305
0
                                                                  pu1_src_left_luma,
306
0
                                                                  pu1_src_top_luma,
307
0
                                                                  ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
308
0
                                                                  au1_src_top_right,
309
0
                                                                  au1_src_bot_left,
310
0
                                                                  au1_avail_luma,
311
0
                                                                  ai1_offset_y,
312
0
                                                                  sao_wd_luma,
313
0
                                                                  sao_ht_luma);
314
0
            }
315
316
            /* Check the loop filter flags and copy the original values back if they are set */
317
0
            if(no_loop_filter_enabled)
318
0
            {
319
0
                UWORD32 u4_no_loop_filter_flag;
320
0
                WORD32 min_cu = 8;
321
0
                UWORD8 *pu1_src_tmp = pu1_src_luma;
322
323
0
                for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
324
0
                {
325
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
326
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
327
328
0
                    if(u4_no_loop_filter_flag)
329
0
                    {
330
0
                        WORD32 tmp_wd = sao_wd_luma;
331
0
                        while(tmp_wd > 0)
332
0
                        {
333
0
                            if(CTZ(u4_no_loop_filter_flag))
334
0
                            {
335
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
336
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
337
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
338
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
339
0
                            }
340
0
                            else
341
0
                            {
342
0
                                for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
343
0
                                {
344
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
345
0
                                    {
346
0
                                        pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
347
0
                                    }
348
0
                                }
349
350
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
351
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
352
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
353
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
354
0
                            }
355
0
                        }
356
357
0
                        pu1_src_tmp -= sao_wd_luma;
358
0
                    }
359
360
0
                    pu1_src_tmp += min_cu * src_strd;
361
0
                    pu1_src_copy += min_cu * tmp_strd;
362
0
                }
363
0
            }
364
365
0
        }
366
367
0
        if(0 == ps_sao->b3_cb_type_idx)
368
0
        {
369
0
            for(row = 0; row < sao_ht_chroma; row++)
370
0
            {
371
0
                pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
372
0
                pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
373
0
            }
374
0
            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
375
0
            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
376
377
0
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
378
0
        }
379
0
        else
380
0
        {
381
0
            UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
382
0
            UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
383
0
            WORD32 tmp_strd = MAX_CTB_SIZE + 4;
384
0
            WORD32 no_loop_filter_enabled = 0;
385
386
            /* Check the loop filter flags and copy the original values for back up */
387
0
            {
388
0
                UWORD32 u4_no_loop_filter_flag;
389
0
                WORD32 min_cu = 4;
390
0
                UWORD8 *pu1_src_tmp = pu1_src_chroma;
391
392
0
                for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
393
0
                {
394
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
395
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
396
397
0
                    if(u4_no_loop_filter_flag)
398
0
                    {
399
0
                        WORD32 tmp_wd = sao_wd_chroma;
400
0
                        no_loop_filter_enabled = 1;
401
0
                        while(tmp_wd > 0)
402
0
                        {
403
0
                            if(CTZ(u4_no_loop_filter_flag))
404
0
                            {
405
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
406
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
407
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
408
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
409
0
                            }
410
0
                            else
411
0
                            {
412
0
                                for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
413
0
                                {
414
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
415
0
                                    {
416
0
                                        pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
417
0
                                    }
418
0
                                }
419
420
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
421
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
422
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
423
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
424
0
                            }
425
0
                        }
426
427
0
                        pu1_src_tmp -= sao_wd_chroma;
428
0
                    }
429
430
0
                    pu1_src_tmp += min_cu * src_strd;
431
0
                    pu1_src_copy += min_cu * tmp_strd;
432
0
                }
433
0
            }
434
435
0
            if(1 == ps_sao->b3_cb_type_idx)
436
0
            {
437
0
                ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
438
0
                                                                            src_strd,
439
0
                                                                            pu1_src_left_chroma,
440
0
                                                                            pu1_src_top_chroma,
441
0
                                                                            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
442
0
                                                                            ps_sao->b5_cb_band_pos,
443
0
                                                                            ps_sao->b5_cr_band_pos,
444
0
                                                                            ai1_offset_cb,
445
0
                                                                            ai1_offset_cr,
446
0
                                                                            sao_wd_chroma,
447
0
                                                                            sao_ht_chroma
448
0
                                                                           );
449
0
            }
450
0
            else // if(2 <= ps_sao->b3_cb_type_idx)
451
0
            {
452
0
                au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
453
0
                au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
454
0
                au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
455
0
                au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
456
0
                ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
457
0
                                                                     src_strd,
458
0
                                                                     pu1_src_left_chroma,
459
0
                                                                     pu1_src_top_chroma,
460
0
                                                                     ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
461
0
                                                                     au1_src_top_right,
462
0
                                                                     au1_src_bot_left,
463
0
                                                                     au1_avail_chroma,
464
0
                                                                     ai1_offset_cb,
465
0
                                                                     ai1_offset_cr,
466
0
                                                                     sao_wd_chroma,
467
0
                                                                     sao_ht_chroma);
468
0
            }
469
470
            /* Check the loop filter flags and copy the original values back if they are set */
471
0
            if(no_loop_filter_enabled)
472
0
            {
473
0
                UWORD32 u4_no_loop_filter_flag;
474
0
                WORD32 min_cu = 4;
475
0
                UWORD8 *pu1_src_tmp = pu1_src_chroma;
476
477
0
                for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
478
0
                {
479
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
480
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
481
482
0
                    if(u4_no_loop_filter_flag)
483
0
                    {
484
0
                        WORD32 tmp_wd = sao_wd_chroma;
485
0
                        while(tmp_wd > 0)
486
0
                        {
487
0
                            if(CTZ(u4_no_loop_filter_flag))
488
0
                            {
489
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
490
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
491
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
492
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
493
0
                            }
494
0
                            else
495
0
                            {
496
0
                                for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
497
0
                                {
498
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
499
0
                                    {
500
0
                                        pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
501
0
                                    }
502
0
                                }
503
504
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
505
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
506
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
507
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
508
0
                            }
509
0
                        }
510
511
0
                        pu1_src_tmp -= sao_wd_chroma;
512
0
                    }
513
514
0
                    pu1_src_tmp += min_cu * src_strd;
515
0
                    pu1_src_copy += min_cu * tmp_strd;
516
0
                }
517
0
            }
518
519
0
        }
520
521
0
    }
522
0
}
523
524
void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
525
2.21M
{
526
2.21M
    codec_t *ps_codec = ps_sao_ctxt->ps_codec;
527
2.21M
    UWORD8 *pu1_src_luma;
528
2.21M
    UWORD8 *pu1_src_chroma;
529
2.21M
    WORD32 src_strd;
530
2.21M
    WORD32 ctb_size;
531
2.21M
    WORD32 log2_ctb_size;
532
2.21M
    sps_t *ps_sps;
533
2.21M
    sao_t *ps_sao;
534
2.21M
    pps_t *ps_pps;
535
2.21M
    slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
536
2.21M
    tile_t *ps_tile;
537
2.21M
    UWORD16 *pu1_slice_idx;
538
2.21M
    UWORD16 *pu1_tile_idx;
539
2.21M
    WORD32 row, col;
540
2.21M
    UWORD8 au1_avail_luma[8];
541
2.21M
    UWORD8 au1_avail_chroma[8];
542
2.21M
    UWORD8 au1_tile_slice_boundary[8];
543
2.21M
    UWORD8 au4_ilf_across_tile_slice_enable[8];
544
2.21M
    WORD32 i;
545
2.21M
    UWORD8 *pu1_src_top_luma;
546
2.21M
    UWORD8 *pu1_src_top_chroma;
547
2.21M
    UWORD8 *pu1_src_left_luma;
548
2.21M
    UWORD8 *pu1_src_left_chroma;
549
2.21M
    UWORD8 au1_src_top_right[2];
550
2.21M
    UWORD8 au1_src_bot_left[2];
551
2.21M
    UWORD8 *pu1_no_loop_filter_flag;
552
2.21M
    UWORD8 *pu1_src_backup_luma;
553
2.21M
    UWORD8 *pu1_src_backup_chroma;
554
2.21M
    WORD32 backup_strd;
555
2.21M
    WORD32 loop_filter_strd;
556
557
2.21M
    WORD32 no_loop_filter_enabled_luma = 0;
558
2.21M
    WORD32 no_loop_filter_enabled_chroma = 0;
559
2.21M
    UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
560
2.21M
    UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
561
2.21M
    UWORD8 *pu1_sao_src_luma_top_left_ctb;
562
2.21M
    UWORD8 *pu1_sao_src_chroma_top_left_ctb;
563
2.21M
    UWORD8 *pu1_sao_src_top_left_luma_top_right;
564
2.21M
    UWORD8 *pu1_sao_src_top_left_chroma_top_right;
565
2.21M
    UWORD8  u1_sao_src_top_left_luma_bot_left;
566
2.21M
    UWORD8  *pu1_sao_src_top_left_luma_bot_left;
567
2.21M
    UWORD8 *au1_sao_src_top_left_chroma_bot_left;
568
2.21M
    UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
569
    /* Only first 5 values are used, but arrays are large
570
     enough so that SIMD functions can read 64 bits at a time */
571
2.21M
    WORD8 ai1_offset_y[8] = {0};
572
2.21M
    WORD8 ai1_offset_cb[8] = {0};
573
2.21M
    WORD8 ai1_offset_cr[8] = {0};
574
2.21M
    WORD32  chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
575
576
2.21M
    PROFILE_DISABLE_SAO();
577
578
2.21M
    ps_sps = ps_sao_ctxt->ps_sps;
579
2.21M
    ps_pps = ps_sao_ctxt->ps_pps;
580
2.21M
    ps_tile = ps_sao_ctxt->ps_tile;
581
582
2.21M
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
583
2.21M
    ctb_size = (1 << log2_ctb_size);
584
2.21M
    src_strd = ps_sao_ctxt->ps_codec->i4_strd;
585
2.21M
    ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
586
2.21M
    ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
587
588
2.21M
    pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
589
2.21M
    pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
590
2.21M
    pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
591
2.21M
    pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
592
593
    /* Stores the left value for each row of CTBs - needed for column tiles */
594
2.21M
    pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
595
2.21M
    pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
596
2.21M
    pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
597
2.21M
    pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
598
2.21M
    u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
599
2.21M
    pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
600
2.21M
    au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
601
2.21M
    pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
602
2.21M
    pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
603
2.21M
    pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
604
605
2.21M
    ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
606
2.21M
    loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
607
2.21M
    backup_strd = 2 * MAX_CTB_SIZE;
608
609
2.21M
    DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
610
611
2.21M
    {
612
        /* Check the loop filter flags and copy the original values for back up */
613
        /* Luma */
614
615
        /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs
616
         * can belong to different slice with their own sao_enable flag */
617
2.21M
        {
618
2.21M
            UWORD32 u4_no_loop_filter_flag;
619
2.21M
            WORD32 loop_filter_bit_pos;
620
2.21M
            WORD32 log2_min_cu = 3;
621
2.21M
            WORD32 min_cu = (1 << log2_min_cu);
622
2.21M
            UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
623
2.21M
            WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
624
2.21M
            WORD32 sao_blk_wd = ctb_size;
625
2.21M
            WORD32 remaining_rows;
626
2.21M
            WORD32 remaining_cols;
627
628
2.21M
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
629
2.21M
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
630
2.21M
            if(remaining_rows <= SAO_SHIFT_CTB)
631
159k
                sao_blk_ht += remaining_rows;
632
2.21M
            if(remaining_cols <= SAO_SHIFT_CTB)
633
126k
                sao_blk_wd += remaining_cols;
634
635
2.21M
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
636
2.21M
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
637
638
2.21M
            pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
639
640
2.21M
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
641
2.21M
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
642
2.21M
            if(ps_sao_ctxt->i4_ctb_x > 0)
643
2.08M
                loop_filter_bit_pos -= 1;
644
645
2.21M
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
646
2.21M
                            (loop_filter_bit_pos >> 3);
647
648
2.21M
            for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
649
14.3M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
650
12.1M
            {
651
12.1M
                WORD32 tmp_wd = sao_blk_wd;
652
653
12.1M
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
654
12.1M
                                (loop_filter_bit_pos & 7);
655
12.1M
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
656
657
12.1M
                if(u4_no_loop_filter_flag)
658
263k
                {
659
263k
                    no_loop_filter_enabled_luma = 1;
660
578k
                    while(tmp_wd > 0)
661
314k
                    {
662
314k
                        if(CTZ(u4_no_loop_filter_flag))
663
45.5k
                        {
664
45.5k
                            pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
665
45.5k
                            pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
666
45.5k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
667
45.5k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
668
45.5k
                        }
669
269k
                        else
670
269k
                        {
671
2.38M
                            for(row = 0; row < min_cu; row++)
672
2.11M
                            {
673
99.4M
                                for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
674
97.3M
                                {
675
97.3M
                                    pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
676
97.3M
                                }
677
2.11M
                            }
678
269k
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
679
269k
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
680
269k
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
681
269k
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
682
269k
                        }
683
314k
                    }
684
685
263k
                    pu1_src_tmp_luma -= sao_blk_wd;
686
263k
                    pu1_src_backup_luma -= sao_blk_wd;
687
263k
                }
688
689
12.1M
                pu1_src_tmp_luma += (src_strd << log2_min_cu);
690
12.1M
                pu1_src_backup_luma += (backup_strd << log2_min_cu);
691
12.1M
            }
692
2.21M
        }
693
694
        /* Chroma */
695
696
2.21M
        {
697
2.21M
            UWORD32 u4_no_loop_filter_flag;
698
2.21M
            WORD32 loop_filter_bit_pos;
699
2.21M
            WORD32 log2_min_cu = 3;
700
2.21M
            WORD32 min_cu = (1 << log2_min_cu);
701
2.21M
            UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
702
2.21M
            WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
703
2.21M
            WORD32 sao_blk_wd = ctb_size;
704
2.21M
            WORD32 remaining_rows;
705
2.21M
            WORD32 remaining_cols;
706
707
2.21M
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
708
2.21M
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
709
2.21M
            if(remaining_rows <= 2 * SAO_SHIFT_CTB)
710
159k
                sao_blk_ht += remaining_rows;
711
2.21M
            if(remaining_cols <= 2 * SAO_SHIFT_CTB)
712
126k
                sao_blk_wd += remaining_cols;
713
714
2.21M
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
715
2.21M
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
716
717
2.21M
            pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
718
719
2.21M
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
720
2.21M
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
721
2.21M
            if(ps_sao_ctxt->i4_ctb_x > 0)
722
2.08M
                loop_filter_bit_pos -= 2;
723
724
2.21M
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
725
2.21M
                            (loop_filter_bit_pos >> 3);
726
727
2.21M
            for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
728
14.3M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
729
12.1M
            {
730
12.1M
                WORD32 tmp_wd = sao_blk_wd;
731
732
12.1M
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
733
12.1M
                                (loop_filter_bit_pos & 7);
734
12.1M
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
735
736
12.1M
                if(u4_no_loop_filter_flag)
737
263k
                {
738
263k
                    no_loop_filter_enabled_chroma = 1;
739
576k
                    while(tmp_wd > 0)
740
313k
                    {
741
313k
                        if(CTZ(u4_no_loop_filter_flag))
742
44.6k
                        {
743
44.6k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
744
44.6k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
745
44.6k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
746
44.6k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
747
44.6k
                        }
748
268k
                        else
749
268k
                        {
750
1.33M
                            for(row = 0; row < min_cu / 2; row++)
751
1.06M
                            {
752
57.6M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
753
56.5M
                                {
754
56.5M
                                    pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
755
56.5M
                                }
756
1.06M
                            }
757
758
268k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
759
268k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
760
268k
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
761
268k
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
762
268k
                        }
763
313k
                    }
764
765
263k
                    pu1_src_tmp_chroma -= sao_blk_wd;
766
263k
                    pu1_src_backup_chroma -= sao_blk_wd;
767
263k
                }
768
769
12.1M
                pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
770
12.1M
                pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
771
12.1M
            }
772
2.21M
        }
773
2.21M
    }
774
775
2.21M
    DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
776
777
    /* Top-left CTB */
778
2.21M
    if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
779
1.93M
    {
780
1.93M
        WORD32 sao_wd_luma = SAO_SHIFT_CTB;
781
1.93M
        WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
782
1.93M
        WORD32 sao_ht_luma = SAO_SHIFT_CTB;
783
1.93M
        WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
784
785
1.93M
        WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
786
1.93M
        WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
787
1.93M
        WORD32 au4_idx_tl[8], idx_tl;
788
789
1.93M
        slice_header_t *ps_slice_hdr_top_left;
790
1.93M
        {
791
1.93M
            WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
792
1.93M
                                        (ps_sao_ctxt->i4_ctb_x - 1);
793
1.93M
            ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx];
794
1.93M
        }
795
796
797
1.93M
        pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
798
1.93M
        pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
799
1.93M
        ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
800
1.93M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
801
1.93M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
802
1.93M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
803
1.93M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
804
805
1.93M
        if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag)
806
606k
        {
807
606k
            if(0 == ps_sao->b3_y_type_idx)
808
555k
            {
809
                /* Update left, top and top-left */
810
5.00M
                for(row = 0; row < sao_ht_luma; row++)
811
4.44M
                {
812
4.44M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
813
4.44M
                }
814
555k
                pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
815
816
555k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
817
818
819
555k
            }
820
821
50.7k
            else if(1 == ps_sao->b3_y_type_idx)
822
26.8k
            {
823
26.8k
                ai1_offset_y[1] = ps_sao->b4_y_offset_1;
824
26.8k
                ai1_offset_y[2] = ps_sao->b4_y_offset_2;
825
26.8k
                ai1_offset_y[3] = ps_sao->b4_y_offset_3;
826
26.8k
                ai1_offset_y[4] = ps_sao->b4_y_offset_4;
827
828
26.8k
                ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
829
26.8k
                                                                          src_strd,
830
26.8k
                                                                          pu1_src_left_luma,
831
26.8k
                                                                          pu1_src_top_luma,
832
26.8k
                                                                          pu1_sao_src_luma_top_left_ctb,
833
26.8k
                                                                          ps_sao->b5_y_band_pos,
834
26.8k
                                                                          ai1_offset_y,
835
26.8k
                                                                          sao_wd_luma,
836
26.8k
                                                                          sao_ht_luma
837
26.8k
                                                                         );
838
26.8k
            }
839
840
23.9k
            else // if(2 <= ps_sao->b3_y_type_idx)
841
23.9k
            {
842
23.9k
                ai1_offset_y[1] = ps_sao->b4_y_offset_1;
843
23.9k
                ai1_offset_y[2] = ps_sao->b4_y_offset_2;
844
23.9k
                ai1_offset_y[3] = ps_sao->b4_y_offset_3;
845
23.9k
                ai1_offset_y[4] = ps_sao->b4_y_offset_4;
846
847
215k
                for(i = 0; i < 8; i++)
848
191k
                {
849
191k
                    au1_avail_luma[i] = 255;
850
191k
                    au1_tile_slice_boundary[i] = 0;
851
191k
                    au4_idx_tl[i] = 0;
852
191k
                    au4_ilf_across_tile_slice_enable[i] = 1;
853
191k
                }
854
855
                /******************************************************************
856
                 * Derive the  Top-left CTB's neighbor pixel's slice indices.
857
                 *
858
                 *          TL_T
859
                 *       4  _2__5________
860
                 *     0   |    |       |
861
                 *    TL_L | TL | 1 TL_R|
862
                 *         |____|_______|____
863
                 *        6|TL_D|7      |    |
864
                 *         | 3  |       |    |
865
                 *         |____|_______|    |
866
                 *              |            |
867
                 *              |            |
868
                 *              |____________|
869
                 *
870
                 *****************************************************************/
871
872
                /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/
873
23.9k
                {
874
23.9k
                    if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
875
13.0k
                    {
876
13.0k
                        {
877
                            /*Assuming that sao shift is uniform along x and y directions*/
878
13.0k
                            if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
879
0
                            {
880
0
                                ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
881
0
                                ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
882
0
                            }
883
13.0k
                            else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
884
13.0k
                            {
885
13.0k
                                ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
886
13.0k
                                ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
887
13.0k
                            }
888
13.0k
                            ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
889
13.0k
                            ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
890
891
13.0k
                            ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
892
13.0k
                            ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
893
894
13.0k
                            ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
895
13.0k
                            ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
896
897
13.0k
                            ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
898
13.0k
                            ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
899
13.0k
                        }
900
901
13.0k
                        if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
902
5.15k
                        {
903
                            /*Calculate slice indices for neighbor pixels*/
904
5.15k
                            idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
905
5.15k
                            au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
906
5.15k
                            au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
907
5.15k
                            au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
908
5.15k
                            au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
909
5.15k
                            au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
910
911
5.15k
                            if((0 == (1 << log2_ctb_size) - sao_wd_luma))
912
0
                            {
913
0
                                if(ps_sao_ctxt->i4_ctb_x == 1)
914
0
                                {
915
0
                                    au4_idx_tl[6] = -1;
916
0
                                    au4_idx_tl[4] = -1;
917
0
                                }
918
0
                                else
919
0
                                {
920
0
                                    au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
921
0
                                }
922
0
                                if(ps_sao_ctxt->i4_ctb_y == 1)
923
0
                                {
924
0
                                    au4_idx_tl[5] = -1;
925
0
                                    au4_idx_tl[4] = -1;
926
0
                                }
927
0
                                else
928
0
                                {
929
0
                                    au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
930
0
                                    au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
931
0
                                }
932
0
                                au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
933
0
                            }
934
935
                            /* Verify that the neighbor CTBs don't cross the picture boundary.
936
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
937
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags.
938
                             * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
939
                             * the respective pixel's flags are checked
940
                             */
941
942
5.15k
                            if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
943
0
                            {
944
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
945
0
                                au4_ilf_across_tile_slice_enable[6] = 0;
946
0
                            }
947
5.15k
                            else
948
5.15k
                            {
949
5.15k
                                au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
950
5.15k
                            }
951
5.15k
                            if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
952
0
                            {
953
0
                                au4_ilf_across_tile_slice_enable[5] = 0;
954
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
955
0
                            }
956
5.15k
                            else
957
5.15k
                            {
958
5.15k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
959
5.15k
                                au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
960
5.15k
                            }
961
5.15k
                            au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
962
5.15k
                            au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
963
5.15k
                            au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
964
5.15k
                            au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
965
5.15k
                            au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
966
967
5.15k
                            if(au4_idx_tl[5] > idx_tl)
968
433
                            {
969
433
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
970
433
                            }
971
972
                            /*
973
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
974
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags.
975
                             * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
976
                             * the respective pixel's flags are checked
977
                             */
978
46.3k
                            for(i = 0; i < 8; i++)
979
41.2k
                            {
980
                                /*Sets the edges that lie on the slice/tile boundary*/
981
41.2k
                                if(au4_idx_tl[i] != idx_tl)
982
9.00k
                                {
983
9.00k
                                    au1_tile_slice_boundary[i] = 1;
984
9.00k
                                }
985
32.2k
                                else
986
32.2k
                                {
987
32.2k
                                    au4_ilf_across_tile_slice_enable[i] = 1;
988
32.2k
                                }
989
41.2k
                            }
990
991
5.15k
                            ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
992
5.15k
                        }
993
994
13.0k
                        if(ps_pps->i1_tiles_enabled_flag)
995
8.17k
                        {
996
                            /* Calculate availability flags at tile boundary */
997
8.17k
                            if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
998
4.13k
                            {
999
                                /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1000
4.13k
                                if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1001
3.95k
                                {
1002
                                    /*Set the boundary arrays*/
1003
                                    /*Calculate tile indices for neighbor pixels*/
1004
3.95k
                                    idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1005
3.95k
                                    au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1006
3.95k
                                    au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1007
3.95k
                                    au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1008
3.95k
                                    au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1009
3.95k
                                    au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1010
1011
3.95k
                                    if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1012
0
                                    {
1013
0
                                        if(ps_sao_ctxt->i4_ctb_x == 1)
1014
0
                                        {
1015
0
                                            au4_idx_tl[6] = -1;
1016
0
                                            au4_idx_tl[4] = -1;
1017
0
                                        }
1018
0
                                        else
1019
0
                                        {
1020
0
                                            au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1021
0
                                        }
1022
0
                                        if(ps_sao_ctxt->i4_ctb_y == 1)
1023
0
                                        {
1024
0
                                            au4_idx_tl[5] = -1;
1025
0
                                            au4_idx_tl[4] = -1;
1026
0
                                        }
1027
0
                                        else
1028
0
                                        {
1029
0
                                            au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1030
0
                                            au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1031
0
                                        }
1032
0
                                        au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1033
0
                                    }
1034
35.5k
                                    for(i = 0; i < 8; i++)
1035
31.6k
                                    {
1036
                                        /*Sets the edges that lie on the tile boundary*/
1037
31.6k
                                        if(au4_idx_tl[i] != idx_tl)
1038
13.0k
                                        {
1039
13.0k
                                            au1_tile_slice_boundary[i] |= 1;
1040
13.0k
                                            au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1041
13.0k
                                        }
1042
31.6k
                                    }
1043
3.95k
                                }
1044
4.13k
                            }
1045
8.17k
                        }
1046
1047
1048
                        /*Set availability flags based on tile and slice boundaries*/
1049
117k
                        for(i = 0; i < 8; i++)
1050
104k
                        {
1051
                            /*Sets the edges that lie on the slice/tile boundary*/
1052
104k
                            if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1053
13.2k
                            {
1054
13.2k
                                au1_avail_luma[i] = 0;
1055
13.2k
                            }
1056
104k
                        }
1057
13.0k
                    }
1058
23.9k
                }
1059
1060
23.9k
                if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
1061
0
                {
1062
0
                    au1_avail_luma[0] = 0;
1063
0
                    au1_avail_luma[4] = 0;
1064
0
                    au1_avail_luma[6] = 0;
1065
0
                }
1066
1067
23.9k
                if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1068
0
                {
1069
0
                    au1_avail_luma[1] = 0;
1070
0
                    au1_avail_luma[5] = 0;
1071
0
                    au1_avail_luma[7] = 0;
1072
0
                }
1073
                //y==1 case
1074
23.9k
                if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
1075
0
                {
1076
0
                    au1_avail_luma[2] = 0;
1077
0
                    au1_avail_luma[4] = 0;
1078
0
                    au1_avail_luma[5] = 0;
1079
0
                }
1080
23.9k
                if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1081
0
                {
1082
0
                    au1_avail_luma[3] = 0;
1083
0
                    au1_avail_luma[6] = 0;
1084
0
                    au1_avail_luma[7] = 0;
1085
0
                }
1086
1087
23.9k
                {
1088
23.9k
                    au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
1089
23.9k
                    u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
1090
23.9k
                    ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1091
23.9k
                                                                      src_strd,
1092
23.9k
                                                                      pu1_src_left_luma,
1093
23.9k
                                                                      pu1_src_top_luma,
1094
23.9k
                                                                      pu1_sao_src_luma_top_left_ctb,
1095
23.9k
                                                                      au1_src_top_right,
1096
23.9k
                                                                      &u1_sao_src_top_left_luma_bot_left,
1097
23.9k
                                                                      au1_avail_luma,
1098
23.9k
                                                                      ai1_offset_y,
1099
23.9k
                                                                      sao_wd_luma,
1100
23.9k
                                                                      sao_ht_luma);
1101
23.9k
                }
1102
23.9k
            }
1103
1104
606k
        }
1105
1.32M
        else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1106
401k
        {
1107
            /* Update left, top and top-left */
1108
3.61M
            for(row = 0; row < sao_ht_luma; row++)
1109
3.21M
            {
1110
3.21M
                pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1111
3.21M
            }
1112
401k
            pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1113
1114
401k
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1115
401k
        }
1116
1117
1.93M
        if(ps_slice_hdr_top_left->i1_slice_sao_chroma_flag)
1118
538k
        {
1119
538k
            if(0 == ps_sao->b3_cb_type_idx)
1120
501k
            {
1121
4.51M
                for(row = 0; row < sao_ht_chroma; row++)
1122
4.01M
                {
1123
4.01M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1124
4.01M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1125
4.01M
                }
1126
501k
                pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1127
501k
                pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1128
1129
501k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1130
1131
501k
            }
1132
1133
36.7k
            else if(1 == ps_sao->b3_cb_type_idx)
1134
18.0k
            {
1135
18.0k
                ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1136
18.0k
                ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1137
18.0k
                ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1138
18.0k
                ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1139
1140
18.0k
                ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1141
18.0k
                ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1142
18.0k
                ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1143
18.0k
                ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1144
1145
18.0k
                if(chroma_yuv420sp_vu)
1146
5.63k
                {
1147
5.63k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1148
5.63k
                                                                                src_strd,
1149
5.63k
                                                                                pu1_src_left_chroma,
1150
5.63k
                                                                                pu1_src_top_chroma,
1151
5.63k
                                                                                pu1_sao_src_chroma_top_left_ctb,
1152
5.63k
                                                                                ps_sao->b5_cr_band_pos,
1153
5.63k
                                                                                ps_sao->b5_cb_band_pos,
1154
5.63k
                                                                                ai1_offset_cr,
1155
5.63k
                                                                                ai1_offset_cb,
1156
5.63k
                                                                                sao_wd_chroma,
1157
5.63k
                                                                                sao_ht_chroma
1158
5.63k
                                                                               );
1159
5.63k
                }
1160
12.4k
                else
1161
12.4k
                {
1162
12.4k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1163
12.4k
                                                                                src_strd,
1164
12.4k
                                                                                pu1_src_left_chroma,
1165
12.4k
                                                                                pu1_src_top_chroma,
1166
12.4k
                                                                                pu1_sao_src_chroma_top_left_ctb,
1167
12.4k
                                                                                ps_sao->b5_cb_band_pos,
1168
12.4k
                                                                                ps_sao->b5_cr_band_pos,
1169
12.4k
                                                                                ai1_offset_cb,
1170
12.4k
                                                                                ai1_offset_cr,
1171
12.4k
                                                                                sao_wd_chroma,
1172
12.4k
                                                                                sao_ht_chroma
1173
12.4k
                                                                               );
1174
12.4k
                }
1175
18.0k
            }
1176
1177
18.6k
            else // if(2 <= ps_sao->b3_cb_type_idx)
1178
18.6k
            {
1179
18.6k
                ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1180
18.6k
                ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1181
18.6k
                ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1182
18.6k
                ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1183
1184
18.6k
                ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1185
18.6k
                ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1186
18.6k
                ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1187
18.6k
                ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1188
168k
                for(i = 0; i < 8; i++)
1189
149k
                {
1190
149k
                    au1_avail_chroma[i] = 255;
1191
149k
                    au1_tile_slice_boundary[i] = 0;
1192
149k
                    au4_idx_tl[i] = 0;
1193
149k
                    au4_ilf_across_tile_slice_enable[i] = 1;
1194
149k
                }
1195
                /*In case of slices*/
1196
18.6k
                {
1197
18.6k
                    if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1198
10.6k
                    {
1199
10.6k
                        if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
1200
1.05k
                        {
1201
1.05k
                            ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
1202
1.05k
                            ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
1203
1.05k
                        }
1204
9.62k
                        else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
1205
9.02k
                        {
1206
9.02k
                            ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
1207
9.02k
                            ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
1208
9.02k
                        }
1209
10.6k
                        ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
1210
10.6k
                        ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
1211
1212
10.6k
                        ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
1213
10.6k
                        ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
1214
1215
10.6k
                        ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
1216
10.6k
                        ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
1217
1218
10.6k
                        ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
1219
10.6k
                        ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
1220
1221
10.6k
                        if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1222
3.32k
                        {
1223
1224
3.32k
                            idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1225
3.32k
                            au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1226
3.32k
                            au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1227
3.32k
                            au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1228
3.32k
                            au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1229
3.32k
                            au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1230
1231
3.32k
                            if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
1232
1.49k
                            {
1233
1.49k
                                if(ps_sao_ctxt->i4_ctb_x == 1)
1234
364
                                {
1235
364
                                    au4_idx_tl[6] = -1;
1236
364
                                    au4_idx_tl[4] = -1;
1237
364
                                }
1238
1.13k
                                else
1239
1.13k
                                {
1240
1.13k
                                    au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1241
1.13k
                                }
1242
1.49k
                                if(ps_sao_ctxt->i4_ctb_y == 1)
1243
75
                                {
1244
75
                                    au4_idx_tl[5] = -1;
1245
75
                                    au4_idx_tl[4] = -1;
1246
75
                                }
1247
1.42k
                                else
1248
1.42k
                                {
1249
1.42k
                                    au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1250
1.42k
                                    au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1251
1.42k
                                }
1252
1.49k
                                au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1253
1.49k
                            }
1254
1255
                            /* Verify that the neighbor ctbs don't cross pic boundary
1256
                             * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
1257
3.32k
                            if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
1258
364
                            {
1259
364
                                au4_ilf_across_tile_slice_enable[4] = 0;
1260
364
                                au4_ilf_across_tile_slice_enable[6] = 0;
1261
364
                            }
1262
2.95k
                            else
1263
2.95k
                            {
1264
2.95k
                                au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1265
2.95k
                            }
1266
3.32k
                            if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
1267
75
                            {
1268
75
                                au4_ilf_across_tile_slice_enable[5] = 0;
1269
75
                                au4_ilf_across_tile_slice_enable[4] = 0;
1270
75
                            }
1271
3.24k
                            else
1272
3.24k
                            {
1273
3.24k
                                au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1274
3.24k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1275
3.24k
                            }
1276
3.32k
                            au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1277
3.32k
                            au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1278
3.32k
                            au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1279
3.32k
                            au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1280
3.32k
                            au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1281
                            /*
1282
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1283
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags
1284
                             */
1285
29.9k
                            for(i = 0; i < 8; i++)
1286
26.5k
                            {
1287
                                /*Sets the edges that lie on the slice/tile boundary*/
1288
26.5k
                                if(au4_idx_tl[i] != idx_tl)
1289
4.28k
                                {
1290
4.28k
                                    au1_tile_slice_boundary[i] = 1;
1291
4.28k
                                }
1292
22.2k
                                else
1293
22.2k
                                {
1294
22.2k
                                    au4_ilf_across_tile_slice_enable[i] = 1;
1295
22.2k
                                }
1296
26.5k
                            }
1297
1298
                            /*Reset indices*/
1299
29.9k
                            for(i = 0; i < 8; i++)
1300
26.5k
                            {
1301
26.5k
                                au4_idx_tl[i] = 0;
1302
26.5k
                            }
1303
3.32k
                        }
1304
10.6k
                        if(ps_pps->i1_tiles_enabled_flag)
1305
7.53k
                        {
1306
                            /* Calculate availability flags at tile boundary */
1307
7.53k
                            if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1308
3.70k
                            {
1309
                                /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1310
3.70k
                                if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1311
3.52k
                                {
1312
                                    /*Set the boundary arrays*/
1313
                                    /*Calculate tile indices for neighbor pixels*/
1314
3.52k
                                    idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1315
3.52k
                                    au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1316
3.52k
                                    au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1317
3.52k
                                    au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1318
3.52k
                                    au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1319
3.52k
                                    au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1320
1321
3.52k
                                    if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1322
0
                                    {
1323
0
                                        if(ps_sao_ctxt->i4_ctb_x == 1)
1324
0
                                        {
1325
0
                                            au4_idx_tl[6] = -1;
1326
0
                                            au4_idx_tl[4] = -1;
1327
0
                                        }
1328
0
                                        else
1329
0
                                        {
1330
0
                                            au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1331
0
                                        }
1332
0
                                        if(ps_sao_ctxt->i4_ctb_y == 1)
1333
0
                                        {
1334
0
                                            au4_idx_tl[5] = -1;
1335
0
                                            au4_idx_tl[4] = -1;
1336
0
                                        }
1337
0
                                        else
1338
0
                                        {
1339
0
                                            au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1340
0
                                            au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1341
0
                                        }
1342
0
                                        au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1343
0
                                    }
1344
31.7k
                                    for(i = 0; i < 8; i++)
1345
28.2k
                                    {
1346
                                        /*Sets the edges that lie on the tile boundary*/
1347
28.2k
                                        if(au4_idx_tl[i] != idx_tl)
1348
11.0k
                                        {
1349
11.0k
                                            au1_tile_slice_boundary[i] |= 1;
1350
11.0k
                                            au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1351
11.0k
                                        }
1352
28.2k
                                    }
1353
3.52k
                                }
1354
3.70k
                            }
1355
7.53k
                        }
1356
1357
96.1k
                        for(i = 0; i < 8; i++)
1358
85.4k
                        {
1359
                            /*Sets the edges that lie on the slice/tile boundary*/
1360
85.4k
                            if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1361
11.7k
                            {
1362
11.7k
                                au1_avail_chroma[i] = 0;
1363
11.7k
                            }
1364
85.4k
                        }
1365
10.6k
                    }
1366
18.6k
                }
1367
1368
18.6k
                if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
1369
450
                {
1370
450
                    au1_avail_chroma[0] = 0;
1371
450
                    au1_avail_chroma[4] = 0;
1372
450
                    au1_avail_chroma[6] = 0;
1373
450
                }
1374
18.6k
                if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1375
0
                {
1376
0
                    au1_avail_chroma[1] = 0;
1377
0
                    au1_avail_chroma[5] = 0;
1378
0
                    au1_avail_chroma[7] = 0;
1379
0
                }
1380
1381
18.6k
                if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1382
450
                {
1383
450
                    au1_avail_chroma[2] = 0;
1384
450
                    au1_avail_chroma[4] = 0;
1385
450
                    au1_avail_chroma[5] = 0;
1386
450
                }
1387
18.6k
                if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1388
0
                {
1389
0
                    au1_avail_chroma[3] = 0;
1390
0
                    au1_avail_chroma[6] = 0;
1391
0
                    au1_avail_chroma[7] = 0;
1392
0
                }
1393
1394
18.6k
                {
1395
18.6k
                    au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
1396
18.6k
                    au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
1397
18.6k
                    au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
1398
18.6k
                    au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
1399
18.6k
                    if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
1400
1.89k
                    {
1401
1.89k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1402
1.89k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1403
1.89k
                    }
1404
1405
18.6k
                    if(chroma_yuv420sp_vu)
1406
5.59k
                    {
1407
5.59k
                        ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1408
5.59k
                                                                             src_strd,
1409
5.59k
                                                                             pu1_src_left_chroma,
1410
5.59k
                                                                             pu1_src_top_chroma,
1411
5.59k
                                                                             pu1_sao_src_chroma_top_left_ctb,
1412
5.59k
                                                                             au1_src_top_right,
1413
5.59k
                                                                             au1_sao_src_top_left_chroma_bot_left,
1414
5.59k
                                                                             au1_avail_chroma,
1415
5.59k
                                                                             ai1_offset_cr,
1416
5.59k
                                                                             ai1_offset_cb,
1417
5.59k
                                                                             sao_wd_chroma,
1418
5.59k
                                                                             sao_ht_chroma);
1419
5.59k
                    }
1420
13.0k
                    else
1421
13.0k
                    {
1422
13.0k
                        ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1423
13.0k
                                                                             src_strd,
1424
13.0k
                                                                             pu1_src_left_chroma,
1425
13.0k
                                                                             pu1_src_top_chroma,
1426
13.0k
                                                                             pu1_sao_src_chroma_top_left_ctb,
1427
13.0k
                                                                             au1_src_top_right,
1428
13.0k
                                                                             au1_sao_src_top_left_chroma_bot_left,
1429
13.0k
                                                                             au1_avail_chroma,
1430
13.0k
                                                                             ai1_offset_cb,
1431
13.0k
                                                                             ai1_offset_cr,
1432
13.0k
                                                                             sao_wd_chroma,
1433
13.0k
                                                                             sao_ht_chroma);
1434
13.0k
                    }
1435
18.6k
                }
1436
18.6k
            }
1437
538k
        }
1438
1.39M
        else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1439
496k
        {
1440
4.46M
            for(row = 0; row < sao_ht_chroma; row++)
1441
3.97M
            {
1442
3.97M
                pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1443
3.97M
                pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1444
3.97M
            }
1445
496k
            pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1446
496k
            pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1447
1448
496k
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1449
496k
        }
1450
1451
1.93M
        pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
1452
1.93M
        pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
1453
1.93M
        ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
1454
1.93M
    }
1455
1456
1457
    /* Top CTB */
1458
2.21M
    if((ps_sao_ctxt->i4_ctb_y > 0))
1459
2.05M
    {
1460
2.05M
        WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
1461
2.05M
        WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
1462
2.05M
        WORD32 sao_ht_luma = SAO_SHIFT_CTB;
1463
2.05M
        WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
1464
1465
2.05M
        WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
1466
2.05M
        WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
1467
2.05M
        WORD32 au4_idx_t[8], idx_t;
1468
1469
2.05M
        WORD32 remaining_cols;
1470
1471
2.05M
        slice_header_t *ps_slice_hdr_top;
1472
2.05M
        {
1473
2.05M
            WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
1474
2.05M
                                        (ps_sao_ctxt->i4_ctb_x);
1475
2.05M
            ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx];
1476
2.05M
        }
1477
1478
2.05M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
1479
2.05M
        if(remaining_cols <= SAO_SHIFT_CTB)
1480
117k
        {
1481
117k
            sao_wd_luma += remaining_cols;
1482
117k
        }
1483
2.05M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
1484
2.05M
        if(remaining_cols <= 2 * SAO_SHIFT_CTB)
1485
117k
        {
1486
117k
            sao_wd_chroma += remaining_cols;
1487
117k
        }
1488
1489
2.05M
        pu1_src_luma -= (sao_ht_luma * src_strd);
1490
2.05M
        pu1_src_chroma -= (sao_ht_chroma * src_strd);
1491
2.05M
        ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
1492
2.05M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1493
2.05M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1494
2.05M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
1495
2.05M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
1496
1497
2.05M
        if(0 != sao_wd_luma)
1498
2.05M
        {
1499
2.05M
            if(ps_slice_hdr_top->i1_slice_sao_luma_flag)
1500
637k
            {
1501
637k
                if(0 == ps_sao->b3_y_type_idx)
1502
580k
                {
1503
                    /* Update left, top and top-left */
1504
5.22M
                    for(row = 0; row < sao_ht_luma; row++)
1505
4.64M
                    {
1506
4.64M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1507
4.64M
                    }
1508
580k
                    pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1509
1510
580k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1511
1512
580k
                }
1513
1514
56.0k
                else if(1 == ps_sao->b3_y_type_idx)
1515
29.4k
                {
1516
29.4k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1517
29.4k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1518
29.4k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1519
29.4k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1520
1521
29.4k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1522
29.4k
                                                                              src_strd,
1523
29.4k
                                                                              pu1_src_left_luma,
1524
29.4k
                                                                              pu1_src_top_luma,
1525
29.4k
                                                                              pu1_sao_src_luma_top_left_ctb,
1526
29.4k
                                                                              ps_sao->b5_y_band_pos,
1527
29.4k
                                                                              ai1_offset_y,
1528
29.4k
                                                                              sao_wd_luma,
1529
29.4k
                                                                              sao_ht_luma
1530
29.4k
                                                                             );
1531
29.4k
                }
1532
1533
26.5k
                else // if(2 <= ps_sao->b3_y_type_idx)
1534
26.5k
                {
1535
26.5k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1536
26.5k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1537
26.5k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1538
26.5k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1539
1540
26.5k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
1541
26.5k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
1542
26.5k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
1543
1544
239k
                    for(i = 0; i < 8; i++)
1545
212k
                    {
1546
1547
212k
                        au4_ilf_across_tile_slice_enable[i] = 1;
1548
212k
                    }
1549
                    /******************************************************************
1550
                     * Derive the  Top-left CTB's neighbor pixel's slice indices.
1551
                     *
1552
                     *               T_T
1553
                     *          ____________
1554
                     *         |    |       |
1555
                     *         | T_L|  T    |T_R
1556
                     *         |    | ______|____
1557
                     *         |    |  T_D  |    |
1558
                     *         |    |       |    |
1559
                     *         |____|_______|    |
1560
                     *              |            |
1561
                     *              |            |
1562
                     *              |____________|
1563
                     *
1564
                     *****************************************************************/
1565
1566
                    /*In case of slices*/
1567
26.5k
                    {
1568
26.5k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1569
14.0k
                        {
1570
1571
14.0k
                            ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1572
14.0k
                            ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1573
1574
14.0k
                            ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1575
14.0k
                            ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1576
1577
14.0k
                            ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1578
14.0k
                            ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1579
1580
14.0k
                            ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1581
14.0k
                            ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1582
1583
14.0k
                            ctbx_t = ps_sao_ctxt->i4_ctb_x;
1584
14.0k
                            ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1585
1586
14.0k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1587
5.52k
                            {
1588
                                /*Calculate neighbor ctb slice indices*/
1589
5.52k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1590
722
                                {
1591
722
                                    au4_idx_t[0] = -1;
1592
722
                                    au4_idx_t[6] = -1;
1593
722
                                    au4_idx_t[4] = -1;
1594
722
                                }
1595
4.79k
                                else
1596
4.79k
                                {
1597
4.79k
                                    au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1598
4.79k
                                    au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1599
4.79k
                                }
1600
5.52k
                                idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1601
5.52k
                                au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1602
5.52k
                                au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1603
5.52k
                                au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1604
1605
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1606
5.52k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1607
722
                                {
1608
722
                                    au4_ilf_across_tile_slice_enable[4] = 0;
1609
722
                                    au4_ilf_across_tile_slice_enable[6] = 0;
1610
722
                                    au4_ilf_across_tile_slice_enable[0] = 0;
1611
722
                                }
1612
4.79k
                                else
1613
4.79k
                                {
1614
4.79k
                                    au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1615
4.79k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1616
4.79k
                                }
1617
1618
1619
1620
5.52k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1621
5.52k
                                au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1622
5.52k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1623
5.52k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1624
5.52k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1625
1626
5.52k
                                if(au4_idx_t[6] < idx_t)
1627
723
                                {
1628
723
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1629
723
                                }
1630
1631
                                /*
1632
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1633
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1634
                                 */
1635
1636
49.6k
                                for(i = 0; i < 8; i++)
1637
44.1k
                                {
1638
                                    /*Sets the edges that lie on the slice/tile boundary*/
1639
44.1k
                                    if(au4_idx_t[i] != idx_t)
1640
10.8k
                                    {
1641
10.8k
                                        au1_tile_slice_boundary[i] = 1;
1642
                                        /*Check for slice flag at such boundaries*/
1643
10.8k
                                    }
1644
33.2k
                                    else
1645
33.2k
                                    {
1646
33.2k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
1647
33.2k
                                    }
1648
44.1k
                                }
1649
                                /*Reset indices*/
1650
49.6k
                                for(i = 0; i < 8; i++)
1651
44.1k
                                {
1652
44.1k
                                    au4_idx_t[i] = 0;
1653
44.1k
                                }
1654
5.52k
                            }
1655
1656
14.0k
                            if(ps_pps->i1_tiles_enabled_flag)
1657
8.83k
                            {
1658
                                /* Calculate availability flags at tile boundary */
1659
8.83k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1660
4.60k
                                {
1661
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1662
4.60k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1663
4.49k
                                    {
1664
                                        /*Calculate neighbor ctb tile indices*/
1665
4.49k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
1666
700
                                        {
1667
700
                                            au4_idx_t[0] = -1;
1668
700
                                            au4_idx_t[6] = -1;
1669
700
                                            au4_idx_t[4] = -1;
1670
700
                                        }
1671
3.79k
                                        else
1672
3.79k
                                        {
1673
3.79k
                                            au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1674
3.79k
                                            au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1675
3.79k
                                        }
1676
4.49k
                                        idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1677
4.49k
                                        au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1678
4.49k
                                        au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1679
4.49k
                                        au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1680
1681
40.4k
                                        for(i = 0; i < 8; i++)
1682
35.9k
                                        {
1683
                                            /*Sets the edges that lie on the tile boundary*/
1684
35.9k
                                            if(au4_idx_t[i] != idx_t)
1685
15.9k
                                            {
1686
15.9k
                                                au1_tile_slice_boundary[i] |= 1;
1687
15.9k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1688
15.9k
                                            }
1689
35.9k
                                        }
1690
4.49k
                                    }
1691
4.60k
                                }
1692
8.83k
                            }
1693
1694
126k
                            for(i = 0; i < 8; i++)
1695
112k
                            {
1696
                                /*Mark a neighbor unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
1697
112k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1698
17.6k
                                {
1699
17.6k
                                    au1_avail_luma[i] = 0;
1700
17.6k
                                }
1701
112k
                            }
1702
14.0k
                        }
1703
26.5k
                    }
1704
1705
1706
26.5k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
1707
3.49k
                    {
1708
3.49k
                        au1_avail_luma[0] = 0;
1709
3.49k
                        au1_avail_luma[4] = 0;
1710
3.49k
                        au1_avail_luma[6] = 0;
1711
3.49k
                    }
1712
1713
26.5k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
1714
2.65k
                    {
1715
2.65k
                        au1_avail_luma[1] = 0;
1716
2.65k
                        au1_avail_luma[5] = 0;
1717
2.65k
                        au1_avail_luma[7] = 0;
1718
2.65k
                    }
1719
1720
26.5k
                    if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
1721
0
                    {
1722
0
                        au1_avail_luma[2] = 0;
1723
0
                        au1_avail_luma[4] = 0;
1724
0
                        au1_avail_luma[5] = 0;
1725
0
                    }
1726
1727
26.5k
                    if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1728
0
                    {
1729
0
                        au1_avail_luma[3] = 0;
1730
0
                        au1_avail_luma[6] = 0;
1731
0
                        au1_avail_luma[7] = 0;
1732
0
                    }
1733
1734
26.5k
                    {
1735
26.5k
                        au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
1736
26.5k
                        u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
1737
26.5k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1738
26.5k
                                                                          src_strd,
1739
26.5k
                                                                          pu1_src_left_luma,
1740
26.5k
                                                                          pu1_src_top_luma,
1741
26.5k
                                                                          pu1_sao_src_luma_top_left_ctb,
1742
26.5k
                                                                          au1_src_top_right,
1743
26.5k
                                                                          &u1_sao_src_top_left_luma_bot_left,
1744
26.5k
                                                                          au1_avail_luma,
1745
26.5k
                                                                          ai1_offset_y,
1746
26.5k
                                                                          sao_wd_luma,
1747
26.5k
                                                                          sao_ht_luma);
1748
26.5k
                    }
1749
26.5k
                }
1750
637k
            }
1751
1.41M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1752
420k
            {
1753
                /* Update left, top and top-left */
1754
3.78M
                for(row = 0; row < sao_ht_luma; row++)
1755
3.36M
                {
1756
3.36M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1757
3.36M
                }
1758
420k
                pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1759
1760
420k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1761
420k
            }
1762
2.05M
        }
1763
1764
2.05M
        if(0 != sao_wd_chroma)
1765
1.69M
        {
1766
1.69M
            if(ps_slice_hdr_top->i1_slice_sao_chroma_flag)
1767
285k
            {
1768
285k
                if(0 == ps_sao->b3_cb_type_idx)
1769
248k
                {
1770
1771
2.23M
                    for(row = 0; row < sao_ht_chroma; row++)
1772
1.99M
                    {
1773
1.99M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1774
1.99M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1775
1.99M
                    }
1776
248k
                    pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1777
248k
                    pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1778
1779
248k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1780
1781
248k
                }
1782
1783
36.9k
                else if(1 == ps_sao->b3_cb_type_idx)
1784
18.0k
                {
1785
18.0k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1786
18.0k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1787
18.0k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1788
18.0k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1789
1790
18.0k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1791
18.0k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1792
18.0k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1793
18.0k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1794
1795
18.0k
                    if(chroma_yuv420sp_vu)
1796
4.98k
                    {
1797
4.98k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1798
4.98k
                                                                                    src_strd,
1799
4.98k
                                                                                    pu1_src_left_chroma,
1800
4.98k
                                                                                    pu1_src_top_chroma,
1801
4.98k
                                                                                    pu1_sao_src_chroma_top_left_ctb,
1802
4.98k
                                                                                    ps_sao->b5_cr_band_pos,
1803
4.98k
                                                                                    ps_sao->b5_cb_band_pos,
1804
4.98k
                                                                                    ai1_offset_cr,
1805
4.98k
                                                                                    ai1_offset_cb,
1806
4.98k
                                                                                    sao_wd_chroma,
1807
4.98k
                                                                                    sao_ht_chroma
1808
4.98k
                                                                                   );
1809
4.98k
                    }
1810
13.0k
                    else
1811
13.0k
                    {
1812
13.0k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1813
13.0k
                                                                                    src_strd,
1814
13.0k
                                                                                    pu1_src_left_chroma,
1815
13.0k
                                                                                    pu1_src_top_chroma,
1816
13.0k
                                                                                    pu1_sao_src_chroma_top_left_ctb,
1817
13.0k
                                                                                    ps_sao->b5_cb_band_pos,
1818
13.0k
                                                                                    ps_sao->b5_cr_band_pos,
1819
13.0k
                                                                                    ai1_offset_cb,
1820
13.0k
                                                                                    ai1_offset_cr,
1821
13.0k
                                                                                    sao_wd_chroma,
1822
13.0k
                                                                                    sao_ht_chroma
1823
13.0k
                                                                                   );
1824
13.0k
                    }
1825
18.0k
                }
1826
18.8k
                else // if(2 <= ps_sao->b3_cb_type_idx)
1827
18.8k
                {
1828
18.8k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1829
18.8k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1830
18.8k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1831
18.8k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1832
1833
18.8k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1834
18.8k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1835
18.8k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1836
18.8k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1837
1838
170k
                    for(i = 0; i < 8; i++)
1839
151k
                    {
1840
151k
                        au1_avail_chroma[i] = 255;
1841
151k
                        au1_tile_slice_boundary[i] = 0;
1842
151k
                        au4_idx_t[i] = 0;
1843
151k
                        au4_ilf_across_tile_slice_enable[i] = 1;
1844
151k
                    }
1845
1846
18.8k
                    {
1847
18.8k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1848
10.1k
                        {
1849
10.1k
                            ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1850
10.1k
                            ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1851
1852
10.1k
                            ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1853
10.1k
                            ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1854
1855
10.1k
                            ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1856
10.1k
                            ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1857
1858
10.1k
                            ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1859
10.1k
                            ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1860
1861
10.1k
                            ctbx_t = ps_sao_ctxt->i4_ctb_x;
1862
10.1k
                            ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1863
1864
10.1k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1865
2.13k
                            {
1866
2.13k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1867
239
                                {
1868
239
                                    au4_idx_t[0] = -1;
1869
239
                                    au4_idx_t[6] = -1;
1870
239
                                    au4_idx_t[4] = -1;
1871
239
                                }
1872
1.89k
                                else
1873
1.89k
                                {
1874
1.89k
                                    au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1875
1.89k
                                    au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1876
1.89k
                                }
1877
2.13k
                                idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1878
2.13k
                                au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1879
2.13k
                                au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1880
2.13k
                                au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1881
1882
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1883
1884
2.13k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1885
239
                                {
1886
239
                                    au4_ilf_across_tile_slice_enable[4] = 0;
1887
239
                                    au4_ilf_across_tile_slice_enable[6] = 0;
1888
239
                                    au4_ilf_across_tile_slice_enable[0] = 0;
1889
239
                                }
1890
1.89k
                                else
1891
1.89k
                                {
1892
1.89k
                                    au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1893
1.89k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1894
1.89k
                                }
1895
1896
2.13k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1897
2.13k
                                au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1898
2.13k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1899
2.13k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1900
2.13k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1901
1902
2.13k
                                if(idx_t > au4_idx_t[6])
1903
239
                                {
1904
239
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1905
239
                                }
1906
1907
                                /*
1908
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1909
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1910
                                 */
1911
19.1k
                                for(i = 0; i < 8; i++)
1912
17.0k
                                {
1913
                                    /*Sets the edges that lie on the slice/tile boundary*/
1914
17.0k
                                    if(au4_idx_t[i] != idx_t)
1915
3.58k
                                    {
1916
3.58k
                                        au1_tile_slice_boundary[i] = 1;
1917
3.58k
                                    }
1918
13.4k
                                    else
1919
13.4k
                                    {
1920
                                        /*Indicates that the neighbour belongs to same/dependent slice*/
1921
13.4k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
1922
13.4k
                                    }
1923
17.0k
                                }
1924
                                /*Reset indices*/
1925
19.1k
                                for(i = 0; i < 8; i++)
1926
17.0k
                                {
1927
17.0k
                                    au4_idx_t[i] = 0;
1928
17.0k
                                }
1929
2.13k
                            }
1930
10.1k
                            if(ps_pps->i1_tiles_enabled_flag)
1931
8.12k
                            {
1932
                                /* Calculate availability flags at tile boundary */
1933
8.12k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1934
3.16k
                                {
1935
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1936
3.16k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1937
3.07k
                                    {
1938
                                        /*Calculate neighbor ctb tile indices*/
1939
3.07k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
1940
627
                                        {
1941
627
                                            au4_idx_t[0] = -1;
1942
627
                                            au4_idx_t[6] = -1;
1943
627
                                            au4_idx_t[4] = -1;
1944
627
                                        }
1945
2.44k
                                        else
1946
2.44k
                                        {
1947
2.44k
                                            au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1948
2.44k
                                            au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1949
2.44k
                                        }
1950
3.07k
                                        idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1951
3.07k
                                        au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1952
3.07k
                                        au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1953
3.07k
                                        au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1954
1955
27.6k
                                        for(i = 0; i < 8; i++)
1956
24.5k
                                        {
1957
                                            /*Sets the edges that lie on the tile boundary*/
1958
24.5k
                                            if(au4_idx_t[i] != idx_t)
1959
10.3k
                                            {
1960
10.3k
                                                au1_tile_slice_boundary[i] |= 1;
1961
10.3k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1962
10.3k
                                            }
1963
24.5k
                                        }
1964
3.07k
                                    }
1965
3.16k
                                }
1966
8.12k
                            }
1967
91.2k
                            for(i = 0; i < 8; i++)
1968
81.1k
                            {
1969
                                /*Mark a neighbor unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
1970
81.1k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1971
10.8k
                                {
1972
10.8k
                                    au1_avail_chroma[i] = 0;
1973
10.8k
                                }
1974
81.1k
                            }
1975
1976
10.1k
                        }
1977
18.8k
                    }
1978
18.8k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
1979
3.27k
                    {
1980
3.27k
                        au1_avail_chroma[0] = 0;
1981
3.27k
                        au1_avail_chroma[4] = 0;
1982
3.27k
                        au1_avail_chroma[6] = 0;
1983
3.27k
                    }
1984
1985
18.8k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
1986
2.30k
                    {
1987
2.30k
                        au1_avail_chroma[1] = 0;
1988
2.30k
                        au1_avail_chroma[5] = 0;
1989
2.30k
                        au1_avail_chroma[7] = 0;
1990
2.30k
                    }
1991
1992
18.8k
                    if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1993
13
                    {
1994
13
                        au1_avail_chroma[2] = 0;
1995
13
                        au1_avail_chroma[4] = 0;
1996
13
                        au1_avail_chroma[5] = 0;
1997
13
                    }
1998
1999
18.8k
                    if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
2000
0
                    {
2001
0
                        au1_avail_chroma[3] = 0;
2002
0
                        au1_avail_chroma[6] = 0;
2003
0
                        au1_avail_chroma[7] = 0;
2004
0
                    }
2005
2006
18.8k
                    {
2007
18.8k
                        au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
2008
18.8k
                        au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
2009
18.8k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2010
18.8k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2011
2012
18.8k
                        if(chroma_yuv420sp_vu)
2013
5.10k
                        {
2014
5.10k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2015
5.10k
                                                                                 src_strd,
2016
5.10k
                                                                                 pu1_src_left_chroma,
2017
5.10k
                                                                                 pu1_src_top_chroma,
2018
5.10k
                                                                                 pu1_sao_src_chroma_top_left_ctb,
2019
5.10k
                                                                                 au1_src_top_right,
2020
5.10k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
2021
5.10k
                                                                                 au1_avail_chroma,
2022
5.10k
                                                                                 ai1_offset_cr,
2023
5.10k
                                                                                 ai1_offset_cb,
2024
5.10k
                                                                                 sao_wd_chroma,
2025
5.10k
                                                                                 sao_ht_chroma);
2026
5.10k
                        }
2027
13.7k
                        else
2028
13.7k
                        {
2029
13.7k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2030
13.7k
                                                                                 src_strd,
2031
13.7k
                                                                                 pu1_src_left_chroma,
2032
13.7k
                                                                                 pu1_src_top_chroma,
2033
13.7k
                                                                                 pu1_sao_src_chroma_top_left_ctb,
2034
13.7k
                                                                                 au1_src_top_right,
2035
13.7k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
2036
13.7k
                                                                                 au1_avail_chroma,
2037
13.7k
                                                                                 ai1_offset_cb,
2038
13.7k
                                                                                 ai1_offset_cr,
2039
13.7k
                                                                                 sao_wd_chroma,
2040
13.7k
                                                                                 sao_ht_chroma);
2041
13.7k
                        }
2042
18.8k
                    }
2043
2044
18.8k
                }
2045
285k
            }
2046
1.40M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2047
455k
            {
2048
4.10M
                for(row = 0; row < sao_ht_chroma; row++)
2049
3.64M
                {
2050
3.64M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2051
3.64M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2052
3.64M
                }
2053
455k
                pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2054
455k
                pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2055
2056
455k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2057
455k
            }
2058
1.69M
        }
2059
2060
2.05M
        pu1_src_luma += sao_ht_luma * src_strd;
2061
2.05M
        pu1_src_chroma += sao_ht_chroma * src_strd;
2062
2.05M
        ps_sao += (ps_sps->i2_pic_wd_in_ctb);
2063
2.05M
    }
2064
2065
    /* Left CTB */
2066
2.21M
    if(ps_sao_ctxt->i4_ctb_x > 0)
2067
2.08M
    {
2068
2.08M
        WORD32 sao_wd_luma = SAO_SHIFT_CTB;
2069
2.08M
        WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
2070
2.08M
        WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2071
2.08M
        WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2072
2073
2.08M
        WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
2074
2.08M
        WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
2075
2.08M
        WORD32 au4_idx_l[8], idx_l;
2076
2077
2.08M
        WORD32 remaining_rows;
2078
2.08M
        slice_header_t *ps_slice_hdr_left;
2079
2.08M
        {
2080
2.08M
            WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb +
2081
2.08M
                                        (ps_sao_ctxt->i4_ctb_x - 1);
2082
2.08M
            ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx];
2083
2.08M
        }
2084
2085
2.08M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2086
2.08M
        if(remaining_rows <= SAO_SHIFT_CTB)
2087
150k
        {
2088
150k
            sao_ht_luma += remaining_rows;
2089
150k
        }
2090
2.08M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2091
2.08M
        if(remaining_rows <= SAO_SHIFT_CTB)
2092
150k
        {
2093
150k
            sao_ht_chroma += remaining_rows;
2094
150k
        }
2095
2096
2.08M
        pu1_src_luma -= sao_wd_luma;
2097
2.08M
        pu1_src_chroma -= sao_wd_chroma;
2098
2.08M
        ps_sao -= 1;
2099
2.08M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
2100
2.08M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
2101
2.08M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2102
2.08M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2103
2104
2105
2.08M
        if(0 != sao_ht_luma)
2106
2.08M
        {
2107
2.08M
            if(ps_slice_hdr_left->i1_slice_sao_luma_flag)
2108
648k
            {
2109
648k
                if(0 == ps_sao->b3_y_type_idx)
2110
591k
                {
2111
                    /* Update left, top and top-left */
2112
15.4M
                    for(row = 0; row < sao_ht_luma; row++)
2113
14.9M
                    {
2114
14.9M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2115
14.9M
                    }
2116
                    /*Update in next location*/
2117
591k
                    pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2118
2119
591k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2120
2121
591k
                }
2122
2123
57.1k
                else if(1 == ps_sao->b3_y_type_idx)
2124
30.2k
                {
2125
30.2k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2126
30.2k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2127
30.2k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2128
30.2k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2129
2130
30.2k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2131
30.2k
                                                                              src_strd,
2132
30.2k
                                                                              pu1_src_left_luma,
2133
30.2k
                                                                              pu1_src_top_luma,
2134
30.2k
                                                                              pu1_sao_src_top_left_luma_curr_ctb,
2135
30.2k
                                                                              ps_sao->b5_y_band_pos,
2136
30.2k
                                                                              ai1_offset_y,
2137
30.2k
                                                                              sao_wd_luma,
2138
30.2k
                                                                              sao_ht_luma
2139
30.2k
                                                                             );
2140
30.2k
                }
2141
2142
26.9k
                else // if(2 <= ps_sao->b3_y_type_idx)
2143
26.9k
                {
2144
26.9k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2145
26.9k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2146
26.9k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2147
26.9k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2148
2149
242k
                    for(i = 0; i < 8; i++)
2150
215k
                    {
2151
215k
                        au1_avail_luma[i] = 255;
2152
215k
                        au1_tile_slice_boundary[i] = 0;
2153
215k
                        au4_idx_l[i] = 0;
2154
215k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2155
215k
                    }
2156
                    /******************************************************************
2157
                     * Derive the Left CTB's neighbour pixels' slice indices.
2158
                     *
2159
                     *
2160
                     *          ____________
2161
                     *         |    |       |
2162
                     *         | L_T|       |
2163
                     *         |____|_______|____
2164
                     *         |    |       |    |
2165
                     *     L_L |  L |  L_R  |    |
2166
                     *         |____|_______|    |
2167
                     *              |            |
2168
                     *          L_D |            |
2169
                     *              |____________|
2170
                     *
2171
                     *****************************************************************/
2172
2173
                    /*In case of slices or tiles*/
2174
26.9k
                    {
2175
26.9k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2176
13.4k
                        {
2177
13.4k
                            ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2178
13.4k
                            ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2179
2180
13.4k
                            ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2181
13.4k
                            ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2182
2183
13.4k
                            ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2184
13.4k
                            ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2185
2186
13.4k
                            ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2187
13.4k
                            ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2188
2189
13.4k
                            ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2190
13.4k
                            ctby_l = ps_sao_ctxt->i4_ctb_y;
2191
2192
13.4k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2193
4.82k
                            {
2194
4.82k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2195
364
                                {
2196
364
                                    au4_idx_l[2] = -1;
2197
364
                                    au4_idx_l[4] = -1;
2198
364
                                    au4_idx_l[5] = -1;
2199
364
                                }
2200
4.46k
                                else
2201
4.46k
                                {
2202
4.46k
                                    au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2203
4.46k
                                    au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2204
4.46k
                                }
2205
4.82k
                                idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2206
4.82k
                                au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2207
4.82k
                                au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2208
4.82k
                                au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2209
2210
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
2211
4.82k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2212
364
                                {
2213
364
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2214
364
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2215
364
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2216
364
                                }
2217
4.46k
                                else
2218
4.46k
                                {
2219
4.46k
                                    au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2220
4.46k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2221
2222
4.46k
                                }
2223
                                //TODO: ILF flag checks for [0] and [6] are missing.
2224
4.82k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2225
4.82k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2226
4.82k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2227
2228
4.82k
                                if(idx_l < au4_idx_l[5])
2229
57
                                {
2230
57
                                    au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2231
57
                                }
2232
2233
                                /*
2234
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2235
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2236
                                 */
2237
43.4k
                                for(i = 0; i < 8; i++)
2238
38.6k
                                {
2239
                                    /*Sets the edges that lie on the slice/tile boundary*/
2240
38.6k
                                    if(au4_idx_l[i] != idx_l)
2241
9.67k
                                    {
2242
9.67k
                                        au1_tile_slice_boundary[i] = 1;
2243
9.67k
                                    }
2244
28.9k
                                    else
2245
28.9k
                                    {
2246
28.9k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2247
28.9k
                                    }
2248
38.6k
                                }
2249
                                /*Reset indices*/
2250
43.4k
                                for(i = 0; i < 8; i++)
2251
38.6k
                                {
2252
38.6k
                                    au4_idx_l[i] = 0;
2253
38.6k
                                }
2254
4.82k
                            }
2255
2256
13.4k
                            if(ps_pps->i1_tiles_enabled_flag)
2257
8.92k
                            {
2258
                                /* Calculate availability flags at tile boundary */
2259
8.92k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2260
4.40k
                                {
2261
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2262
4.40k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2263
4.21k
                                    {
2264
4.21k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2265
1.34k
                                        {
2266
1.34k
                                            au4_idx_l[2] = -1;
2267
1.34k
                                            au4_idx_l[4] = -1;
2268
1.34k
                                            au4_idx_l[5] = -1;
2269
1.34k
                                        }
2270
2.86k
                                        else
2271
2.86k
                                        {
2272
2.86k
                                            au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2273
2.86k
                                            au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2274
2.86k
                                        }
2275
2276
4.21k
                                        idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2277
4.21k
                                        au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2278
4.21k
                                        au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2279
4.21k
                                        au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2280
2281
37.8k
                                        for(i = 0; i < 8; i++)
2282
33.6k
                                        {
2283
                                            /*Sets the edges that lie on the slice/tile boundary*/
2284
33.6k
                                            if(au4_idx_l[i] != idx_l)
2285
14.0k
                                            {
2286
14.0k
                                                au1_tile_slice_boundary[i] |= 1;
2287
14.0k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
2288
14.0k
                                            }
2289
33.6k
                                        }
2290
4.21k
                                    }
2291
4.40k
                                }
2292
8.92k
                            }
2293
2294
121k
                            for(i = 0; i < 8; i++)
2295
107k
                            {
2296
                                /*Sets the edges that lie on the slice/tile boundary*/
2297
107k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2298
15.2k
                                {
2299
15.2k
                                    au1_avail_luma[i] = 0;
2300
15.2k
                                }
2301
107k
                            }
2302
13.4k
                        }
2303
26.9k
                    }
2304
26.9k
                    if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
2305
0
                    {
2306
0
                        au1_avail_luma[0] = 0;
2307
0
                        au1_avail_luma[4] = 0;
2308
0
                        au1_avail_luma[6] = 0;
2309
0
                    }
2310
26.9k
                    if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2311
0
                    {
2312
0
                        au1_avail_luma[1] = 0;
2313
0
                        au1_avail_luma[5] = 0;
2314
0
                        au1_avail_luma[7] = 0;
2315
0
                    }
2316
2317
26.9k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2318
6.21k
                    {
2319
6.21k
                        au1_avail_luma[2] = 0;
2320
6.21k
                        au1_avail_luma[4] = 0;
2321
6.21k
                        au1_avail_luma[5] = 0;
2322
6.21k
                    }
2323
2324
26.9k
                    if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2325
2.98k
                    {
2326
2.98k
                        au1_avail_luma[3] = 0;
2327
2.98k
                        au1_avail_luma[6] = 0;
2328
2.98k
                        au1_avail_luma[7] = 0;
2329
2.98k
                    }
2330
2331
26.9k
                    {
2332
26.9k
                        au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
2333
26.9k
                        u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
2334
26.9k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2335
26.9k
                                                                          src_strd,
2336
26.9k
                                                                          pu1_src_left_luma,
2337
26.9k
                                                                          pu1_src_top_luma,
2338
26.9k
                                                                          pu1_sao_src_top_left_luma_curr_ctb,
2339
26.9k
                                                                          au1_src_top_right,
2340
26.9k
                                                                          &u1_sao_src_top_left_luma_bot_left,
2341
26.9k
                                                                          au1_avail_luma,
2342
26.9k
                                                                          ai1_offset_y,
2343
26.9k
                                                                          sao_wd_luma,
2344
26.9k
                                                                          sao_ht_luma);
2345
26.9k
                    }
2346
2347
26.9k
                }
2348
648k
            }
2349
1.43M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2350
422k
            {
2351
                /* Update left, top and top-left */
2352
11.6M
                for(row = 0; row < sao_ht_luma; row++)
2353
11.2M
                {
2354
11.2M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2355
11.2M
                }
2356
                /*Update in next location*/
2357
422k
                pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2358
2359
422k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2360
422k
            }
2361
2.08M
        }
2362
2363
2.08M
        if(0 != sao_ht_chroma)
2364
1.72M
        {
2365
1.72M
            if(ps_slice_hdr_left->i1_slice_sao_chroma_flag)
2366
285k
            {
2367
285k
                if(0 == ps_sao->b3_cb_type_idx)
2368
249k
                {
2369
4.89M
                    for(row = 0; row < sao_ht_chroma; row++)
2370
4.64M
                    {
2371
4.64M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2372
4.64M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2373
4.64M
                    }
2374
249k
                    pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2375
249k
                    pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2376
2377
249k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2378
249k
                }
2379
2380
36.6k
                else if(1 == ps_sao->b3_cb_type_idx)
2381
17.9k
                {
2382
17.9k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2383
17.9k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2384
17.9k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2385
17.9k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2386
2387
17.9k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2388
17.9k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2389
17.9k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2390
17.9k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2391
2392
17.9k
                    if(chroma_yuv420sp_vu)
2393
4.99k
                    {
2394
4.99k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2395
4.99k
                                                                                    src_strd,
2396
4.99k
                                                                                    pu1_src_left_chroma,
2397
4.99k
                                                                                    pu1_src_top_chroma,
2398
4.99k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2399
4.99k
                                                                                    ps_sao->b5_cr_band_pos,
2400
4.99k
                                                                                    ps_sao->b5_cb_band_pos,
2401
4.99k
                                                                                    ai1_offset_cr,
2402
4.99k
                                                                                    ai1_offset_cb,
2403
4.99k
                                                                                    sao_wd_chroma,
2404
4.99k
                                                                                    sao_ht_chroma
2405
4.99k
                                                                                   );
2406
4.99k
                    }
2407
12.9k
                    else
2408
12.9k
                    {
2409
12.9k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2410
12.9k
                                                                                    src_strd,
2411
12.9k
                                                                                    pu1_src_left_chroma,
2412
12.9k
                                                                                    pu1_src_top_chroma,
2413
12.9k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2414
12.9k
                                                                                    ps_sao->b5_cb_band_pos,
2415
12.9k
                                                                                    ps_sao->b5_cr_band_pos,
2416
12.9k
                                                                                    ai1_offset_cb,
2417
12.9k
                                                                                    ai1_offset_cr,
2418
12.9k
                                                                                    sao_wd_chroma,
2419
12.9k
                                                                                    sao_ht_chroma
2420
12.9k
                                                                                   );
2421
12.9k
                    }
2422
17.9k
                }
2423
2424
18.7k
                else // if(2 <= ps_sao->b3_cb_type_idx)
2425
18.7k
                {
2426
18.7k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2427
18.7k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2428
18.7k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2429
18.7k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2430
2431
18.7k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2432
18.7k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2433
18.7k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2434
18.7k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2435
2436
169k
                    for(i = 0; i < 8; i++)
2437
150k
                    {
2438
150k
                        au1_avail_chroma[i] = 255;
2439
150k
                        au1_tile_slice_boundary[i] = 0;
2440
150k
                        au4_idx_l[i] = 0;
2441
150k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2442
150k
                    }
2443
                    /*In case of slices or tiles*/
2444
18.7k
                    {
2445
18.7k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2446
9.49k
                        {
2447
9.49k
                            ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2448
9.49k
                            ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2449
2450
9.49k
                            ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2451
9.49k
                            ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2452
2453
9.49k
                            ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2454
9.49k
                            ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2455
2456
9.49k
                            ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2457
9.49k
                            ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2458
2459
9.49k
                            ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2460
9.49k
                            ctby_l = ps_sao_ctxt->i4_ctb_y;
2461
2462
9.49k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2463
1.90k
                            {
2464
1.90k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2465
97
                                {
2466
97
                                    au4_idx_l[2] = -1;
2467
97
                                    au4_idx_l[4] = -1;
2468
97
                                    au4_idx_l[5] = -1;
2469
97
                                }
2470
1.81k
                                else
2471
1.81k
                                {
2472
1.81k
                                    au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2473
1.81k
                                    au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2474
1.81k
                                }
2475
1.90k
                                idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2476
1.90k
                                au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2477
1.90k
                                au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2478
1.90k
                                au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2479
2480
                                /* Verify that the neighbour CTBs don't cross the picture boundary. */
2481
1.90k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2482
97
                                {
2483
97
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2484
97
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2485
97
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2486
97
                                }
2487
1.81k
                                else
2488
1.81k
                                {
2489
1.81k
                                    au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2490
1.81k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2491
1.81k
                                }
2492
2493
1.90k
                                if(au4_idx_l[5] > idx_l)
2494
54
                                {
2495
54
                                    au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2496
54
                                }
2497
2498
                                //  au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2499
1.90k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2500
1.90k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2501
1.90k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2502
                                /*
2503
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2504
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2505
                                 */
2506
17.1k
                                for(i = 0; i < 8; i++)
2507
15.2k
                                {
2508
                                    /*Sets the edges that lie on the slice/tile boundary*/
2509
15.2k
                                    if(au4_idx_l[i] != idx_l)
2510
3.47k
                                    {
2511
3.47k
                                        au1_tile_slice_boundary[i] = 1;
2512
3.47k
                                    }
2513
11.7k
                                    else
2514
11.7k
                                    {
2515
11.7k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2516
11.7k
                                    }
2517
15.2k
                                }
2518
                                /*Reset indices*/
2519
17.1k
                                for(i = 0; i < 8; i++)
2520
15.2k
                                {
2521
15.2k
                                    au4_idx_l[i] = 0;
2522
15.2k
                                }
2523
1.90k
                            }
2524
9.49k
                            if(ps_pps->i1_tiles_enabled_flag)
2525
7.74k
                            {
2526
                                /* Calculate availability flags at tile boundary */
2527
7.74k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2528
4.14k
                                {
2529
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2530
4.14k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2531
3.93k
                                    {
2532
3.93k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2533
1.19k
                                        {
2534
1.19k
                                            au4_idx_l[2] = -1;
2535
1.19k
                                            au4_idx_l[4] = -1;
2536
1.19k
                                            au4_idx_l[5] = -1;
2537
1.19k
                                        }
2538
2.74k
                                        else
2539
2.74k
                                        {
2540
2.74k
                                            au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2541
2.74k
                                            au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2542
2.74k
                                        }
2543
2544
3.93k
                                        idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2545
3.93k
                                        au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2546
3.93k
                                        au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2547
3.93k
                                        au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2548
2549
35.3k
                                        for(i = 0; i < 8; i++)
2550
31.4k
                                        {
2551
                                            /*Sets the edges that lie on the slice/tile boundary*/
2552
31.4k
                                            if(au4_idx_l[i] != idx_l)
2553
12.3k
                                            {
2554
12.3k
                                                au1_tile_slice_boundary[i] |= 1;
2555
12.3k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2556
12.3k
                                            }
2557
31.4k
                                        }
2558
3.93k
                                    }
2559
4.14k
                                }
2560
7.74k
                            }
2561
85.4k
                            for(i = 0; i < 8; i++)
2562
75.9k
                            {
2563
                                /*Sets the edges that lie on the slice/tile boundary*/
2564
75.9k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2565
12.8k
                                {
2566
12.8k
                                    au1_avail_chroma[i] = 0;
2567
12.8k
                                }
2568
75.9k
                            }
2569
9.49k
                        }
2570
18.7k
                    }
2571
18.7k
                    if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
2572
36
                    {
2573
36
                        au1_avail_chroma[0] = 0;
2574
36
                        au1_avail_chroma[4] = 0;
2575
36
                        au1_avail_chroma[6] = 0;
2576
36
                    }
2577
2578
18.7k
                    if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2579
0
                    {
2580
0
                        au1_avail_chroma[1] = 0;
2581
0
                        au1_avail_chroma[5] = 0;
2582
0
                        au1_avail_chroma[7] = 0;
2583
0
                    }
2584
2585
18.7k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2586
3.66k
                    {
2587
3.66k
                        au1_avail_chroma[2] = 0;
2588
3.66k
                        au1_avail_chroma[4] = 0;
2589
3.66k
                        au1_avail_chroma[5] = 0;
2590
3.66k
                    }
2591
2592
18.7k
                    if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
2593
2.19k
                    {
2594
2.19k
                        au1_avail_chroma[3] = 0;
2595
2.19k
                        au1_avail_chroma[6] = 0;
2596
2.19k
                        au1_avail_chroma[7] = 0;
2597
2.19k
                    }
2598
2599
18.7k
                    {
2600
18.7k
                        au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
2601
18.7k
                        au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
2602
18.7k
                        au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
2603
18.7k
                        au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
2604
                        //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2605
                        //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2606
18.7k
                        if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
2607
166
                        {
2608
166
                            au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
2609
166
                            au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
2610
166
                        }
2611
2612
2613
18.7k
                        if(chroma_yuv420sp_vu)
2614
5.04k
                        {
2615
5.04k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2616
5.04k
                                                                                 src_strd,
2617
5.04k
                                                                                 pu1_src_left_chroma,
2618
5.04k
                                                                                 pu1_src_top_chroma,
2619
5.04k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2620
5.04k
                                                                                 au1_src_top_right,
2621
5.04k
                                                                                 au1_src_bot_left,
2622
5.04k
                                                                                 au1_avail_chroma,
2623
5.04k
                                                                                 ai1_offset_cr,
2624
5.04k
                                                                                 ai1_offset_cb,
2625
5.04k
                                                                                 sao_wd_chroma,
2626
5.04k
                                                                                 sao_ht_chroma);
2627
5.04k
                        }
2628
13.7k
                        else
2629
13.7k
                        {
2630
13.7k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2631
13.7k
                                                                                 src_strd,
2632
13.7k
                                                                                 pu1_src_left_chroma,
2633
13.7k
                                                                                 pu1_src_top_chroma,
2634
13.7k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2635
13.7k
                                                                                 au1_src_top_right,
2636
13.7k
                                                                                 au1_src_bot_left,
2637
13.7k
                                                                                 au1_avail_chroma,
2638
13.7k
                                                                                 ai1_offset_cb,
2639
13.7k
                                                                                 ai1_offset_cr,
2640
13.7k
                                                                                 sao_wd_chroma,
2641
13.7k
                                                                                 sao_ht_chroma);
2642
13.7k
                        }
2643
18.7k
                    }
2644
2645
18.7k
                }
2646
285k
            }
2647
1.43M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2648
463k
            {
2649
5.83M
                for(row = 0; row < sao_ht_chroma; row++)
2650
5.37M
                {
2651
5.37M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2652
5.37M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2653
5.37M
                }
2654
463k
                pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2655
463k
                pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2656
2657
463k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2658
463k
            }
2659
2660
1.72M
        }
2661
2.08M
        pu1_src_luma += sao_wd_luma;
2662
2.08M
        pu1_src_chroma += sao_wd_chroma;
2663
2.08M
        ps_sao += 1;
2664
2.08M
    }
2665
2666
2667
    /* Current CTB */
2668
2.21M
    {
2669
2.21M
        WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
2670
2.21M
        WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
2671
2.21M
        WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2672
2.21M
        WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2673
2.21M
        WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
2674
2.21M
        WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
2675
2.21M
        WORD32 au4_idx_c[8], idx_c;
2676
2677
2.21M
        WORD32 remaining_rows;
2678
2.21M
        WORD32 remaining_cols;
2679
2680
2.21M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
2681
2.21M
        if(remaining_cols <= SAO_SHIFT_CTB)
2682
126k
        {
2683
126k
            sao_wd_luma += remaining_cols;
2684
126k
        }
2685
2.21M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
2686
2.21M
        if(remaining_cols <= 2 * SAO_SHIFT_CTB)
2687
126k
        {
2688
126k
            sao_wd_chroma += remaining_cols;
2689
126k
        }
2690
2691
2.21M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2692
2.21M
        if(remaining_rows <= SAO_SHIFT_CTB)
2693
159k
        {
2694
159k
            sao_ht_luma += remaining_rows;
2695
159k
        }
2696
2.21M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2697
2.21M
        if(remaining_rows <= SAO_SHIFT_CTB)
2698
159k
        {
2699
159k
            sao_ht_chroma += remaining_rows;
2700
159k
        }
2701
2702
2.21M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2703
2.21M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2704
2.21M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2705
2.21M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2706
2707
2.21M
        if((0 != sao_wd_luma) && (0 != sao_ht_luma))
2708
2.21M
        {
2709
2.21M
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2710
682k
            {
2711
682k
                if(0 == ps_sao->b3_y_type_idx)
2712
619k
                {
2713
                    /* Update left, top and top-left */
2714
16.5M
                    for(row = 0; row < sao_ht_luma; row++)
2715
15.9M
                    {
2716
15.9M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2717
15.9M
                    }
2718
619k
                    pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2719
2720
619k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2721
2722
619k
                    pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2723
2724
619k
                }
2725
2726
63.8k
                else if(1 == ps_sao->b3_y_type_idx)
2727
33.9k
                {
2728
33.9k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2729
33.9k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2730
33.9k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2731
33.9k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2732
2733
33.9k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2734
33.9k
                                                                              src_strd,
2735
33.9k
                                                                              pu1_src_left_luma,
2736
33.9k
                                                                              pu1_src_top_luma,
2737
33.9k
                                                                              pu1_sao_src_top_left_luma_curr_ctb,
2738
33.9k
                                                                              ps_sao->b5_y_band_pos,
2739
33.9k
                                                                              ai1_offset_y,
2740
33.9k
                                                                              sao_wd_luma,
2741
33.9k
                                                                              sao_ht_luma
2742
33.9k
                                                                             );
2743
33.9k
                }
2744
2745
29.9k
                else // if(2 <= ps_sao->b3_y_type_idx)
2746
29.9k
                {
2747
29.9k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2748
29.9k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2749
29.9k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2750
29.9k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2751
2752
269k
                    for(i = 0; i < 8; i++)
2753
239k
                    {
2754
239k
                        au1_avail_luma[i] = 255;
2755
239k
                        au1_tile_slice_boundary[i] = 0;
2756
239k
                        au4_idx_c[i] = 0;
2757
239k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2758
239k
                    }
2759
                    /******************************************************************
2760
                     * Derive the current CTB's neighbour pixels' slice indices.
2761
                     *
2762
                     *
2763
                     *          ____________
2764
                     *         |    |       |
2765
                     *         |    | C_T   |
2766
                     *         |____|_______|____
2767
                     *         |    |       |    |
2768
                     *         | C_L|   C   | C_R|
2769
                     *         |____|_______|    |
2770
                     *              |  C_D       |
2771
                     *              |            |
2772
                     *              |____________|
2773
                     *
2774
                     *****************************************************************/
2775
2776
                    /*In case of slices*/
2777
29.9k
                    {
2778
29.9k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2779
14.5k
                        {
2780
14.5k
                            ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2781
14.5k
                            ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2782
2783
14.5k
                            ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2784
14.5k
                            ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2785
2786
14.5k
                            ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2787
14.5k
                            ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2788
2789
14.5k
                            ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
2790
14.5k
                            ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
2791
2792
14.5k
                            ctbx_c = ps_sao_ctxt->i4_ctb_x;
2793
14.5k
                            ctby_c = ps_sao_ctxt->i4_ctb_y;
2794
2795
14.5k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2796
5.12k
                            {
2797
5.12k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
2798
602
                                {
2799
602
                                    au4_idx_c[6] = -1;
2800
602
                                    au4_idx_c[0] = -1;
2801
602
                                    au4_idx_c[4] = -1;
2802
602
                                }
2803
4.51k
                                else
2804
4.51k
                                {
2805
4.51k
                                    au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2806
4.51k
                                }
2807
2808
5.12k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2809
322
                                {
2810
322
                                    au4_idx_c[2] = -1;
2811
322
                                    au4_idx_c[5] = -1;
2812
322
                                    au4_idx_c[4] = -1;
2813
322
                                }
2814
4.79k
                                else
2815
4.79k
                                {
2816
4.79k
                                    au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2817
4.79k
                                    au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2818
4.79k
                                }
2819
5.12k
                                idx_c   = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2820
5.12k
                                au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2821
5.12k
                                au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2822
2823
5.12k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
2824
602
                                {
2825
602
                                    au4_ilf_across_tile_slice_enable[6] = 0;
2826
602
                                    au4_ilf_across_tile_slice_enable[0] = 0;
2827
602
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2828
602
                                }
2829
4.51k
                                else
2830
4.51k
                                {
2831
4.51k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2832
4.51k
                                    au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
2833
4.51k
                                }
2834
5.12k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2835
322
                                {
2836
322
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2837
322
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2838
322
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2839
322
                                }
2840
4.79k
                                else
2841
4.79k
                                {
2842
4.79k
                                    au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2843
4.79k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2844
4.79k
                                }
2845
5.12k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2846
5.12k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2847
5.12k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2848
2849
5.12k
                                if(au4_idx_c[6] < idx_c)
2850
986
                                {
2851
986
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2852
986
                                }
2853
2854
                                /*
2855
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2856
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2857
                                 */
2858
46.0k
                                for(i = 0; i < 8; i++)
2859
40.9k
                                {
2860
                                    /*Sets the edges that lie on the slice/tile boundary*/
2861
40.9k
                                    if(au4_idx_c[i] != idx_c)
2862
10.9k
                                    {
2863
10.9k
                                        au1_tile_slice_boundary[i] = 1;
2864
10.9k
                                    }
2865
30.0k
                                    else
2866
30.0k
                                    {
2867
30.0k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2868
30.0k
                                    }
2869
40.9k
                                }
2870
                                /*Reset indices*/
2871
46.0k
                                for(i = 0; i < 8; i++)
2872
40.9k
                                {
2873
40.9k
                                    au4_idx_c[i] = 0;
2874
40.9k
                                }
2875
5.12k
                            }
2876
2877
14.5k
                            if(ps_pps->i1_tiles_enabled_flag)
2878
9.65k
                            {
2879
                                /* Calculate availability flags at tile boundary */
2880
9.65k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2881
4.85k
                                {
2882
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2883
4.85k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2884
4.75k
                                    {
2885
4.75k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
2886
583
                                        {
2887
583
                                            au4_idx_c[6] = -1;
2888
583
                                            au4_idx_c[0] = -1;
2889
583
                                            au4_idx_c[4] = -1;
2890
583
                                        }
2891
4.17k
                                        else
2892
4.17k
                                        {
2893
4.17k
                                            au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2894
4.17k
                                        }
2895
2896
4.75k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2897
1.36k
                                        {
2898
1.36k
                                            au4_idx_c[2] = -1;
2899
1.36k
                                            au4_idx_c[5] = -1;
2900
1.36k
                                            au4_idx_c[4] = -1;
2901
1.36k
                                        }
2902
3.38k
                                        else
2903
3.38k
                                        {
2904
3.38k
                                            au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2905
3.38k
                                            au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2906
3.38k
                                        }
2907
4.75k
                                        idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2908
4.75k
                                        au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2909
4.75k
                                        au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2910
2911
42.7k
                                        for(i = 0; i < 8; i++)
2912
38.0k
                                        {
2913
                                            /*Sets the edges that lie on the slice/tile boundary*/
2914
38.0k
                                            if(au4_idx_c[i] != idx_c)
2915
16.4k
                                            {
2916
16.4k
                                                au1_tile_slice_boundary[i] |= 1;
2917
16.4k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2918
16.4k
                                            }
2919
38.0k
                                        }
2920
4.75k
                                    }
2921
4.85k
                                }
2922
9.65k
                            }
2923
2924
131k
                            for(i = 0; i < 8; i++)
2925
116k
                            {
2926
                                /*Sets the edges that lie on the slice/tile boundary*/
2927
116k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2928
18.2k
                                {
2929
18.2k
                                    au1_avail_luma[i] = 0;
2930
18.2k
                                }
2931
116k
                            }
2932
2933
14.5k
                        }
2934
29.9k
                    }
2935
29.9k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
2936
3.88k
                    {
2937
3.88k
                        au1_avail_luma[0] = 0;
2938
3.88k
                        au1_avail_luma[4] = 0;
2939
3.88k
                        au1_avail_luma[6] = 0;
2940
3.88k
                    }
2941
2942
29.9k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
2943
2.98k
                    {
2944
2.98k
                        au1_avail_luma[1] = 0;
2945
2.98k
                        au1_avail_luma[5] = 0;
2946
2.98k
                        au1_avail_luma[7] = 0;
2947
2.98k
                    }
2948
2949
29.9k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2950
6.85k
                    {
2951
6.85k
                        au1_avail_luma[2] = 0;
2952
6.85k
                        au1_avail_luma[4] = 0;
2953
6.85k
                        au1_avail_luma[5] = 0;
2954
6.85k
                    }
2955
2956
29.9k
                    if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2957
3.31k
                    {
2958
3.31k
                        au1_avail_luma[3] = 0;
2959
3.31k
                        au1_avail_luma[6] = 0;
2960
3.31k
                        au1_avail_luma[7] = 0;
2961
3.31k
                    }
2962
2963
29.9k
                    {
2964
29.9k
                        au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
2965
29.9k
                        u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
2966
2967
29.9k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2968
29.9k
                                                                          src_strd,
2969
29.9k
                                                                          pu1_src_left_luma,
2970
29.9k
                                                                          pu1_src_top_luma,
2971
29.9k
                                                                          pu1_sao_src_top_left_luma_curr_ctb,
2972
29.9k
                                                                          au1_src_top_right,
2973
29.9k
                                                                          &u1_sao_src_top_left_luma_bot_left,
2974
29.9k
                                                                          au1_avail_luma,
2975
29.9k
                                                                          ai1_offset_y,
2976
29.9k
                                                                          sao_wd_luma,
2977
29.9k
                                                                          sao_ht_luma);
2978
29.9k
                    }
2979
29.9k
                    pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2980
29.9k
                    pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
2981
29.9k
                }
2982
682k
            }
2983
1.52M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2984
442k
            {
2985
                /* Update left, top and top-left */
2986
12.3M
                for(row = 0; row < sao_ht_luma; row++)
2987
11.8M
                {
2988
11.8M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2989
11.8M
                }
2990
442k
                pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2991
2992
442k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2993
2994
442k
                pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2995
442k
            }
2996
2.21M
        }
2997
2998
2.21M
        if((0 != sao_wd_chroma) && (0 != sao_ht_chroma))
2999
1.83M
        {
3000
1.83M
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
3001
313k
            {
3002
313k
                if(0 == ps_sao->b3_cb_type_idx)
3003
272k
                {
3004
5.28M
                    for(row = 0; row < sao_ht_chroma; row++)
3005
5.00M
                    {
3006
5.00M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3007
5.00M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3008
5.00M
                    }
3009
272k
                    pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3010
272k
                    pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3011
3012
272k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3013
3014
272k
                    pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3015
272k
                    pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3016
272k
                }
3017
3018
40.9k
                else if(1 == ps_sao->b3_cb_type_idx)
3019
19.8k
                {
3020
19.8k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3021
19.8k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3022
19.8k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3023
19.8k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3024
3025
19.8k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3026
19.8k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3027
19.8k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3028
19.8k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3029
3030
19.8k
                    if(chroma_yuv420sp_vu)
3031
5.30k
                    {
3032
5.30k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3033
5.30k
                                                                                    src_strd,
3034
5.30k
                                                                                    pu1_src_left_chroma,
3035
5.30k
                                                                                    pu1_src_top_chroma,
3036
5.30k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
3037
5.30k
                                                                                    ps_sao->b5_cr_band_pos,
3038
5.30k
                                                                                    ps_sao->b5_cb_band_pos,
3039
5.30k
                                                                                    ai1_offset_cr,
3040
5.30k
                                                                                    ai1_offset_cb,
3041
5.30k
                                                                                    sao_wd_chroma,
3042
5.30k
                                                                                    sao_ht_chroma
3043
5.30k
                                                                                   );
3044
5.30k
                    }
3045
14.5k
                    else
3046
14.5k
                    {
3047
14.5k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3048
14.5k
                                                                                    src_strd,
3049
14.5k
                                                                                    pu1_src_left_chroma,
3050
14.5k
                                                                                    pu1_src_top_chroma,
3051
14.5k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
3052
14.5k
                                                                                    ps_sao->b5_cb_band_pos,
3053
14.5k
                                                                                    ps_sao->b5_cr_band_pos,
3054
14.5k
                                                                                    ai1_offset_cb,
3055
14.5k
                                                                                    ai1_offset_cr,
3056
14.5k
                                                                                    sao_wd_chroma,
3057
14.5k
                                                                                    sao_ht_chroma
3058
14.5k
                                                                                   );
3059
14.5k
                    }
3060
19.8k
                }
3061
3062
21.1k
                else // if(2 <= ps_sao->b3_cb_type_idx)
3063
21.1k
                {
3064
21.1k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3065
21.1k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3066
21.1k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3067
21.1k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3068
3069
21.1k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3070
21.1k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3071
21.1k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3072
21.1k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3073
3074
190k
                    for(i = 0; i < 8; i++)
3075
168k
                    {
3076
168k
                        au1_avail_chroma[i] = 255;
3077
168k
                        au1_tile_slice_boundary[i] = 0;
3078
168k
                        au4_idx_c[i] = 0;
3079
168k
                        au4_ilf_across_tile_slice_enable[i] = 1;
3080
168k
                    }
3081
21.1k
                    {
3082
21.1k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3083
10.2k
                        {
3084
10.2k
                            ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
3085
10.2k
                            ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
3086
3087
10.2k
                            ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
3088
10.2k
                            ctby_c_l = ps_sao_ctxt->i4_ctb_y;
3089
3090
10.2k
                            ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
3091
10.2k
                            ctby_c_r = ps_sao_ctxt->i4_ctb_y;
3092
3093
10.2k
                            ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
3094
10.2k
                            ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
3095
3096
10.2k
                            ctbx_c = ps_sao_ctxt->i4_ctb_x;
3097
10.2k
                            ctby_c = ps_sao_ctxt->i4_ctb_y;
3098
3099
10.2k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
3100
1.83k
                            {
3101
1.83k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
3102
204
                                {
3103
204
                                    au4_idx_c[0] = -1;
3104
204
                                    au4_idx_c[4] = -1;
3105
204
                                    au4_idx_c[6] = -1;
3106
204
                                }
3107
1.63k
                                else
3108
1.63k
                                {
3109
1.63k
                                    au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3110
1.63k
                                }
3111
3112
1.83k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
3113
106
                                {
3114
106
                                    au4_idx_c[2] = -1;
3115
106
                                    au4_idx_c[4] = -1;
3116
106
                                    au4_idx_c[5] = -1;
3117
106
                                }
3118
1.73k
                                else
3119
1.73k
                                {
3120
1.73k
                                    au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3121
1.73k
                                    au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3122
1.73k
                                }
3123
1.83k
                                idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3124
1.83k
                                au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3125
1.83k
                                au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3126
3127
1.83k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
3128
204
                                {
3129
204
                                    au4_ilf_across_tile_slice_enable[0] = 0;
3130
204
                                    au4_ilf_across_tile_slice_enable[4] = 0;
3131
204
                                    au4_ilf_across_tile_slice_enable[6] = 0;
3132
204
                                }
3133
1.63k
                                else
3134
1.63k
                                {
3135
1.63k
                                    au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
3136
1.63k
                                    au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3137
1.63k
                                }
3138
3139
1.83k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
3140
106
                                {
3141
106
                                    au4_ilf_across_tile_slice_enable[2] = 0;
3142
106
                                    au4_ilf_across_tile_slice_enable[4] = 0;
3143
106
                                    au4_ilf_across_tile_slice_enable[5] = 0;
3144
106
                                }
3145
1.73k
                                else
3146
1.73k
                                {
3147
1.73k
                                    au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3148
1.73k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
3149
1.73k
                                }
3150
3151
1.83k
                                au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
3152
1.83k
                                au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
3153
1.83k
                                au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
3154
3155
1.83k
                                if(idx_c > au4_idx_c[6])
3156
316
                                {
3157
316
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3158
316
                                }
3159
3160
                                /*
3161
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
3162
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
3163
                                 */
3164
16.5k
                                for(i = 0; i < 8; i++)
3165
14.7k
                                {
3166
                                    /*Sets the edges that lie on the slice/tile boundary*/
3167
14.7k
                                    if(au4_idx_c[i] != idx_c)
3168
3.85k
                                    {
3169
3.85k
                                        au1_tile_slice_boundary[i] = 1;
3170
3.85k
                                    }
3171
10.8k
                                    else
3172
10.8k
                                    {
3173
10.8k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
3174
10.8k
                                    }
3175
14.7k
                                }
3176
                                /*Reset indices*/
3177
16.5k
                                for(i = 0; i < 8; i++)
3178
14.7k
                                {
3179
14.7k
                                    au4_idx_c[i] = 0;
3180
14.7k
                                }
3181
1.83k
                            }
3182
3183
10.2k
                            if(ps_pps->i1_tiles_enabled_flag)
3184
8.54k
                            {
3185
                                /* Calculate availability flags at tile boundary */
3186
8.54k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
3187
3.67k
                                {
3188
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
3189
3.67k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
3190
3.56k
                                    {
3191
3.56k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
3192
527
                                        {
3193
527
                                            au4_idx_c[6] = -1;
3194
527
                                            au4_idx_c[0] = -1;
3195
527
                                            au4_idx_c[4] = -1;
3196
527
                                        }
3197
3.04k
                                        else
3198
3.04k
                                        {
3199
3.04k
                                            au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3200
3.04k
                                        }
3201
3202
3.56k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
3203
1.07k
                                        {
3204
1.07k
                                            au4_idx_c[2] = -1;
3205
1.07k
                                            au4_idx_c[5] = -1;
3206
1.07k
                                            au4_idx_c[4] = -1;
3207
1.07k
                                        }
3208
2.49k
                                        else
3209
2.49k
                                        {
3210
2.49k
                                            au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3211
2.49k
                                            au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3212
2.49k
                                        }
3213
3.56k
                                        idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3214
3.56k
                                        au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3215
3.56k
                                        au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3216
3217
32.1k
                                        for(i = 0; i < 8; i++)
3218
28.5k
                                        {
3219
                                            /*Sets the edges that lie on the slice/tile boundary*/
3220
28.5k
                                            if(au4_idx_c[i] != idx_c)
3221
11.4k
                                            {
3222
11.4k
                                                au1_tile_slice_boundary[i] |= 1;
3223
11.4k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
3224
11.4k
                                            }
3225
28.5k
                                        }
3226
3.56k
                                    }
3227
3.67k
                                }
3228
8.54k
                            }
3229
3230
92.5k
                            for(i = 0; i < 8; i++)
3231
82.2k
                            {
3232
                                /* Mark a neighbour as unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled */
3233
82.2k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
3234
12.1k
                                {
3235
12.1k
                                    au1_avail_chroma[i] = 0;
3236
12.1k
                                }
3237
82.2k
                            }
3238
10.2k
                        }
3239
21.1k
                    }
3240
3241
21.1k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
3242
3.76k
                    {
3243
3.76k
                        au1_avail_chroma[0] = 0;
3244
3.76k
                        au1_avail_chroma[4] = 0;
3245
3.76k
                        au1_avail_chroma[6] = 0;
3246
3.76k
                    }
3247
3248
21.1k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
3249
2.51k
                    {
3250
2.51k
                        au1_avail_chroma[1] = 0;
3251
2.51k
                        au1_avail_chroma[5] = 0;
3252
2.51k
                        au1_avail_chroma[7] = 0;
3253
2.51k
                    }
3254
3255
21.1k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
3256
4.43k
                    {
3257
4.43k
                        au1_avail_chroma[2] = 0;
3258
4.43k
                        au1_avail_chroma[4] = 0;
3259
4.43k
                        au1_avail_chroma[5] = 0;
3260
4.43k
                    }
3261
3262
21.1k
                    if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
3263
2.66k
                    {
3264
2.66k
                        au1_avail_chroma[3] = 0;
3265
2.66k
                        au1_avail_chroma[6] = 0;
3266
2.66k
                        au1_avail_chroma[7] = 0;
3267
2.66k
                    }
3268
3269
21.1k
                    {
3270
21.1k
                        au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
3271
21.1k
                        au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
3272
3273
21.1k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
3274
21.1k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
3275
3276
21.1k
                        if(chroma_yuv420sp_vu)
3277
5.53k
                        {
3278
5.53k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3279
5.53k
                                                                                 src_strd,
3280
5.53k
                                                                                 pu1_src_left_chroma,
3281
5.53k
                                                                                 pu1_src_top_chroma,
3282
5.53k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
3283
5.53k
                                                                                 au1_src_top_right,
3284
5.53k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
3285
5.53k
                                                                                 au1_avail_chroma,
3286
5.53k
                                                                                 ai1_offset_cr,
3287
5.53k
                                                                                 ai1_offset_cb,
3288
5.53k
                                                                                 sao_wd_chroma,
3289
5.53k
                                                                                 sao_ht_chroma);
3290
5.53k
                        }
3291
15.5k
                        else
3292
15.5k
                        {
3293
15.5k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3294
15.5k
                                                                                 src_strd,
3295
15.5k
                                                                                 pu1_src_left_chroma,
3296
15.5k
                                                                                 pu1_src_top_chroma,
3297
15.5k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
3298
15.5k
                                                                                 au1_src_top_right,
3299
15.5k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
3300
15.5k
                                                                                 au1_avail_chroma,
3301
15.5k
                                                                                 ai1_offset_cb,
3302
15.5k
                                                                                 ai1_offset_cr,
3303
15.5k
                                                                                 sao_wd_chroma,
3304
15.5k
                                                                                 sao_ht_chroma);
3305
15.5k
                        }
3306
21.1k
                    }
3307
3308
21.1k
                }
3309
313k
                pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3310
313k
                pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3311
3312
313k
                pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
3313
313k
                pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
3314
313k
            }
3315
1.52M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3316
483k
            {
3317
6.14M
                for(row = 0; row < sao_ht_chroma; row++)
3318
5.66M
                {
3319
5.66M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3320
5.66M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3321
5.66M
                }
3322
483k
                pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3323
483k
                pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3324
3325
483k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3326
3327
483k
                pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3328
483k
                pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3329
483k
            }
3330
3331
1.83M
        }
3332
2.21M
    }
3333
3334
3335
3336
3337
/* If no loop filter is enabled copy the backed up values */
3338
2.21M
    {
3339
        /* Luma */
3340
2.21M
        if(no_loop_filter_enabled_luma)
3341
41.8k
        {
3342
41.8k
            UWORD32 u4_no_loop_filter_flag;
3343
41.8k
            WORD32 loop_filter_bit_pos;
3344
41.8k
            WORD32 log2_min_cu = 3;
3345
41.8k
            WORD32 min_cu = (1 << log2_min_cu);
3346
41.8k
            UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
3347
41.8k
            WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
3348
41.8k
            WORD32 sao_blk_wd = ctb_size;
3349
41.8k
            WORD32 remaining_rows;
3350
41.8k
            WORD32 remaining_cols;
3351
3352
41.8k
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3353
41.8k
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3354
41.8k
            if(remaining_rows <= SAO_SHIFT_CTB)
3355
4.53k
                sao_blk_ht += remaining_rows;
3356
41.8k
            if(remaining_cols <= SAO_SHIFT_CTB)
3357
2.17k
                sao_blk_wd += remaining_cols;
3358
3359
41.8k
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
3360
41.8k
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3361
3362
41.8k
            pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
3363
3364
41.8k
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3365
41.8k
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3366
41.8k
            if(ps_sao_ctxt->i4_ctb_x > 0)
3367
38.2k
                loop_filter_bit_pos -= 1;
3368
3369
41.8k
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3370
41.8k
                            (loop_filter_bit_pos >> 3);
3371
3372
41.8k
            for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
3373
340k
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3374
299k
            {
3375
299k
                WORD32 tmp_wd = sao_blk_wd;
3376
3377
299k
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3378
299k
                                (loop_filter_bit_pos & 7);
3379
299k
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3380
3381
299k
                if(u4_no_loop_filter_flag)
3382
264k
                {
3383
579k
                    while(tmp_wd > 0)
3384
315k
                    {
3385
315k
                        if(CTZ(u4_no_loop_filter_flag))
3386
45.5k
                        {
3387
45.5k
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3388
45.5k
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3389
45.5k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3390
45.5k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3391
45.5k
                        }
3392
269k
                        else
3393
269k
                        {
3394
2.39M
                            for(row = 0; row < min_cu; row++)
3395
2.12M
                            {
3396
108M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3397
106M
                                {
3398
106M
                                    pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
3399
106M
                                }
3400
2.12M
                            }
3401
269k
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3402
269k
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3403
269k
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3404
269k
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3405
269k
                        }
3406
315k
                    }
3407
3408
264k
                    pu1_src_tmp_luma -= sao_blk_wd;
3409
264k
                    pu1_src_backup_luma -= sao_blk_wd;
3410
264k
                }
3411
3412
299k
                pu1_src_tmp_luma += (src_strd << log2_min_cu);
3413
299k
                pu1_src_backup_luma += (backup_strd << log2_min_cu);
3414
299k
            }
3415
41.8k
        }
3416
3417
        /* Chroma */
3418
2.21M
        if(no_loop_filter_enabled_chroma)
3419
41.3k
        {
3420
41.3k
            UWORD32 u4_no_loop_filter_flag;
3421
41.3k
            WORD32 loop_filter_bit_pos;
3422
41.3k
            WORD32 log2_min_cu = 3;
3423
41.3k
            WORD32 min_cu = (1 << log2_min_cu);
3424
41.3k
            UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
3425
41.3k
            WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
3426
41.3k
            WORD32 sao_blk_wd = ctb_size;
3427
41.3k
            WORD32 remaining_rows;
3428
41.3k
            WORD32 remaining_cols;
3429
3430
41.3k
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3431
41.3k
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3432
41.3k
            if(remaining_rows <= 2 * SAO_SHIFT_CTB)
3433
4.53k
                sao_blk_ht += remaining_rows;
3434
41.3k
            if(remaining_cols <= 2 * SAO_SHIFT_CTB)
3435
2.15k
                sao_blk_wd += remaining_cols;
3436
3437
41.3k
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
3438
41.3k
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3439
3440
41.3k
            pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
3441
3442
41.3k
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3443
41.3k
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3444
41.3k
            if(ps_sao_ctxt->i4_ctb_x > 0)
3445
37.8k
                loop_filter_bit_pos -= 2;
3446
3447
41.3k
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3448
41.3k
                            (loop_filter_bit_pos >> 3);
3449
3450
41.3k
            for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
3451
336k
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3452
295k
            {
3453
295k
                WORD32 tmp_wd = sao_blk_wd;
3454
3455
295k
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3456
295k
                                (loop_filter_bit_pos & 7);
3457
295k
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3458
3459
295k
                if(u4_no_loop_filter_flag)
3460
263k
                {
3461
576k
                    while(tmp_wd > 0)
3462
313k
                    {
3463
313k
                        if(CTZ(u4_no_loop_filter_flag))
3464
44.6k
                        {
3465
44.6k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3466
44.6k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3467
44.6k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3468
44.6k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3469
44.6k
                        }
3470
268k
                        else
3471
268k
                        {
3472
1.33M
                            for(row = 0; row < min_cu / 2; row++)
3473
1.06M
                            {
3474
58.4M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3475
57.3M
                                {
3476
57.3M
                                    pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
3477
57.3M
                                }
3478
1.06M
                            }
3479
3480
268k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3481
268k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3482
268k
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3483
268k
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3484
268k
                        }
3485
313k
                    }
3486
3487
263k
                    pu1_src_tmp_chroma -= sao_blk_wd;
3488
263k
                    pu1_src_backup_chroma -= sao_blk_wd;
3489
263k
                }
3490
3491
295k
                pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
3492
295k
                pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
3493
295k
            }
3494
41.3k
        }
3495
2.21M
    }
3496
3497
2.21M
}
3498