Coverage Report

Created: 2025-12-14 06:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/decoder/ihevcd_sao.c
Line
Count
Source
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevcd_sao.c
22
 *
23
 * @brief
24
 *  Contains function definitions for sample adaptive offset process
25
 *
26
 * @author
27
 *  Srinivas T
28
 *
29
 * @par List of Functions:
30
 *
31
 * @remarks
32
 *  None
33
 *
34
 *******************************************************************************
35
 */
36
37
#include <stdio.h>
38
#include <stddef.h>
39
#include <stdlib.h>
40
#include <string.h>
41
#include <assert.h>
42
43
#include "ihevc_typedefs.h"
44
#include "iv.h"
45
#include "ivd.h"
46
#include "ihevcd_cxa.h"
47
#include "ithread.h"
48
49
#include "ihevc_defs.h"
50
#include "ihevc_debug.h"
51
#include "ihevc_defs.h"
52
#include "ihevc_structs.h"
53
#include "ihevc_macros.h"
54
#include "ihevc_platform_macros.h"
55
#include "ihevc_cabac_tables.h"
56
#include "ihevc_sao.h"
57
#include "ihevc_mem_fns.h"
58
59
#include "ihevc_error.h"
60
#include "ihevc_common_tables.h"
61
62
#include "ihevcd_trace.h"
63
#include "ihevcd_defs.h"
64
#include "ihevcd_function_selector.h"
65
#include "ihevcd_structs.h"
66
#include "ihevcd_error.h"
67
#include "ihevcd_nal.h"
68
#include "ihevcd_bitstream.h"
69
#include "ihevcd_job_queue.h"
70
#include "ihevcd_utils.h"
71
72
#include "ihevc_deblk.h"
73
#include "ihevc_deblk_tables.h"
74
#include "ihevcd_profile.h"
75
#include "ihevcd_sao.h"
76
#include "ihevcd_debug.h"
77
78
203M
#define SAO_SHIFT_CTB    8
79
80
/**
81
 * SAO at CTB level is implemented for a shifted CTB (shifted by 8 pixels in both the x and y directions)
82
 */
83
void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
84
0
{
85
0
    codec_t *ps_codec = ps_sao_ctxt->ps_codec;
86
0
    UWORD8 *pu1_src_luma;
87
0
    UWORD8 *pu1_src_chroma;
88
0
    WORD32 src_strd;
89
0
    WORD32 ctb_size;
90
0
    WORD32 log2_ctb_size;
91
0
    sps_t *ps_sps;
92
0
    sao_t *ps_sao;
93
0
    WORD32 row, col;
94
0
    UWORD8 au1_avail_luma[8];
95
0
    UWORD8 au1_avail_chroma[8];
96
0
    WORD32 i;
97
0
    UWORD8 *pu1_src_top_luma;
98
0
    UWORD8 *pu1_src_top_chroma;
99
0
    UWORD8 *pu1_src_left_luma;
100
0
    UWORD8 *pu1_src_left_chroma;
101
0
    UWORD8 au1_src_top_right[2];
102
0
    UWORD8 au1_src_bot_left[2];
103
0
    UWORD8 *pu1_no_loop_filter_flag;
104
0
    WORD32 loop_filter_strd;
105
106
    /* Only first 5 values are used, but arrays are large
107
     enough so that SIMD functions can read 64 bits at a time */
108
0
    WORD8 ai1_offset_y[8] = {0};
109
0
    WORD8 ai1_offset_cb[8] = {0};
110
0
    WORD8 ai1_offset_cr[8] = {0};
111
112
0
    PROFILE_DISABLE_SAO();
113
114
0
    ps_sps = ps_sao_ctxt->ps_sps;
115
0
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
116
0
    ctb_size = (1 << log2_ctb_size);
117
0
    src_strd = ps_sao_ctxt->ps_codec->i4_strd;
118
0
    pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
119
0
    pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
120
121
0
    ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
122
0
    loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
123
124
    /* Current CTB */
125
0
    {
126
0
        WORD32 sao_wd_luma;
127
0
        WORD32 sao_wd_chroma;
128
0
        WORD32 sao_ht_luma;
129
0
        WORD32 sao_ht_chroma;
130
131
0
        WORD32 remaining_rows;
132
0
        WORD32 remaining_cols;
133
134
0
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
135
0
        sao_wd_luma = MIN(ctb_size, remaining_cols);
136
0
        sao_wd_chroma = MIN(ctb_size, remaining_cols);
137
138
0
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
139
0
        sao_ht_luma = MIN(ctb_size, remaining_rows);
140
0
        sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
141
142
0
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
143
0
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
144
0
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
145
0
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
146
147
0
        pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
148
0
                        ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
149
0
                        ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
150
151
0
        ai1_offset_y[1] = ps_sao->b4_y_offset_1;
152
0
        ai1_offset_y[2] = ps_sao->b4_y_offset_2;
153
0
        ai1_offset_y[3] = ps_sao->b4_y_offset_3;
154
0
        ai1_offset_y[4] = ps_sao->b4_y_offset_4;
155
156
0
        ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
157
0
        ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
158
0
        ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
159
0
        ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
160
161
0
        ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
162
0
        ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
163
0
        ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
164
0
        ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
165
166
0
        for(i = 0; i < 8; i++)
167
0
        {
168
0
            au1_avail_luma[i] = 255;
169
0
            au1_avail_chroma[i] = 255;
170
0
        }
171
172
173
0
        if(0 == ps_sao_ctxt->i4_ctb_x)
174
0
        {
175
0
            au1_avail_luma[0] = 0;
176
0
            au1_avail_luma[4] = 0;
177
0
            au1_avail_luma[6] = 0;
178
179
0
            au1_avail_chroma[0] = 0;
180
0
            au1_avail_chroma[4] = 0;
181
0
            au1_avail_chroma[6] = 0;
182
0
        }
183
184
0
        if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
185
0
        {
186
0
            au1_avail_luma[1] = 0;
187
0
            au1_avail_luma[5] = 0;
188
0
            au1_avail_luma[7] = 0;
189
190
0
            au1_avail_chroma[1] = 0;
191
0
            au1_avail_chroma[5] = 0;
192
0
            au1_avail_chroma[7] = 0;
193
0
        }
194
195
0
        if(0 == ps_sao_ctxt->i4_ctb_y)
196
0
        {
197
0
            au1_avail_luma[2] = 0;
198
0
            au1_avail_luma[4] = 0;
199
0
            au1_avail_luma[5] = 0;
200
201
0
            au1_avail_chroma[2] = 0;
202
0
            au1_avail_chroma[4] = 0;
203
0
            au1_avail_chroma[5] = 0;
204
0
        }
205
206
0
        if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
207
0
        {
208
0
            au1_avail_luma[3] = 0;
209
0
            au1_avail_luma[6] = 0;
210
0
            au1_avail_luma[7] = 0;
211
212
0
            au1_avail_chroma[3] = 0;
213
0
            au1_avail_chroma[6] = 0;
214
0
            au1_avail_chroma[7] = 0;
215
0
        }
216
217
218
0
        if(0 == ps_sao->b3_y_type_idx)
219
0
        {
220
            /* Update left, top and top-left */
221
0
            for(row = 0; row < sao_ht_luma; row++)
222
0
            {
223
0
                pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
224
0
            }
225
0
            ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
226
227
0
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
228
229
0
        }
230
0
        else
231
0
        {
232
0
            UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
233
0
            UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
234
0
            WORD32 tmp_strd = MAX_CTB_SIZE + 2;
235
0
            WORD32 no_loop_filter_enabled = 0;
236
237
            /* Check the loop filter flags and copy the original values for back up */
238
0
            {
239
0
                UWORD32 u4_no_loop_filter_flag;
240
0
                WORD32 min_cu = 8;
241
0
                UWORD8 *pu1_src_tmp = pu1_src_luma;
242
243
0
                for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
244
0
                {
245
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
246
0
                                    ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
247
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
248
249
0
                    if(u4_no_loop_filter_flag)
250
0
                    {
251
0
                        WORD32 tmp_wd = sao_wd_luma;
252
0
                        no_loop_filter_enabled = 1;
253
0
                        while(tmp_wd > 0)
254
0
                        {
255
0
                            if(CTZ(u4_no_loop_filter_flag))
256
0
                            {
257
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
258
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
259
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
260
0
                                tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
261
0
                            }
262
0
                            else
263
0
                            {
264
0
                                for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
265
0
                                {
266
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
267
0
                                    {
268
0
                                        pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
269
0
                                    }
270
0
                                }
271
272
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
273
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
274
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
275
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
276
0
                            }
277
0
                        }
278
279
0
                        pu1_src_tmp -= sao_wd_luma;
280
0
                    }
281
282
0
                    pu1_src_tmp += min_cu * src_strd;
283
0
                    pu1_src_copy += min_cu * tmp_strd;
284
0
                }
285
0
            }
286
287
0
            if(1 == ps_sao->b3_y_type_idx)
288
0
            {
289
0
                ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
290
0
                                                                          src_strd,
291
0
                                                                          pu1_src_left_luma,
292
0
                                                                          pu1_src_top_luma,
293
0
                                                                          ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
294
0
                                                                          ps_sao->b5_y_band_pos,
295
0
                                                                          ai1_offset_y,
296
0
                                                                          sao_wd_luma,
297
0
                                                                          sao_ht_luma);
298
0
            }
299
0
            else // if(2 <= ps_sao->b3_y_type_idx)
300
0
            {
301
0
                au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
302
0
                au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
303
0
                ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
304
0
                                                                  src_strd,
305
0
                                                                  pu1_src_left_luma,
306
0
                                                                  pu1_src_top_luma,
307
0
                                                                  ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
308
0
                                                                  au1_src_top_right,
309
0
                                                                  au1_src_bot_left,
310
0
                                                                  au1_avail_luma,
311
0
                                                                  ai1_offset_y,
312
0
                                                                  sao_wd_luma,
313
0
                                                                  sao_ht_luma);
314
0
            }
315
316
            /* Check the loop filter flags and copy the original values back if they are set */
317
0
            if(no_loop_filter_enabled)
318
0
            {
319
0
                UWORD32 u4_no_loop_filter_flag;
320
0
                WORD32 min_cu = 8;
321
0
                UWORD8 *pu1_src_tmp = pu1_src_luma;
322
323
0
                for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
324
0
                {
325
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
326
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
327
328
0
                    if(u4_no_loop_filter_flag)
329
0
                    {
330
0
                        WORD32 tmp_wd = sao_wd_luma;
331
0
                        while(tmp_wd > 0)
332
0
                        {
333
0
                            if(CTZ(u4_no_loop_filter_flag))
334
0
                            {
335
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
336
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
337
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
338
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
339
0
                            }
340
0
                            else
341
0
                            {
342
0
                                for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
343
0
                                {
344
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
345
0
                                    {
346
0
                                        pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
347
0
                                    }
348
0
                                }
349
350
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
351
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
352
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
353
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
354
0
                            }
355
0
                        }
356
357
0
                        pu1_src_tmp -= sao_wd_luma;
358
0
                    }
359
360
0
                    pu1_src_tmp += min_cu * src_strd;
361
0
                    pu1_src_copy += min_cu * tmp_strd;
362
0
                }
363
0
            }
364
365
0
        }
366
367
0
        if(0 == ps_sao->b3_cb_type_idx)
368
0
        {
369
0
            for(row = 0; row < sao_ht_chroma; row++)
370
0
            {
371
0
                pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
372
0
                pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
373
0
            }
374
0
            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
375
0
            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
376
377
0
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
378
0
        }
379
0
        else
380
0
        {
381
0
            UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
382
0
            UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
383
0
            WORD32 tmp_strd = MAX_CTB_SIZE + 4;
384
0
            WORD32 no_loop_filter_enabled = 0;
385
386
            /* Check the loop filter flags and copy the original values for back up */
387
0
            {
388
0
                UWORD32 u4_no_loop_filter_flag;
389
0
                WORD32 min_cu = 4;
390
0
                UWORD8 *pu1_src_tmp = pu1_src_chroma;
391
392
0
                for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
393
0
                {
394
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
395
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
396
397
0
                    if(u4_no_loop_filter_flag)
398
0
                    {
399
0
                        WORD32 tmp_wd = sao_wd_chroma;
400
0
                        no_loop_filter_enabled = 1;
401
0
                        while(tmp_wd > 0)
402
0
                        {
403
0
                            if(CTZ(u4_no_loop_filter_flag))
404
0
                            {
405
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
406
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
407
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
408
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
409
0
                            }
410
0
                            else
411
0
                            {
412
0
                                for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
413
0
                                {
414
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
415
0
                                    {
416
0
                                        pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
417
0
                                    }
418
0
                                }
419
420
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
421
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
422
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
423
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
424
0
                            }
425
0
                        }
426
427
0
                        pu1_src_tmp -= sao_wd_chroma;
428
0
                    }
429
430
0
                    pu1_src_tmp += min_cu * src_strd;
431
0
                    pu1_src_copy += min_cu * tmp_strd;
432
0
                }
433
0
            }
434
435
0
            if(1 == ps_sao->b3_cb_type_idx)
436
0
            {
437
0
                ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
438
0
                                                                            src_strd,
439
0
                                                                            pu1_src_left_chroma,
440
0
                                                                            pu1_src_top_chroma,
441
0
                                                                            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
442
0
                                                                            ps_sao->b5_cb_band_pos,
443
0
                                                                            ps_sao->b5_cr_band_pos,
444
0
                                                                            ai1_offset_cb,
445
0
                                                                            ai1_offset_cr,
446
0
                                                                            sao_wd_chroma,
447
0
                                                                            sao_ht_chroma
448
0
                                                                           );
449
0
            }
450
0
            else // if(2 <= ps_sao->b3_cb_type_idx)
451
0
            {
452
0
                au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
453
0
                au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
454
0
                au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
455
0
                au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
456
0
                ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
457
0
                                                                     src_strd,
458
0
                                                                     pu1_src_left_chroma,
459
0
                                                                     pu1_src_top_chroma,
460
0
                                                                     ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
461
0
                                                                     au1_src_top_right,
462
0
                                                                     au1_src_bot_left,
463
0
                                                                     au1_avail_chroma,
464
0
                                                                     ai1_offset_cb,
465
0
                                                                     ai1_offset_cr,
466
0
                                                                     sao_wd_chroma,
467
0
                                                                     sao_ht_chroma);
468
0
            }
469
470
            /* Check the loop filter flags and copy the original values back if they are set */
471
0
            if(no_loop_filter_enabled)
472
0
            {
473
0
                UWORD32 u4_no_loop_filter_flag;
474
0
                WORD32 min_cu = 4;
475
0
                UWORD8 *pu1_src_tmp = pu1_src_chroma;
476
477
0
                for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
478
0
                {
479
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
480
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
481
482
0
                    if(u4_no_loop_filter_flag)
483
0
                    {
484
0
                        WORD32 tmp_wd = sao_wd_chroma;
485
0
                        while(tmp_wd > 0)
486
0
                        {
487
0
                            if(CTZ(u4_no_loop_filter_flag))
488
0
                            {
489
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
490
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
491
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
492
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
493
0
                            }
494
0
                            else
495
0
                            {
496
0
                                for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
497
0
                                {
498
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
499
0
                                    {
500
0
                                        pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
501
0
                                    }
502
0
                                }
503
504
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
505
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
506
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
507
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
508
0
                            }
509
0
                        }
510
511
0
                        pu1_src_tmp -= sao_wd_chroma;
512
0
                    }
513
514
0
                    pu1_src_tmp += min_cu * src_strd;
515
0
                    pu1_src_copy += min_cu * tmp_strd;
516
0
                }
517
0
            }
518
519
0
        }
520
521
0
    }
522
0
}
523
524
void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
525
5.25M
{
526
5.25M
    codec_t *ps_codec = ps_sao_ctxt->ps_codec;
527
5.25M
    UWORD8 *pu1_src_luma;
528
5.25M
    UWORD8 *pu1_src_chroma;
529
5.25M
    WORD32 src_strd;
530
5.25M
    WORD32 ctb_size;
531
5.25M
    WORD32 log2_ctb_size;
532
5.25M
    sps_t *ps_sps;
533
5.25M
    sao_t *ps_sao;
534
5.25M
    pps_t *ps_pps;
535
5.25M
    slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
536
5.25M
    tile_t *ps_tile;
537
5.25M
    UWORD16 *pu1_slice_idx;
538
5.25M
    UWORD16 *pu1_tile_idx;
539
5.25M
    WORD32 row, col;
540
5.25M
    UWORD8 au1_avail_luma[8];
541
5.25M
    UWORD8 au1_avail_chroma[8];
542
5.25M
    UWORD8 au1_tile_slice_boundary[8];
543
5.25M
    UWORD8 au4_ilf_across_tile_slice_enable[8];
544
5.25M
    WORD32 i;
545
5.25M
    UWORD8 *pu1_src_top_luma;
546
5.25M
    UWORD8 *pu1_src_top_chroma;
547
5.25M
    UWORD8 *pu1_src_left_luma;
548
5.25M
    UWORD8 *pu1_src_left_chroma;
549
5.25M
    UWORD8 au1_src_top_right[2];
550
5.25M
    UWORD8 au1_src_bot_left[2];
551
5.25M
    UWORD8 *pu1_no_loop_filter_flag;
552
5.25M
    UWORD8 *pu1_src_backup_luma;
553
5.25M
    UWORD8 *pu1_src_backup_chroma;
554
5.25M
    WORD32 backup_strd;
555
5.25M
    WORD32 loop_filter_strd;
556
557
5.25M
    WORD32 no_loop_filter_enabled_luma = 0;
558
5.25M
    WORD32 no_loop_filter_enabled_chroma = 0;
559
5.25M
    UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
560
5.25M
    UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
561
5.25M
    UWORD8 *pu1_sao_src_luma_top_left_ctb;
562
5.25M
    UWORD8 *pu1_sao_src_chroma_top_left_ctb;
563
5.25M
    UWORD8 *pu1_sao_src_top_left_luma_top_right;
564
5.25M
    UWORD8 *pu1_sao_src_top_left_chroma_top_right;
565
5.25M
    UWORD8  u1_sao_src_top_left_luma_bot_left;
566
5.25M
    UWORD8  *pu1_sao_src_top_left_luma_bot_left;
567
5.25M
    UWORD8 *au1_sao_src_top_left_chroma_bot_left;
568
5.25M
    UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
569
    /* Only first 5 values are used, but arrays are large
570
     enough so that SIMD functions can read 64 bits at a time */
571
5.25M
    WORD8 ai1_offset_y[8] = {0};
572
5.25M
    WORD8 ai1_offset_cb[8] = {0};
573
5.25M
    WORD8 ai1_offset_cr[8] = {0};
574
5.25M
    WORD32  chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
575
576
5.25M
    PROFILE_DISABLE_SAO();
577
578
5.25M
    ps_sps = ps_sao_ctxt->ps_sps;
579
5.25M
    ps_pps = ps_sao_ctxt->ps_pps;
580
5.25M
    ps_tile = ps_sao_ctxt->ps_tile;
581
582
5.25M
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
583
5.25M
    ctb_size = (1 << log2_ctb_size);
584
5.25M
    src_strd = ps_sao_ctxt->ps_codec->i4_strd;
585
5.25M
    ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
586
5.25M
    ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
587
588
5.25M
    pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
589
5.25M
    pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
590
5.25M
    pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
591
5.25M
    pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
592
593
    /* Stores the left value for each row of CTBs; needed for column tiles */
594
5.25M
    pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
595
5.25M
    pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
596
5.25M
    pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
597
5.25M
    pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
598
5.25M
    u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
599
5.25M
    pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
600
5.25M
    au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
601
5.25M
    pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
602
5.25M
    pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
603
5.25M
    pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
604
605
5.25M
    ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
606
5.25M
    loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
607
5.25M
    backup_strd = 2 * MAX_CTB_SIZE;
608
609
5.25M
    DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
610
611
5.25M
    {
612
        /* Check the loop filter flags and copy the original values for back up */
613
        /* Luma */
614
615
        /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs
616
         * can belong to different slice with their own sao_enable flag */
617
5.25M
        {
618
5.25M
            UWORD32 u4_no_loop_filter_flag;
619
5.25M
            WORD32 loop_filter_bit_pos;
620
5.25M
            WORD32 log2_min_cu = 3;
621
5.25M
            WORD32 min_cu = (1 << log2_min_cu);
622
5.25M
            UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
623
5.25M
            WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
624
5.25M
            WORD32 sao_blk_wd = ctb_size;
625
5.25M
            WORD32 remaining_rows;
626
5.25M
            WORD32 remaining_cols;
627
628
5.25M
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
629
5.25M
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
630
5.25M
            if(remaining_rows <= SAO_SHIFT_CTB)
631
487k
                sao_blk_ht += remaining_rows;
632
5.25M
            if(remaining_cols <= SAO_SHIFT_CTB)
633
190k
                sao_blk_wd += remaining_cols;
634
635
5.25M
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
636
5.25M
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
637
638
5.25M
            pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
639
640
5.25M
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
641
5.25M
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
642
5.25M
            if(ps_sao_ctxt->i4_ctb_x > 0)
643
5.06M
                loop_filter_bit_pos -= 1;
644
645
5.25M
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
646
5.25M
                            (loop_filter_bit_pos >> 3);
647
648
5.25M
            for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
649
38.9M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
650
33.6M
            {
651
33.6M
                WORD32 tmp_wd = sao_blk_wd;
652
653
33.6M
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
654
33.6M
                                (loop_filter_bit_pos & 7);
655
33.6M
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
656
657
33.6M
                if(u4_no_loop_filter_flag)
658
1.00M
                {
659
1.00M
                    no_loop_filter_enabled_luma = 1;
660
2.18M
                    while(tmp_wd > 0)
661
1.18M
                    {
662
1.18M
                        if(CTZ(u4_no_loop_filter_flag))
663
155k
                        {
664
155k
                            pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
665
155k
                            pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
666
155k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
667
155k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
668
155k
                        }
669
1.02M
                        else
670
1.02M
                        {
671
8.98M
                            for(row = 0; row < min_cu; row++)
672
7.95M
                            {
673
386M
                                for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
674
378M
                                {
675
378M
                                    pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
676
378M
                                }
677
7.95M
                            }
678
1.02M
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
679
1.02M
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
680
1.02M
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
681
1.02M
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
682
1.02M
                        }
683
1.18M
                    }
684
685
1.00M
                    pu1_src_tmp_luma -= sao_blk_wd;
686
1.00M
                    pu1_src_backup_luma -= sao_blk_wd;
687
1.00M
                }
688
689
33.6M
                pu1_src_tmp_luma += (src_strd << log2_min_cu);
690
33.6M
                pu1_src_backup_luma += (backup_strd << log2_min_cu);
691
33.6M
            }
692
5.25M
        }
693
694
        /* Chroma */
695
696
5.25M
        if (CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc)
697
5.25M
        {
698
5.25M
            UWORD32 u4_no_loop_filter_flag;
699
5.25M
            WORD32 loop_filter_bit_pos;
700
5.25M
            WORD32 log2_min_cu = 3;
701
5.25M
            WORD32 min_cu = (1 << log2_min_cu);
702
5.25M
            UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
703
5.25M
            WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
704
5.25M
            WORD32 sao_blk_wd = ctb_size;
705
5.25M
            WORD32 remaining_rows;
706
5.25M
            WORD32 remaining_cols;
707
708
5.25M
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
709
5.25M
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
710
5.25M
            if(remaining_rows <= 2 * SAO_SHIFT_CTB)
711
487k
                sao_blk_ht += remaining_rows;
712
5.25M
            if(remaining_cols <= 2 * SAO_SHIFT_CTB)
713
190k
                sao_blk_wd += remaining_cols;
714
715
5.25M
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
716
5.25M
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
717
718
5.25M
            pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
719
720
5.25M
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
721
5.25M
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
722
5.25M
            if(ps_sao_ctxt->i4_ctb_x > 0)
723
5.06M
                loop_filter_bit_pos -= 2;
724
725
5.25M
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
726
5.25M
                            (loop_filter_bit_pos >> 3);
727
728
5.25M
            for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
729
38.9M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
730
33.6M
            {
731
33.6M
                WORD32 tmp_wd = sao_blk_wd;
732
733
33.6M
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
734
33.6M
                                (loop_filter_bit_pos & 7);
735
33.6M
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
736
737
33.6M
                if(u4_no_loop_filter_flag)
738
1.00M
                {
739
1.00M
                    no_loop_filter_enabled_chroma = 1;
740
2.18M
                    while(tmp_wd > 0)
741
1.17M
                    {
742
1.17M
                        if(CTZ(u4_no_loop_filter_flag))
743
150k
                        {
744
150k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
745
150k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
746
150k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
747
150k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
748
150k
                        }
749
1.02M
                        else
750
1.02M
                        {
751
5.07M
                            for(row = 0; row < min_cu / 2; row++)
752
4.05M
                            {
753
230M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
754
226M
                                {
755
226M
                                    pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
756
226M
                                }
757
4.05M
                            }
758
759
1.02M
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
760
1.02M
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
761
1.02M
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
762
1.02M
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
763
1.02M
                        }
764
1.17M
                    }
765
766
1.00M
                    pu1_src_tmp_chroma -= sao_blk_wd;
767
1.00M
                    pu1_src_backup_chroma -= sao_blk_wd;
768
1.00M
                }
769
770
33.6M
                pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
771
33.6M
                pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
772
33.6M
            }
773
5.25M
        }
774
5.25M
    }
775
776
5.25M
    DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
777
778
    /* Top-left CTB */
779
5.25M
    if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
780
4.59M
    {
781
4.59M
        WORD32 sao_wd_luma = SAO_SHIFT_CTB;
782
4.59M
        WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
783
4.59M
        WORD32 sao_ht_luma = SAO_SHIFT_CTB;
784
4.59M
        WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
785
786
4.59M
        WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
787
4.59M
        WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
788
4.59M
        WORD32 au4_idx_tl[8], idx_tl;
789
790
4.59M
        slice_header_t *ps_slice_hdr_top_left;
791
4.59M
        {
792
4.59M
            WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
793
4.59M
                                        (ps_sao_ctxt->i4_ctb_x - 1);
794
4.59M
            ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx];
795
4.59M
        }
796
797
798
4.59M
        pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
799
4.59M
        pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
800
4.59M
        ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
801
4.59M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
802
4.59M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
803
4.59M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
804
4.59M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
805
806
4.59M
        if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag)
807
352k
        {
808
352k
            if(0 == ps_sao->b3_y_type_idx)
809
296k
            {
810
                /* Update left, top and top-left */
811
2.66M
                for(row = 0; row < sao_ht_luma; row++)
812
2.37M
                {
813
2.37M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
814
2.37M
                }
815
296k
                pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
816
817
296k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
818
819
820
296k
            }
821
822
56.0k
            else if(1 == ps_sao->b3_y_type_idx)
823
27.5k
            {
824
27.5k
                ai1_offset_y[1] = ps_sao->b4_y_offset_1;
825
27.5k
                ai1_offset_y[2] = ps_sao->b4_y_offset_2;
826
27.5k
                ai1_offset_y[3] = ps_sao->b4_y_offset_3;
827
27.5k
                ai1_offset_y[4] = ps_sao->b4_y_offset_4;
828
829
27.5k
                ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
830
27.5k
                                                                          src_strd,
831
27.5k
                                                                          pu1_src_left_luma,
832
27.5k
                                                                          pu1_src_top_luma,
833
27.5k
                                                                          pu1_sao_src_luma_top_left_ctb,
834
27.5k
                                                                          ps_sao->b5_y_band_pos,
835
27.5k
                                                                          ai1_offset_y,
836
27.5k
                                                                          sao_wd_luma,
837
27.5k
                                                                          sao_ht_luma
838
27.5k
                                                                         );
839
27.5k
            }
840
841
28.4k
            else // if(2 <= ps_sao->b3_y_type_idx)
842
28.4k
            {
843
28.4k
                ai1_offset_y[1] = ps_sao->b4_y_offset_1;
844
28.4k
                ai1_offset_y[2] = ps_sao->b4_y_offset_2;
845
28.4k
                ai1_offset_y[3] = ps_sao->b4_y_offset_3;
846
28.4k
                ai1_offset_y[4] = ps_sao->b4_y_offset_4;
847
848
256k
                for(i = 0; i < 8; i++)
849
227k
                {
850
227k
                    au1_avail_luma[i] = 255;
851
227k
                    au1_tile_slice_boundary[i] = 0;
852
227k
                    au4_idx_tl[i] = 0;
853
227k
                    au4_ilf_across_tile_slice_enable[i] = 1;
854
227k
                }
855
856
                /******************************************************************
857
                 * Derive the  Top-left CTB's neighbor pixel's slice indices.
858
                 *
859
                 *          TL_T
860
                 *       4  _2__5________
861
                 *     0   |    |       |
862
                 *    TL_L | TL | 1 TL_R|
863
                 *         |____|_______|____
864
                 *        6|TL_D|7      |    |
865
                 *         | 3  |       |    |
866
                 *         |____|_______|    |
867
                 *              |            |
868
                 *              |            |
869
                 *              |____________|
870
                 *
871
                 *****************************************************************/
872
873
                /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/
874
28.4k
                {
875
28.4k
                    if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
876
15.8k
                    {
877
15.8k
                        {
878
                            /*Assuming that sao shift is uniform along x and y directions*/
879
15.8k
                            if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
880
0
                            {
881
0
                                ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
882
0
                                ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
883
0
                            }
884
15.8k
                            else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
885
15.8k
                            {
886
15.8k
                                ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
887
15.8k
                                ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
888
15.8k
                            }
889
15.8k
                            ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
890
15.8k
                            ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
891
892
15.8k
                            ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
893
15.8k
                            ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
894
895
15.8k
                            ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
896
15.8k
                            ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
897
898
15.8k
                            ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
899
15.8k
                            ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
900
15.8k
                        }
901
902
15.8k
                        if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
903
8.62k
                        {
904
                            /*Calculate slice indices for neighbor pixels*/
905
8.62k
                            idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
906
8.62k
                            au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
907
8.62k
                            au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
908
8.62k
                            au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
909
8.62k
                            au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
910
8.62k
                            au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
911
912
8.62k
                            if((0 == (1 << log2_ctb_size) - sao_wd_luma))
913
0
                            {
914
0
                                if(ps_sao_ctxt->i4_ctb_x == 1)
915
0
                                {
916
0
                                    au4_idx_tl[6] = -1;
917
0
                                    au4_idx_tl[4] = -1;
918
0
                                }
919
0
                                else
920
0
                                {
921
0
                                    au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
922
0
                                }
923
0
                                if(ps_sao_ctxt->i4_ctb_y == 1)
924
0
                                {
925
0
                                    au4_idx_tl[5] = -1;
926
0
                                    au4_idx_tl[4] = -1;
927
0
                                }
928
0
                                else
929
0
                                {
930
0
                                    au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
931
0
                                    au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
932
0
                                }
933
0
                                au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
934
0
                            }
935
936
                            /* Verify that the neighbor CTBs don't cross the picture boundary.
937
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
938
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags.
939
                             * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
940
                             * the respective pixel's flags are checked
941
                             */
942
943
8.62k
                            if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
944
0
                            {
945
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
946
0
                                au4_ilf_across_tile_slice_enable[6] = 0;
947
0
                            }
948
8.62k
                            else
949
8.62k
                            {
950
8.62k
                                au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
951
8.62k
                            }
952
8.62k
                            if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
953
0
                            {
954
0
                                au4_ilf_across_tile_slice_enable[5] = 0;
955
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
956
0
                            }
957
8.62k
                            else
958
8.62k
                            {
959
8.62k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
960
8.62k
                                au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
961
8.62k
                            }
962
8.62k
                            au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
963
8.62k
                            au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
964
8.62k
                            au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
965
8.62k
                            au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
966
8.62k
                            au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
967
968
8.62k
                            if(au4_idx_tl[5] > idx_tl)
969
631
                            {
970
631
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
971
631
                            }
972
973
                            /*
974
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
975
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags.
976
                             * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
977
                             * the respective pixel's flags are checked
978
                             */
979
77.5k
                            for(i = 0; i < 8; i++)
980
68.9k
                            {
981
                                /*Sets the edges that lie on the slice/tile boundary*/
982
68.9k
                                if(au4_idx_tl[i] != idx_tl)
983
16.4k
                                {
984
16.4k
                                    au1_tile_slice_boundary[i] = 1;
985
16.4k
                                }
986
52.5k
                                else
987
52.5k
                                {
988
52.5k
                                    au4_ilf_across_tile_slice_enable[i] = 1;
989
52.5k
                                }
990
68.9k
                            }
991
992
8.62k
                            ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
993
8.62k
                        }
994
995
15.8k
                        if(ps_pps->i1_tiles_enabled_flag)
996
7.38k
                        {
997
                            /* Calculate availability flags at tile boundary */
998
7.38k
                            if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
999
4.20k
                            {
1000
                                /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1001
4.20k
                                if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1002
4.03k
                                {
1003
                                    /*Set the boundary arrays*/
1004
                                    /*Calculate tile indices for neighbor pixels*/
1005
4.03k
                                    idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1006
4.03k
                                    au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1007
4.03k
                                    au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1008
4.03k
                                    au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1009
4.03k
                                    au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1010
4.03k
                                    au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1011
1012
4.03k
                                    if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1013
0
                                    {
1014
0
                                        if(ps_sao_ctxt->i4_ctb_x == 1)
1015
0
                                        {
1016
0
                                            au4_idx_tl[6] = -1;
1017
0
                                            au4_idx_tl[4] = -1;
1018
0
                                        }
1019
0
                                        else
1020
0
                                        {
1021
0
                                            au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1022
0
                                        }
1023
0
                                        if(ps_sao_ctxt->i4_ctb_y == 1)
1024
0
                                        {
1025
0
                                            au4_idx_tl[5] = -1;
1026
0
                                            au4_idx_tl[4] = -1;
1027
0
                                        }
1028
0
                                        else
1029
0
                                        {
1030
0
                                            au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1031
0
                                            au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1032
0
                                        }
1033
0
                                        au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1034
0
                                    }
1035
36.3k
                                    for(i = 0; i < 8; i++)
1036
32.2k
                                    {
1037
                                        /*Sets the edges that lie on the tile boundary*/
1038
32.2k
                                        if(au4_idx_tl[i] != idx_tl)
1039
13.2k
                                        {
1040
13.2k
                                            au1_tile_slice_boundary[i] |= 1;
1041
13.2k
                                            au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1042
13.2k
                                        }
1043
32.2k
                                    }
1044
4.03k
                                }
1045
4.20k
                            }
1046
7.38k
                        }
1047
1048
1049
                        /*Set availability flags based on tile and slice boundaries*/
1050
142k
                        for(i = 0; i < 8; i++)
1051
126k
                        {
1052
                            /*Sets the edges that lie on the slice/tile boundary*/
1053
126k
                            if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1054
13.5k
                            {
1055
13.5k
                                au1_avail_luma[i] = 0;
1056
13.5k
                            }
1057
126k
                        }
1058
15.8k
                    }
1059
28.4k
                }
1060
1061
28.4k
                if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
1062
0
                {
1063
0
                    au1_avail_luma[0] = 0;
1064
0
                    au1_avail_luma[4] = 0;
1065
0
                    au1_avail_luma[6] = 0;
1066
0
                }
1067
1068
28.4k
                if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1069
0
                {
1070
0
                    au1_avail_luma[1] = 0;
1071
0
                    au1_avail_luma[5] = 0;
1072
0
                    au1_avail_luma[7] = 0;
1073
0
                }
1074
                //y==1 case
1075
28.4k
                if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
1076
0
                {
1077
0
                    au1_avail_luma[2] = 0;
1078
0
                    au1_avail_luma[4] = 0;
1079
0
                    au1_avail_luma[5] = 0;
1080
0
                }
1081
28.4k
                if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1082
0
                {
1083
0
                    au1_avail_luma[3] = 0;
1084
0
                    au1_avail_luma[6] = 0;
1085
0
                    au1_avail_luma[7] = 0;
1086
0
                }
1087
1088
28.4k
                {
1089
28.4k
                    au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
1090
28.4k
                    u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
1091
28.4k
                    ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1092
28.4k
                                                                      src_strd,
1093
28.4k
                                                                      pu1_src_left_luma,
1094
28.4k
                                                                      pu1_src_top_luma,
1095
28.4k
                                                                      pu1_sao_src_luma_top_left_ctb,
1096
28.4k
                                                                      au1_src_top_right,
1097
28.4k
                                                                      &u1_sao_src_top_left_luma_bot_left,
1098
28.4k
                                                                      au1_avail_luma,
1099
28.4k
                                                                      ai1_offset_y,
1100
28.4k
                                                                      sao_wd_luma,
1101
28.4k
                                                                      sao_ht_luma);
1102
28.4k
                }
1103
28.4k
            }
1104
1105
352k
        }
1106
4.23M
        else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1107
860k
        {
1108
            /* Update left, top and top-left */
1109
7.74M
            for(row = 0; row < sao_ht_luma; row++)
1110
6.88M
            {
1111
6.88M
                pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1112
6.88M
            }
1113
860k
            pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1114
1115
860k
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1116
860k
        }
1117
1118
4.59M
        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && ps_slice_hdr_top_left->i1_slice_sao_chroma_flag)
1119
296k
        {
1120
296k
            if(0 == ps_sao->b3_cb_type_idx)
1121
262k
            {
1122
2.35M
                for(row = 0; row < sao_ht_chroma; row++)
1123
2.09M
                {
1124
2.09M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1125
2.09M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1126
2.09M
                }
1127
262k
                pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1128
262k
                pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1129
1130
262k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1131
1132
262k
            }
1133
1134
34.6k
            else if(1 == ps_sao->b3_cb_type_idx)
1135
16.0k
            {
1136
16.0k
                ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1137
16.0k
                ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1138
16.0k
                ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1139
16.0k
                ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1140
1141
16.0k
                ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1142
16.0k
                ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1143
16.0k
                ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1144
16.0k
                ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1145
1146
16.0k
                if(chroma_yuv420sp_vu)
1147
599
                {
1148
599
                    ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1149
599
                                                                                src_strd,
1150
599
                                                                                pu1_src_left_chroma,
1151
599
                                                                                pu1_src_top_chroma,
1152
599
                                                                                pu1_sao_src_chroma_top_left_ctb,
1153
599
                                                                                ps_sao->b5_cr_band_pos,
1154
599
                                                                                ps_sao->b5_cb_band_pos,
1155
599
                                                                                ai1_offset_cr,
1156
599
                                                                                ai1_offset_cb,
1157
599
                                                                                sao_wd_chroma,
1158
599
                                                                                sao_ht_chroma
1159
599
                                                                               );
1160
599
                }
1161
15.4k
                else
1162
15.4k
                {
1163
15.4k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1164
15.4k
                                                                                src_strd,
1165
15.4k
                                                                                pu1_src_left_chroma,
1166
15.4k
                                                                                pu1_src_top_chroma,
1167
15.4k
                                                                                pu1_sao_src_chroma_top_left_ctb,
1168
15.4k
                                                                                ps_sao->b5_cb_band_pos,
1169
15.4k
                                                                                ps_sao->b5_cr_band_pos,
1170
15.4k
                                                                                ai1_offset_cb,
1171
15.4k
                                                                                ai1_offset_cr,
1172
15.4k
                                                                                sao_wd_chroma,
1173
15.4k
                                                                                sao_ht_chroma
1174
15.4k
                                                                               );
1175
15.4k
                }
1176
16.0k
            }
1177
1178
18.5k
            else // if(2 <= ps_sao->b3_cb_type_idx)
1179
18.5k
            {
1180
18.5k
                ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1181
18.5k
                ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1182
18.5k
                ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1183
18.5k
                ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1184
1185
18.5k
                ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1186
18.5k
                ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1187
18.5k
                ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1188
18.5k
                ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1189
167k
                for(i = 0; i < 8; i++)
1190
148k
                {
1191
148k
                    au1_avail_chroma[i] = 255;
1192
148k
                    au1_tile_slice_boundary[i] = 0;
1193
148k
                    au4_idx_tl[i] = 0;
1194
148k
                    au4_ilf_across_tile_slice_enable[i] = 1;
1195
148k
                }
1196
                /* In case of multiple slices or tiles */
1197
18.5k
                {
1198
18.5k
                    if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1199
10.9k
                    {
1200
10.9k
                        if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
1201
1.53k
                        {
1202
1.53k
                            ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
1203
1.53k
                            ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
1204
1.53k
                        }
1205
9.45k
                        else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
1206
8.78k
                        {
1207
8.78k
                            ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
1208
8.78k
                            ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
1209
8.78k
                        }
1210
10.9k
                        ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
1211
10.9k
                        ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
1212
1213
10.9k
                        ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
1214
10.9k
                        ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
1215
1216
10.9k
                        ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
1217
10.9k
                        ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
1218
1219
10.9k
                        ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
1220
10.9k
                        ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
1221
1222
10.9k
                        if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1223
4.35k
                        {
1224
1225
4.35k
                            idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1226
4.35k
                            au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1227
4.35k
                            au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1228
4.35k
                            au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1229
4.35k
                            au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1230
4.35k
                            au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1231
1232
4.35k
                            if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
1233
2.07k
                            {
1234
2.07k
                                if(ps_sao_ctxt->i4_ctb_x == 1)
1235
542
                                {
1236
542
                                    au4_idx_tl[6] = -1;
1237
542
                                    au4_idx_tl[4] = -1;
1238
542
                                }
1239
1.52k
                                else
1240
1.52k
                                {
1241
1.52k
                                    au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1242
1.52k
                                }
1243
2.07k
                                if(ps_sao_ctxt->i4_ctb_y == 1)
1244
41
                                {
1245
41
                                    au4_idx_tl[5] = -1;
1246
41
                                    au4_idx_tl[4] = -1;
1247
41
                                }
1248
2.02k
                                else
1249
2.02k
                                {
1250
2.02k
                                    au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1251
2.02k
                                    au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1252
2.02k
                                }
1253
2.07k
                                au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1254
2.07k
                            }
1255
1256
                            /* Verify that the neighbor ctbs don't cross pic boundary
1257
                             * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
1258
4.35k
                            if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
1259
542
                            {
1260
542
                                au4_ilf_across_tile_slice_enable[4] = 0;
1261
542
                                au4_ilf_across_tile_slice_enable[6] = 0;
1262
542
                            }
1263
3.81k
                            else
1264
3.81k
                            {
1265
3.81k
                                au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1266
3.81k
                            }
1267
4.35k
                            if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
1268
41
                            {
1269
41
                                au4_ilf_across_tile_slice_enable[5] = 0;
1270
41
                                au4_ilf_across_tile_slice_enable[4] = 0;
1271
41
                            }
1272
4.31k
                            else
1273
4.31k
                            {
1274
4.31k
                                au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1275
4.31k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1276
4.31k
                            }
1277
4.35k
                            au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1278
4.35k
                            au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1279
4.35k
                            au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1280
4.35k
                            au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1281
4.35k
                            au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1282
                            /*
1283
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1284
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags
1285
                             */
1286
39.2k
                            for(i = 0; i < 8; i++)
1287
34.8k
                            {
1288
                                /*Sets the edges that lie on the slice/tile boundary*/
1289
34.8k
                                if(au4_idx_tl[i] != idx_tl)
1290
6.66k
                                {
1291
6.66k
                                    au1_tile_slice_boundary[i] = 1;
1292
6.66k
                                }
1293
28.1k
                                else
1294
28.1k
                                {
1295
28.1k
                                    au4_ilf_across_tile_slice_enable[i] = 1;
1296
28.1k
                                }
1297
34.8k
                            }
1298
1299
                            /*Reset indices*/
1300
39.2k
                            for(i = 0; i < 8; i++)
1301
34.8k
                            {
1302
34.8k
                                au4_idx_tl[i] = 0;
1303
34.8k
                            }
1304
4.35k
                        }
1305
10.9k
                        if(ps_pps->i1_tiles_enabled_flag)
1306
6.69k
                        {
1307
                            /* Calculate availability flags at tile boundary */
1308
6.69k
                            if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1309
2.71k
                            {
1310
                                /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1311
2.71k
                                if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1312
2.61k
                                {
1313
                                    /*Set the boundary arrays*/
1314
                                    /*Calculate tile indices for neighbor pixels*/
1315
2.61k
                                    idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1316
2.61k
                                    au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1317
2.61k
                                    au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1318
2.61k
                                    au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1319
2.61k
                                    au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1320
2.61k
                                    au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1321
1322
2.61k
                                    if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1323
0
                                    {
1324
0
                                        if(ps_sao_ctxt->i4_ctb_x == 1)
1325
0
                                        {
1326
0
                                            au4_idx_tl[6] = -1;
1327
0
                                            au4_idx_tl[4] = -1;
1328
0
                                        }
1329
0
                                        else
1330
0
                                        {
1331
0
                                            au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1332
0
                                        }
1333
0
                                        if(ps_sao_ctxt->i4_ctb_y == 1)
1334
0
                                        {
1335
0
                                            au4_idx_tl[5] = -1;
1336
0
                                            au4_idx_tl[4] = -1;
1337
0
                                        }
1338
0
                                        else
1339
0
                                        {
1340
0
                                            au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1341
0
                                            au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1342
0
                                        }
1343
0
                                        au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1344
0
                                    }
1345
23.4k
                                    for(i = 0; i < 8; i++)
1346
20.8k
                                    {
1347
                                        /*Sets the edges that lie on the tile boundary*/
1348
20.8k
                                        if(au4_idx_tl[i] != idx_tl)
1349
8.09k
                                        {
1350
8.09k
                                            au1_tile_slice_boundary[i] |= 1;
1351
8.09k
                                            au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1352
8.09k
                                        }
1353
20.8k
                                    }
1354
2.61k
                                }
1355
2.71k
                            }
1356
6.69k
                        }
1357
1358
98.9k
                        for(i = 0; i < 8; i++)
1359
87.9k
                        {
1360
                            /*Sets the edges that lie on the slice/tile boundary*/
1361
87.9k
                            if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1362
9.17k
                            {
1363
9.17k
                                au1_avail_chroma[i] = 0;
1364
9.17k
                            }
1365
87.9k
                        }
1366
10.9k
                    }
1367
18.5k
                }
1368
1369
18.5k
                if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
1370
673
                {
1371
673
                    au1_avail_chroma[0] = 0;
1372
673
                    au1_avail_chroma[4] = 0;
1373
673
                    au1_avail_chroma[6] = 0;
1374
673
                }
1375
18.5k
                if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1376
0
                {
1377
0
                    au1_avail_chroma[1] = 0;
1378
0
                    au1_avail_chroma[5] = 0;
1379
0
                    au1_avail_chroma[7] = 0;
1380
0
                }
1381
1382
18.5k
                if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1383
412
                {
1384
412
                    au1_avail_chroma[2] = 0;
1385
412
                    au1_avail_chroma[4] = 0;
1386
412
                    au1_avail_chroma[5] = 0;
1387
412
                }
1388
18.5k
                if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1389
0
                {
1390
0
                    au1_avail_chroma[3] = 0;
1391
0
                    au1_avail_chroma[6] = 0;
1392
0
                    au1_avail_chroma[7] = 0;
1393
0
                }
1394
1395
18.5k
                {
1396
18.5k
                    au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
1397
18.5k
                    au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
1398
18.5k
                    au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
1399
18.5k
                    au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
1400
18.5k
                    if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
1401
2.45k
                    {
1402
2.45k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1403
2.45k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1404
2.45k
                    }
1405
1406
18.5k
                    if(chroma_yuv420sp_vu)
1407
923
                    {
1408
923
                        ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1409
923
                                                                             src_strd,
1410
923
                                                                             pu1_src_left_chroma,
1411
923
                                                                             pu1_src_top_chroma,
1412
923
                                                                             pu1_sao_src_chroma_top_left_ctb,
1413
923
                                                                             au1_src_top_right,
1414
923
                                                                             au1_sao_src_top_left_chroma_bot_left,
1415
923
                                                                             au1_avail_chroma,
1416
923
                                                                             ai1_offset_cr,
1417
923
                                                                             ai1_offset_cb,
1418
923
                                                                             sao_wd_chroma,
1419
923
                                                                             sao_ht_chroma);
1420
923
                    }
1421
17.6k
                    else
1422
17.6k
                    {
1423
17.6k
                        ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1424
17.6k
                                                                             src_strd,
1425
17.6k
                                                                             pu1_src_left_chroma,
1426
17.6k
                                                                             pu1_src_top_chroma,
1427
17.6k
                                                                             pu1_sao_src_chroma_top_left_ctb,
1428
17.6k
                                                                             au1_src_top_right,
1429
17.6k
                                                                             au1_sao_src_top_left_chroma_bot_left,
1430
17.6k
                                                                             au1_avail_chroma,
1431
17.6k
                                                                             ai1_offset_cb,
1432
17.6k
                                                                             ai1_offset_cr,
1433
17.6k
                                                                             sao_wd_chroma,
1434
17.6k
                                                                             sao_ht_chroma);
1435
17.6k
                    }
1436
18.5k
                }
1437
18.5k
            }
1438
296k
        }
1439
4.29M
        else if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && ((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)))
1440
909k
        {
1441
8.18M
            for(row = 0; row < sao_ht_chroma; row++)
1442
7.27M
            {
1443
7.27M
                pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1444
7.27M
                pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1445
7.27M
            }
1446
909k
            pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1447
909k
            pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1448
1449
909k
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1450
909k
        }
1451
1452
4.59M
        pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
1453
4.59M
        pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
1454
4.59M
        ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
1455
4.59M
    }
1456
1457
1458
    /* Top CTB */
1459
5.25M
    if((ps_sao_ctxt->i4_ctb_y > 0))
1460
4.76M
    {
1461
4.76M
        WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
1462
4.76M
        WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
1463
4.76M
        WORD32 sao_ht_luma = SAO_SHIFT_CTB;
1464
4.76M
        WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
1465
1466
4.76M
        WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
1467
4.76M
        WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
1468
4.76M
        WORD32 au4_idx_t[8], idx_t;
1469
1470
4.76M
        WORD32 remaining_cols;
1471
1472
4.76M
        slice_header_t *ps_slice_hdr_top;
1473
4.76M
        {
1474
4.76M
            WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
1475
4.76M
                                        (ps_sao_ctxt->i4_ctb_x);
1476
4.76M
            ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx];
1477
4.76M
        }
1478
1479
4.76M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
1480
4.76M
        if(remaining_cols <= SAO_SHIFT_CTB)
1481
174k
        {
1482
174k
            sao_wd_luma += remaining_cols;
1483
174k
        }
1484
4.76M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
1485
4.76M
        if(remaining_cols <= 2 * SAO_SHIFT_CTB)
1486
174k
        {
1487
174k
            sao_wd_chroma += remaining_cols;
1488
174k
        }
1489
1490
4.76M
        pu1_src_luma -= (sao_ht_luma * src_strd);
1491
4.76M
        pu1_src_chroma -= (sao_ht_chroma * src_strd);
1492
4.76M
        ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
1493
4.76M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1494
4.76M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1495
4.76M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
1496
4.76M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
1497
1498
4.76M
        if(0 != sao_wd_luma)
1499
4.76M
        {
1500
4.76M
            if(ps_slice_hdr_top->i1_slice_sao_luma_flag)
1501
380k
            {
1502
380k
                if(0 == ps_sao->b3_y_type_idx)
1503
317k
                {
1504
                    /* Update left, top and top-left */
1505
2.85M
                    for(row = 0; row < sao_ht_luma; row++)
1506
2.54M
                    {
1507
2.54M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1508
2.54M
                    }
1509
317k
                    pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1510
1511
317k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1512
1513
317k
                }
1514
1515
62.2k
                else if(1 == ps_sao->b3_y_type_idx)
1516
30.2k
                {
1517
30.2k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1518
30.2k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1519
30.2k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1520
30.2k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1521
1522
30.2k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1523
30.2k
                                                                              src_strd,
1524
30.2k
                                                                              pu1_src_left_luma,
1525
30.2k
                                                                              pu1_src_top_luma,
1526
30.2k
                                                                              pu1_sao_src_luma_top_left_ctb,
1527
30.2k
                                                                              ps_sao->b5_y_band_pos,
1528
30.2k
                                                                              ai1_offset_y,
1529
30.2k
                                                                              sao_wd_luma,
1530
30.2k
                                                                              sao_ht_luma
1531
30.2k
                                                                             );
1532
30.2k
                }
1533
1534
32.0k
                else // if(2 <= ps_sao->b3_y_type_idx)
1535
32.0k
                {
1536
32.0k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1537
32.0k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1538
32.0k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1539
32.0k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1540
1541
32.0k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
1542
32.0k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
1543
32.0k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
1544
1545
288k
                    for(i = 0; i < 8; i++)
1546
256k
                    {
1547
1548
256k
                        au4_ilf_across_tile_slice_enable[i] = 1;
1549
256k
                    }
1550
                    /******************************************************************
1551
                     * Derive the  Top-left CTB's neighbor pixel's slice indices.
1552
                     *
1553
                     *               T_T
1554
                     *          ____________
1555
                     *         |    |       |
1556
                     *         | T_L|  T    |T_R
1557
                     *         |    | ______|____
1558
                     *         |    |  T_D  |    |
1559
                     *         |    |       |    |
1560
                     *         |____|_______|    |
1561
                     *              |            |
1562
                     *              |            |
1563
                     *              |____________|
1564
                     *
1565
                     *****************************************************************/
1566
1567
                    /*In case of slices*/
1568
32.0k
                    {
1569
32.0k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1570
17.3k
                        {
1571
1572
17.3k
                            ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1573
17.3k
                            ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1574
1575
17.3k
                            ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1576
17.3k
                            ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1577
1578
17.3k
                            ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1579
17.3k
                            ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1580
1581
17.3k
                            ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1582
17.3k
                            ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1583
1584
17.3k
                            ctbx_t = ps_sao_ctxt->i4_ctb_x;
1585
17.3k
                            ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1586
1587
17.3k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1588
9.35k
                            {
1589
                                /*Calculate neighbor ctb slice indices*/
1590
9.35k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1591
1.35k
                                {
1592
1.35k
                                    au4_idx_t[0] = -1;
1593
1.35k
                                    au4_idx_t[6] = -1;
1594
1.35k
                                    au4_idx_t[4] = -1;
1595
1.35k
                                }
1596
8.00k
                                else
1597
8.00k
                                {
1598
8.00k
                                    au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1599
8.00k
                                    au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1600
8.00k
                                }
1601
9.35k
                                idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1602
9.35k
                                au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1603
9.35k
                                au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1604
9.35k
                                au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1605
1606
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1607
9.35k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1608
1.35k
                                {
1609
1.35k
                                    au4_ilf_across_tile_slice_enable[4] = 0;
1610
1.35k
                                    au4_ilf_across_tile_slice_enable[6] = 0;
1611
1.35k
                                    au4_ilf_across_tile_slice_enable[0] = 0;
1612
1.35k
                                }
1613
8.00k
                                else
1614
8.00k
                                {
1615
8.00k
                                    au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1616
8.00k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1617
8.00k
                                }
1618
1619
1620
1621
9.35k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1622
9.35k
                                au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1623
9.35k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1624
9.35k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1625
9.35k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1626
1627
9.35k
                                if(au4_idx_t[6] < idx_t)
1628
1.35k
                                {
1629
1.35k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1630
1.35k
                                }
1631
1632
                                /*
1633
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1634
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1635
                                 */
1636
1637
84.1k
                                for(i = 0; i < 8; i++)
1638
74.8k
                                {
1639
                                    /*Sets the edges that lie on the slice/tile boundary*/
1640
74.8k
                                    if(au4_idx_t[i] != idx_t)
1641
20.1k
                                    {
1642
20.1k
                                        au1_tile_slice_boundary[i] = 1;
1643
                                        /*Check for slice flag at such boundaries*/
1644
20.1k
                                    }
1645
54.6k
                                    else
1646
54.6k
                                    {
1647
54.6k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
1648
54.6k
                                    }
1649
74.8k
                                }
1650
                                /*Reset indices*/
1651
84.1k
                                for(i = 0; i < 8; i++)
1652
74.8k
                                {
1653
74.8k
                                    au4_idx_t[i] = 0;
1654
74.8k
                                }
1655
9.35k
                            }
1656
1657
17.3k
                            if(ps_pps->i1_tiles_enabled_flag)
1658
8.17k
                            {
1659
                                /* Calculate availability flags at slice boundary */
1660
8.17k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1661
4.80k
                                {
1662
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1663
4.80k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1664
4.62k
                                    {
1665
                                        /*Calculate neighbor ctb slice indices*/
1666
4.62k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
1667
721
                                        {
1668
721
                                            au4_idx_t[0] = -1;
1669
721
                                            au4_idx_t[6] = -1;
1670
721
                                            au4_idx_t[4] = -1;
1671
721
                                        }
1672
3.90k
                                        else
1673
3.90k
                                        {
1674
3.90k
                                            au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1675
3.90k
                                            au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1676
3.90k
                                        }
1677
4.62k
                                        idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1678
4.62k
                                        au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1679
4.62k
                                        au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1680
4.62k
                                        au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1681
1682
41.6k
                                        for(i = 0; i < 8; i++)
1683
37.0k
                                        {
1684
                                            /*Sets the edges that lie on the tile boundary*/
1685
37.0k
                                            if(au4_idx_t[i] != idx_t)
1686
16.0k
                                            {
1687
16.0k
                                                au1_tile_slice_boundary[i] |= 1;
1688
16.0k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1689
16.0k
                                            }
1690
37.0k
                                        }
1691
4.62k
                                    }
1692
4.80k
                                }
1693
8.17k
                            }
1694
1695
156k
                            for(i = 0; i < 8; i++)
1696
138k
                            {
1697
                                /*Sets the edges that lie on the slice/tile boundary*/
1698
138k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1699
19.4k
                                {
1700
19.4k
                                    au1_avail_luma[i] = 0;
1701
19.4k
                                }
1702
138k
                            }
1703
17.3k
                        }
1704
32.0k
                    }
1705
1706
1707
32.0k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
1708
4.13k
                    {
1709
4.13k
                        au1_avail_luma[0] = 0;
1710
4.13k
                        au1_avail_luma[4] = 0;
1711
4.13k
                        au1_avail_luma[6] = 0;
1712
4.13k
                    }
1713
1714
32.0k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
1715
3.59k
                    {
1716
3.59k
                        au1_avail_luma[1] = 0;
1717
3.59k
                        au1_avail_luma[5] = 0;
1718
3.59k
                        au1_avail_luma[7] = 0;
1719
3.59k
                    }
1720
1721
32.0k
                    if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
1722
0
                    {
1723
0
                        au1_avail_luma[2] = 0;
1724
0
                        au1_avail_luma[4] = 0;
1725
0
                        au1_avail_luma[5] = 0;
1726
0
                    }
1727
1728
32.0k
                    if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1729
0
                    {
1730
0
                        au1_avail_luma[3] = 0;
1731
0
                        au1_avail_luma[6] = 0;
1732
0
                        au1_avail_luma[7] = 0;
1733
0
                    }
1734
1735
32.0k
                    {
1736
32.0k
                        au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
1737
32.0k
                        u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
1738
32.0k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1739
32.0k
                                                                          src_strd,
1740
32.0k
                                                                          pu1_src_left_luma,
1741
32.0k
                                                                          pu1_src_top_luma,
1742
32.0k
                                                                          pu1_sao_src_luma_top_left_ctb,
1743
32.0k
                                                                          au1_src_top_right,
1744
32.0k
                                                                          &u1_sao_src_top_left_luma_bot_left,
1745
32.0k
                                                                          au1_avail_luma,
1746
32.0k
                                                                          ai1_offset_y,
1747
32.0k
                                                                          sao_wd_luma,
1748
32.0k
                                                                          sao_ht_luma);
1749
32.0k
                    }
1750
32.0k
                }
1751
380k
            }
1752
4.38M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1753
886k
            {
1754
                /* Update left, top and top-left */
1755
7.97M
                for(row = 0; row < sao_ht_luma; row++)
1756
7.08M
                {
1757
7.08M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1758
7.08M
                }
1759
886k
                pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1760
1761
886k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1762
886k
            }
1763
4.76M
        }
1764
1765
4.76M
        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && 0 != sao_wd_chroma)
1766
4.26M
        {
1767
4.26M
            if(ps_slice_hdr_top->i1_slice_sao_chroma_flag)
1768
238k
            {
1769
238k
                if(0 == ps_sao->b3_cb_type_idx)
1770
205k
                {
1771
1772
1.85M
                    for(row = 0; row < sao_ht_chroma; row++)
1773
1.64M
                    {
1774
1.64M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1775
1.64M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1776
1.64M
                    }
1777
205k
                    pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1778
205k
                    pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1779
1780
205k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1781
1782
205k
                }
1783
1784
32.7k
                else if(1 == ps_sao->b3_cb_type_idx)
1785
14.6k
                {
1786
14.6k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1787
14.6k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1788
14.6k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1789
14.6k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1790
1791
14.6k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1792
14.6k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1793
14.6k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1794
14.6k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1795
1796
14.6k
                    if(chroma_yuv420sp_vu)
1797
490
                    {
1798
490
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1799
490
                                                                                    src_strd,
1800
490
                                                                                    pu1_src_left_chroma,
1801
490
                                                                                    pu1_src_top_chroma,
1802
490
                                                                                    pu1_sao_src_chroma_top_left_ctb,
1803
490
                                                                                    ps_sao->b5_cr_band_pos,
1804
490
                                                                                    ps_sao->b5_cb_band_pos,
1805
490
                                                                                    ai1_offset_cr,
1806
490
                                                                                    ai1_offset_cb,
1807
490
                                                                                    sao_wd_chroma,
1808
490
                                                                                    sao_ht_chroma
1809
490
                                                                                   );
1810
490
                    }
1811
14.1k
                    else
1812
14.1k
                    {
1813
14.1k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1814
14.1k
                                                                                    src_strd,
1815
14.1k
                                                                                    pu1_src_left_chroma,
1816
14.1k
                                                                                    pu1_src_top_chroma,
1817
14.1k
                                                                                    pu1_sao_src_chroma_top_left_ctb,
1818
14.1k
                                                                                    ps_sao->b5_cb_band_pos,
1819
14.1k
                                                                                    ps_sao->b5_cr_band_pos,
1820
14.1k
                                                                                    ai1_offset_cb,
1821
14.1k
                                                                                    ai1_offset_cr,
1822
14.1k
                                                                                    sao_wd_chroma,
1823
14.1k
                                                                                    sao_ht_chroma
1824
14.1k
                                                                                   );
1825
14.1k
                    }
1826
14.6k
                }
1827
18.1k
                else // if(2 <= ps_sao->b3_cb_type_idx)
1828
18.1k
                {
1829
18.1k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1830
18.1k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1831
18.1k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1832
18.1k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1833
1834
18.1k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1835
18.1k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1836
18.1k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1837
18.1k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1838
1839
164k
                    for(i = 0; i < 8; i++)
1840
145k
                    {
1841
145k
                        au1_avail_chroma[i] = 255;
1842
145k
                        au1_tile_slice_boundary[i] = 0;
1843
145k
                        au4_idx_t[i] = 0;
1844
145k
                        au4_ilf_across_tile_slice_enable[i] = 1;
1845
145k
                    }
1846
1847
18.1k
                    {
1848
18.1k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1849
10.0k
                        {
1850
10.0k
                            ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1851
10.0k
                            ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1852
1853
10.0k
                            ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1854
10.0k
                            ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1855
1856
10.0k
                            ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1857
10.0k
                            ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1858
1859
10.0k
                            ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1860
10.0k
                            ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1861
1862
10.0k
                            ctbx_t = ps_sao_ctxt->i4_ctb_x;
1863
10.0k
                            ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1864
1865
10.0k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1866
2.79k
                            {
1867
2.79k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1868
272
                                {
1869
272
                                    au4_idx_t[0] = -1;
1870
272
                                    au4_idx_t[6] = -1;
1871
272
                                    au4_idx_t[4] = -1;
1872
272
                                }
1873
2.52k
                                else
1874
2.52k
                                {
1875
2.52k
                                    au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1876
2.52k
                                    au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1877
2.52k
                                }
1878
2.79k
                                idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1879
2.79k
                                au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1880
2.79k
                                au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1881
2.79k
                                au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1882
1883
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1884
1885
2.79k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1886
272
                                {
1887
272
                                    au4_ilf_across_tile_slice_enable[4] = 0;
1888
272
                                    au4_ilf_across_tile_slice_enable[6] = 0;
1889
272
                                    au4_ilf_across_tile_slice_enable[0] = 0;
1890
272
                                }
1891
2.52k
                                else
1892
2.52k
                                {
1893
2.52k
                                    au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1894
2.52k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1895
2.52k
                                }
1896
1897
2.79k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1898
2.79k
                                au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1899
2.79k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1900
2.79k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1901
2.79k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1902
1903
2.79k
                                if(idx_t > au4_idx_t[6])
1904
273
                                {
1905
273
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1906
273
                                }
1907
1908
                                /*
1909
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1910
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1911
                                 */
1912
25.1k
                                for(i = 0; i < 8; i++)
1913
22.3k
                                {
1914
                                    /*Sets the edges that lie on the slice/tile boundary*/
1915
22.3k
                                    if(au4_idx_t[i] != idx_t)
1916
5.98k
                                    {
1917
5.98k
                                        au1_tile_slice_boundary[i] = 1;
1918
5.98k
                                    }
1919
16.3k
                                    else
1920
16.3k
                                    {
1921
                                        /*Indicates that the neighbour belongs to same/dependent slice*/
1922
16.3k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
1923
16.3k
                                    }
1924
22.3k
                                }
1925
                                /*Reset indices*/
1926
25.1k
                                for(i = 0; i < 8; i++)
1927
22.3k
                                {
1928
22.3k
                                    au4_idx_t[i] = 0;
1929
22.3k
                                }
1930
2.79k
                            }
1931
10.0k
                            if(ps_pps->i1_tiles_enabled_flag)
1932
7.31k
                            {
1933
                                /* Calculate availability flags at slice boundary */
1934
7.31k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1935
2.98k
                                {
1936
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1937
2.98k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1938
2.86k
                                    {
1939
                                        /*Calculate neighbor ctb slice indices*/
1940
2.86k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
1941
730
                                        {
1942
730
                                            au4_idx_t[0] = -1;
1943
730
                                            au4_idx_t[6] = -1;
1944
730
                                            au4_idx_t[4] = -1;
1945
730
                                        }
1946
2.13k
                                        else
1947
2.13k
                                        {
1948
2.13k
                                            au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1949
2.13k
                                            au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1950
2.13k
                                        }
1951
2.86k
                                        idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1952
2.86k
                                        au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1953
2.86k
                                        au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1954
2.86k
                                        au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1955
1956
25.8k
                                        for(i = 0; i < 8; i++)
1957
22.9k
                                        {
1958
                                            /*Sets the edges that lie on the tile boundary*/
1959
22.9k
                                            if(au4_idx_t[i] != idx_t)
1960
9.42k
                                            {
1961
9.42k
                                                au1_tile_slice_boundary[i] |= 1;
1962
9.42k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1963
9.42k
                                            }
1964
22.9k
                                        }
1965
2.86k
                                    }
1966
2.98k
                                }
1967
7.31k
                            }
1968
90.5k
                            for(i = 0; i < 8; i++)
1969
80.4k
                            {
1970
                                /*Sets the edges that lie on the slice/tile boundary*/
1971
80.4k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1972
10.0k
                                {
1973
10.0k
                                    au1_avail_chroma[i] = 0;
1974
10.0k
                                }
1975
80.4k
                            }
1976
1977
10.0k
                        }
1978
18.1k
                    }
1979
18.1k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
1980
2.55k
                    {
1981
2.55k
                        au1_avail_chroma[0] = 0;
1982
2.55k
                        au1_avail_chroma[4] = 0;
1983
2.55k
                        au1_avail_chroma[6] = 0;
1984
2.55k
                    }
1985
1986
18.1k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
1987
2.24k
                    {
1988
2.24k
                        au1_avail_chroma[1] = 0;
1989
2.24k
                        au1_avail_chroma[5] = 0;
1990
2.24k
                        au1_avail_chroma[7] = 0;
1991
2.24k
                    }
1992
1993
18.1k
                    if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1994
18
                    {
1995
18
                        au1_avail_chroma[2] = 0;
1996
18
                        au1_avail_chroma[4] = 0;
1997
18
                        au1_avail_chroma[5] = 0;
1998
18
                    }
1999
2000
18.1k
                    if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
2001
0
                    {
2002
0
                        au1_avail_chroma[3] = 0;
2003
0
                        au1_avail_chroma[6] = 0;
2004
0
                        au1_avail_chroma[7] = 0;
2005
0
                    }
2006
2007
18.1k
                    {
2008
18.1k
                        au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
2009
18.1k
                        au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
2010
18.1k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2011
18.1k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2012
2013
18.1k
                        if(chroma_yuv420sp_vu)
2014
1.05k
                        {
2015
1.05k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2016
1.05k
                                                                                 src_strd,
2017
1.05k
                                                                                 pu1_src_left_chroma,
2018
1.05k
                                                                                 pu1_src_top_chroma,
2019
1.05k
                                                                                 pu1_sao_src_chroma_top_left_ctb,
2020
1.05k
                                                                                 au1_src_top_right,
2021
1.05k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
2022
1.05k
                                                                                 au1_avail_chroma,
2023
1.05k
                                                                                 ai1_offset_cr,
2024
1.05k
                                                                                 ai1_offset_cb,
2025
1.05k
                                                                                 sao_wd_chroma,
2026
1.05k
                                                                                 sao_ht_chroma);
2027
1.05k
                        }
2028
17.0k
                        else
2029
17.0k
                        {
2030
17.0k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2031
17.0k
                                                                                 src_strd,
2032
17.0k
                                                                                 pu1_src_left_chroma,
2033
17.0k
                                                                                 pu1_src_top_chroma,
2034
17.0k
                                                                                 pu1_sao_src_chroma_top_left_ctb,
2035
17.0k
                                                                                 au1_src_top_right,
2036
17.0k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
2037
17.0k
                                                                                 au1_avail_chroma,
2038
17.0k
                                                                                 ai1_offset_cb,
2039
17.0k
                                                                                 ai1_offset_cr,
2040
17.0k
                                                                                 sao_wd_chroma,
2041
17.0k
                                                                                 sao_ht_chroma);
2042
17.0k
                        }
2043
18.1k
                    }
2044
2045
18.1k
                }
2046
238k
            }
2047
4.02M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2048
520k
            {
2049
4.68M
                for(row = 0; row < sao_ht_chroma; row++)
2050
4.16M
                {
2051
4.16M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2052
4.16M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2053
4.16M
                }
2054
520k
                pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2055
520k
                pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2056
2057
520k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2058
520k
            }
2059
4.26M
        }
2060
2061
4.76M
        pu1_src_luma += sao_ht_luma * src_strd;
2062
4.76M
        pu1_src_chroma += sao_ht_chroma * src_strd;
2063
4.76M
        ps_sao += (ps_sps->i2_pic_wd_in_ctb);
2064
4.76M
    }
2065
2066
    /* Left CTB */
2067
5.25M
    if(ps_sao_ctxt->i4_ctb_x > 0)
2068
5.06M
    {
2069
5.06M
        WORD32 sao_wd_luma = SAO_SHIFT_CTB;
2070
5.06M
        WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
2071
5.06M
        WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2072
5.06M
        WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2073
2074
5.06M
        WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
2075
5.06M
        WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
2076
5.06M
        WORD32 au4_idx_l[8], idx_l;
2077
2078
5.06M
        WORD32 remaining_rows;
2079
5.06M
        slice_header_t *ps_slice_hdr_left;
2080
5.06M
        {
2081
5.06M
            WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb +
2082
5.06M
                                        (ps_sao_ctxt->i4_ctb_x - 1);
2083
5.06M
            ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx];
2084
5.06M
        }
2085
2086
5.06M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2087
5.06M
        if(remaining_rows <= SAO_SHIFT_CTB)
2088
471k
        {
2089
471k
            sao_ht_luma += remaining_rows;
2090
471k
        }
2091
5.06M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2092
5.06M
        if(remaining_rows <= SAO_SHIFT_CTB)
2093
471k
        {
2094
471k
            sao_ht_chroma += remaining_rows;
2095
471k
        }
2096
2097
5.06M
        pu1_src_luma -= sao_wd_luma;
2098
5.06M
        pu1_src_chroma -= sao_wd_chroma;
2099
5.06M
        ps_sao -= 1;
2100
5.06M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
2101
5.06M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
2102
5.06M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2103
5.06M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2104
2105
2106
5.06M
        if(0 != sao_ht_luma)
2107
5.06M
        {
2108
5.06M
            if(ps_slice_hdr_left->i1_slice_sao_luma_flag)
2109
384k
            {
2110
384k
                if(0 == ps_sao->b3_y_type_idx)
2111
320k
                {
2112
                    /* Update left, top and top-left */
2113
10.2M
                    for(row = 0; row < sao_ht_luma; row++)
2114
9.96M
                    {
2115
9.96M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2116
9.96M
                    }
2117
                    /*Update in next location*/
2118
320k
                    pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2119
2120
320k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2121
2122
320k
                }
2123
2124
63.6k
                else if(1 == ps_sao->b3_y_type_idx)
2125
31.6k
                {
2126
31.6k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2127
31.6k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2128
31.6k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2129
31.6k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2130
2131
31.6k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2132
31.6k
                                                                              src_strd,
2133
31.6k
                                                                              pu1_src_left_luma,
2134
31.6k
                                                                              pu1_src_top_luma,
2135
31.6k
                                                                              pu1_sao_src_top_left_luma_curr_ctb,
2136
31.6k
                                                                              ps_sao->b5_y_band_pos,
2137
31.6k
                                                                              ai1_offset_y,
2138
31.6k
                                                                              sao_wd_luma,
2139
31.6k
                                                                              sao_ht_luma
2140
31.6k
                                                                             );
2141
31.6k
                }
2142
2143
32.0k
                else // if(2 <= ps_sao->b3_y_type_idx)
2144
32.0k
                {
2145
32.0k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2146
32.0k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2147
32.0k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2148
32.0k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2149
2150
288k
                    for(i = 0; i < 8; i++)
2151
256k
                    {
2152
256k
                        au1_avail_luma[i] = 255;
2153
256k
                        au1_tile_slice_boundary[i] = 0;
2154
256k
                        au4_idx_l[i] = 0;
2155
256k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2156
256k
                    }
2157
                    /******************************************************************
2158
                     * Derive the Left CTB's neighbour pixel's slice indices.
2159
                     *
2160
                     *
2161
                     *          ____________
2162
                     *         |    |       |
2163
                     *         | L_T|       |
2164
                     *         |____|_______|____
2165
                     *         |    |       |    |
2166
                     *     L_L |  L |  L_R  |    |
2167
                     *         |____|_______|    |
2168
                     *              |            |
2169
                     *          L_D |            |
2170
                     *              |____________|
2171
                     *
2172
                     *****************************************************************/
2173
2174
                    /*In case of slices or tiles*/
2175
32.0k
                    {
2176
32.0k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2177
16.0k
                        {
2178
16.0k
                            ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2179
16.0k
                            ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2180
2181
16.0k
                            ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2182
16.0k
                            ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2183
2184
16.0k
                            ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2185
16.0k
                            ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2186
2187
16.0k
                            ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2188
16.0k
                            ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2189
2190
16.0k
                            ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2191
16.0k
                            ctby_l = ps_sao_ctxt->i4_ctb_y;
2192
2193
16.0k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2194
8.14k
                            {
2195
8.14k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2196
505
                                {
2197
505
                                    au4_idx_l[2] = -1;
2198
505
                                    au4_idx_l[4] = -1;
2199
505
                                    au4_idx_l[5] = -1;
2200
505
                                }
2201
7.64k
                                else
2202
7.64k
                                {
2203
7.64k
                                    au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2204
7.64k
                                    au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2205
7.64k
                                }
2206
8.14k
                                idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2207
8.14k
                                au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2208
8.14k
                                au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2209
8.14k
                                au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2210
2211
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
2212
8.14k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2213
505
                                {
2214
505
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2215
505
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2216
505
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2217
505
                                }
2218
7.64k
                                else
2219
7.64k
                                {
2220
7.64k
                                    au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2221
7.64k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2222
2223
7.64k
                                }
2224
                                //TODO: ILF flag checks for [0] and [6] are missing.
2225
8.14k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2226
8.14k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2227
8.14k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2228
2229
8.14k
                                if(idx_l < au4_idx_l[5])
2230
0
                                {
2231
0
                                    au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2232
0
                                }
2233
2234
                                /*
2235
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2236
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2237
                                 */
2238
73.2k
                                for(i = 0; i < 8; i++)
2239
65.1k
                                {
2240
                                    /*Sets the edges that lie on the slice/tile boundary*/
2241
65.1k
                                    if(au4_idx_l[i] != idx_l)
2242
16.9k
                                    {
2243
16.9k
                                        au1_tile_slice_boundary[i] = 1;
2244
16.9k
                                    }
2245
48.1k
                                    else
2246
48.1k
                                    {
2247
48.1k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2248
48.1k
                                    }
2249
65.1k
                                }
2250
                                /*Reset indices*/
2251
73.2k
                                for(i = 0; i < 8; i++)
2252
65.1k
                                {
2253
65.1k
                                    au4_idx_l[i] = 0;
2254
65.1k
                                }
2255
8.14k
                            }
2256
2257
16.0k
                            if(ps_pps->i1_tiles_enabled_flag)
2258
7.98k
                            {
2259
                                /* Calculate availability flags at tile boundary */
2260
7.98k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2261
3.70k
                                {
2262
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2263
3.70k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2264
3.54k
                                    {
2265
3.54k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2266
1.18k
                                        {
2267
1.18k
                                            au4_idx_l[2] = -1;
2268
1.18k
                                            au4_idx_l[4] = -1;
2269
1.18k
                                            au4_idx_l[5] = -1;
2270
1.18k
                                        }
2271
2.35k
                                        else
2272
2.35k
                                        {
2273
2.35k
                                            au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2274
2.35k
                                            au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2275
2.35k
                                        }
2276
2277
3.54k
                                        idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2278
3.54k
                                        au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2279
3.54k
                                        au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2280
3.54k
                                        au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2281
2282
31.8k
                                        for(i = 0; i < 8; i++)
2283
28.3k
                                        {
2284
                                            /*Sets the edges that lie on the slice/tile boundary*/
2285
28.3k
                                            if(au4_idx_l[i] != idx_l)
2286
11.8k
                                            {
2287
11.8k
                                                au1_tile_slice_boundary[i] |= 1;
2288
11.8k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
2289
11.8k
                                            }
2290
28.3k
                                        }
2291
3.54k
                                    }
2292
3.70k
                                }
2293
7.98k
                            }
2294
2295
144k
                            for(i = 0; i < 8; i++)
2296
128k
                            {
2297
                                /*Sets the edges that lie on the slice/tile boundary*/
2298
128k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2299
13.4k
                                {
2300
13.4k
                                    au1_avail_luma[i] = 0;
2301
13.4k
                                }
2302
128k
                            }
2303
16.0k
                        }
2304
32.0k
                    }
2305
32.0k
                    if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
2306
0
                    {
2307
0
                        au1_avail_luma[0] = 0;
2308
0
                        au1_avail_luma[4] = 0;
2309
0
                        au1_avail_luma[6] = 0;
2310
0
                    }
2311
32.0k
                    if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2312
0
                    {
2313
0
                        au1_avail_luma[1] = 0;
2314
0
                        au1_avail_luma[5] = 0;
2315
0
                        au1_avail_luma[7] = 0;
2316
0
                    }
2317
2318
32.0k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2319
7.80k
                    {
2320
7.80k
                        au1_avail_luma[2] = 0;
2321
7.80k
                        au1_avail_luma[4] = 0;
2322
7.80k
                        au1_avail_luma[5] = 0;
2323
7.80k
                    }
2324
2325
32.0k
                    if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2326
3.54k
                    {
2327
3.54k
                        au1_avail_luma[3] = 0;
2328
3.54k
                        au1_avail_luma[6] = 0;
2329
3.54k
                        au1_avail_luma[7] = 0;
2330
3.54k
                    }
2331
2332
32.0k
                    {
2333
32.0k
                        au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
2334
32.0k
                        u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
2335
32.0k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2336
32.0k
                                                                          src_strd,
2337
32.0k
                                                                          pu1_src_left_luma,
2338
32.0k
                                                                          pu1_src_top_luma,
2339
32.0k
                                                                          pu1_sao_src_top_left_luma_curr_ctb,
2340
32.0k
                                                                          au1_src_top_right,
2341
32.0k
                                                                          &u1_sao_src_top_left_luma_bot_left,
2342
32.0k
                                                                          au1_avail_luma,
2343
32.0k
                                                                          ai1_offset_y,
2344
32.0k
                                                                          sao_wd_luma,
2345
32.0k
                                                                          sao_ht_luma);
2346
32.0k
                    }
2347
2348
32.0k
                }
2349
384k
            }
2350
4.67M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2351
899k
            {
2352
                /* Update left, top and top-left */
2353
19.5M
                for(row = 0; row < sao_ht_luma; row++)
2354
18.6M
                {
2355
18.6M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2356
18.6M
                }
2357
                /*Update in next location*/
2358
899k
                pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2359
2360
899k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2361
899k
            }
2362
5.06M
        }
2363
2364
5.06M
        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && 0 != sao_ht_chroma)
2365
4.55M
        {
2366
4.55M
            if(ps_slice_hdr_left->i1_slice_sao_chroma_flag)
2367
243k
            {
2368
243k
                if(0 == ps_sao->b3_cb_type_idx)
2369
210k
                {
2370
3.76M
                    for(row = 0; row < sao_ht_chroma; row++)
2371
3.55M
                    {
2372
3.55M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2373
3.55M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2374
3.55M
                    }
2375
210k
                    pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2376
210k
                    pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2377
2378
210k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2379
210k
                }
2380
2381
33.6k
                else if(1 == ps_sao->b3_cb_type_idx)
2382
14.9k
                {
2383
14.9k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2384
14.9k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2385
14.9k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2386
14.9k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2387
2388
14.9k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2389
14.9k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2390
14.9k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2391
14.9k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2392
2393
14.9k
                    if(chroma_yuv420sp_vu)
2394
444
                    {
2395
444
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2396
444
                                                                                    src_strd,
2397
444
                                                                                    pu1_src_left_chroma,
2398
444
                                                                                    pu1_src_top_chroma,
2399
444
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2400
444
                                                                                    ps_sao->b5_cr_band_pos,
2401
444
                                                                                    ps_sao->b5_cb_band_pos,
2402
444
                                                                                    ai1_offset_cr,
2403
444
                                                                                    ai1_offset_cb,
2404
444
                                                                                    sao_wd_chroma,
2405
444
                                                                                    sao_ht_chroma
2406
444
                                                                                   );
2407
444
                    }
2408
14.5k
                    else
2409
14.5k
                    {
2410
14.5k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2411
14.5k
                                                                                    src_strd,
2412
14.5k
                                                                                    pu1_src_left_chroma,
2413
14.5k
                                                                                    pu1_src_top_chroma,
2414
14.5k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2415
14.5k
                                                                                    ps_sao->b5_cb_band_pos,
2416
14.5k
                                                                                    ps_sao->b5_cr_band_pos,
2417
14.5k
                                                                                    ai1_offset_cb,
2418
14.5k
                                                                                    ai1_offset_cr,
2419
14.5k
                                                                                    sao_wd_chroma,
2420
14.5k
                                                                                    sao_ht_chroma
2421
14.5k
                                                                                   );
2422
14.5k
                    }
2423
14.9k
                }
2424
2425
18.7k
                else // if(2 <= ps_sao->b3_cb_type_idx)
2426
18.7k
                {
2427
18.7k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2428
18.7k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2429
18.7k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2430
18.7k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2431
2432
18.7k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2433
18.7k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2434
18.7k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2435
18.7k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2436
2437
168k
                    for(i = 0; i < 8; i++)
2438
149k
                    {
2439
149k
                        au1_avail_chroma[i] = 255;
2440
149k
                        au1_tile_slice_boundary[i] = 0;
2441
149k
                        au4_idx_l[i] = 0;
2442
149k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2443
149k
                    }
2444
                    /*In case of slices or tiles*/
2445
18.7k
                    {
2446
18.7k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2447
9.47k
                        {
2448
9.47k
                            ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2449
9.47k
                            ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2450
2451
9.47k
                            ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2452
9.47k
                            ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2453
2454
9.47k
                            ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2455
9.47k
                            ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2456
2457
9.47k
                            ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2458
9.47k
                            ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2459
2460
9.47k
                            ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2461
9.47k
                            ctby_l = ps_sao_ctxt->i4_ctb_y;
2462
2463
9.47k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2464
2.31k
                            {
2465
2.31k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2466
135
                                {
2467
135
                                    au4_idx_l[2] = -1;
2468
135
                                    au4_idx_l[4] = -1;
2469
135
                                    au4_idx_l[5] = -1;
2470
135
                                }
2471
2.17k
                                else
2472
2.17k
                                {
2473
2.17k
                                    au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2474
2.17k
                                    au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2475
2.17k
                                }
2476
2.31k
                                idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2477
2.31k
                                au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2478
2.31k
                                au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2479
2.31k
                                au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2480
2481
                                /*Verify that the neighbour ctbs don't cross pic boundary.*/
2482
2.31k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2483
135
                                {
2484
135
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2485
135
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2486
135
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2487
135
                                }
2488
2.17k
                                else
2489
2.17k
                                {
2490
2.17k
                                    au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2491
2.17k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2492
2.17k
                                }
2493
2494
2.31k
                                if(au4_idx_l[5] > idx_l)
2495
0
                                {
2496
0
                                    au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2497
0
                                }
2498
2499
                                //  au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2500
2.31k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2501
2.31k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2502
2.31k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2503
                                /*
2504
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2505
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2506
                                 */
2507
20.8k
                                for(i = 0; i < 8; i++)
2508
18.4k
                                {
2509
                                    /*Sets the edges that lie on the slice/tile boundary*/
2510
18.4k
                                    if(au4_idx_l[i] != idx_l)
2511
5.45k
                                    {
2512
5.45k
                                        au1_tile_slice_boundary[i] = 1;
2513
5.45k
                                    }
2514
13.0k
                                    else
2515
13.0k
                                    {
2516
13.0k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2517
13.0k
                                    }
2518
18.4k
                                }
2519
                                /*Reset indices*/
2520
20.8k
                                for(i = 0; i < 8; i++)
2521
18.4k
                                {
2522
18.4k
                                    au4_idx_l[i] = 0;
2523
18.4k
                                }
2524
2.31k
                            }
2525
9.47k
                            if(ps_pps->i1_tiles_enabled_flag)
2526
7.21k
                            {
2527
                                /* Calculate availability flags at tile boundary */
2528
7.21k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2529
3.17k
                                {
2530
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2531
3.17k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2532
3.05k
                                    {
2533
3.05k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2534
1.12k
                                        {
2535
1.12k
                                            au4_idx_l[2] = -1;
2536
1.12k
                                            au4_idx_l[4] = -1;
2537
1.12k
                                            au4_idx_l[5] = -1;
2538
1.12k
                                        }
2539
1.93k
                                        else
2540
1.93k
                                        {
2541
1.93k
                                            au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2542
1.93k
                                            au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2543
1.93k
                                        }
2544
2545
3.05k
                                        idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2546
3.05k
                                        au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2547
3.05k
                                        au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2548
3.05k
                                        au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2549
2550
27.4k
                                        for(i = 0; i < 8; i++)
2551
24.4k
                                        {
2552
                                            /*Sets the edges that lie on the slice/tile boundary*/
2553
24.4k
                                            if(au4_idx_l[i] != idx_l)
2554
9.57k
                                            {
2555
9.57k
                                                au1_tile_slice_boundary[i] |= 1;
2556
9.57k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2557
9.57k
                                            }
2558
24.4k
                                        }
2559
3.05k
                                    }
2560
3.17k
                                }
2561
7.21k
                            }
2562
85.2k
                            for(i = 0; i < 8; i++)
2563
75.8k
                            {
2564
                                /*Marks a neighbour as unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
2565
75.8k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2566
10.0k
                                {
2567
10.0k
                                    au1_avail_chroma[i] = 0;
2568
10.0k
                                }
2569
75.8k
                            }
2570
9.47k
                        }
2571
18.7k
                    }
2572
18.7k
                    if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
2573
117
                    {
2574
117
                        au1_avail_chroma[0] = 0;
2575
117
                        au1_avail_chroma[4] = 0;
2576
117
                        au1_avail_chroma[6] = 0;
2577
117
                    }
2578
2579
18.7k
                    if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2580
0
                    {
2581
0
                        au1_avail_chroma[1] = 0;
2582
0
                        au1_avail_chroma[5] = 0;
2583
0
                        au1_avail_chroma[7] = 0;
2584
0
                    }
2585
2586
18.7k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2587
4.19k
                    {
2588
4.19k
                        au1_avail_chroma[2] = 0;
2589
4.19k
                        au1_avail_chroma[4] = 0;
2590
4.19k
                        au1_avail_chroma[5] = 0;
2591
4.19k
                    }
2592
2593
18.7k
                    if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
2594
2.74k
                    {
2595
2.74k
                        au1_avail_chroma[3] = 0;
2596
2.74k
                        au1_avail_chroma[6] = 0;
2597
2.74k
                        au1_avail_chroma[7] = 0;
2598
2.74k
                    }
2599
2600
18.7k
                    {
2601
18.7k
                        au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
2602
18.7k
                        au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
2603
18.7k
                        au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
2604
18.7k
                        au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
2605
                        //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2606
                        //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2607
18.7k
                        if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
2608
84
                        {
2609
84
                            au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
2610
84
                            au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
2611
84
                        }
2612
2613
2614
18.7k
                        if(chroma_yuv420sp_vu)
2615
812
                        {
2616
812
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2617
812
                                                                                 src_strd,
2618
812
                                                                                 pu1_src_left_chroma,
2619
812
                                                                                 pu1_src_top_chroma,
2620
812
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2621
812
                                                                                 au1_src_top_right,
2622
812
                                                                                 au1_src_bot_left,
2623
812
                                                                                 au1_avail_chroma,
2624
812
                                                                                 ai1_offset_cr,
2625
812
                                                                                 ai1_offset_cb,
2626
812
                                                                                 sao_wd_chroma,
2627
812
                                                                                 sao_ht_chroma);
2628
812
                        }
2629
17.9k
                        else
2630
17.9k
                        {
2631
17.9k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2632
17.9k
                                                                                 src_strd,
2633
17.9k
                                                                                 pu1_src_left_chroma,
2634
17.9k
                                                                                 pu1_src_top_chroma,
2635
17.9k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2636
17.9k
                                                                                 au1_src_top_right,
2637
17.9k
                                                                                 au1_src_bot_left,
2638
17.9k
                                                                                 au1_avail_chroma,
2639
17.9k
                                                                                 ai1_offset_cb,
2640
17.9k
                                                                                 ai1_offset_cr,
2641
17.9k
                                                                                 sao_wd_chroma,
2642
17.9k
                                                                                 sao_ht_chroma);
2643
17.9k
                        }
2644
18.7k
                    }
2645
2646
18.7k
                }
2647
243k
            }
2648
4.31M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2649
532k
            {
2650
6.80M
                for(row = 0; row < sao_ht_chroma; row++)
2651
6.27M
                {
2652
6.27M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2653
6.27M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2654
6.27M
                }
2655
532k
                pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2656
532k
                pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2657
2658
532k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2659
532k
            }
2660
2661
4.55M
        }
2662
5.06M
        pu1_src_luma += sao_wd_luma;
2663
5.06M
        pu1_src_chroma += sao_wd_chroma;
2664
5.06M
        ps_sao += 1;
2665
5.06M
    }
2666
2667
2668
    /* Current CTB */
2669
5.25M
    {
2670
5.25M
        WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
2671
5.25M
        WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
2672
5.25M
        WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2673
5.25M
        WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2674
5.25M
        WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
2675
5.25M
        WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
2676
5.25M
        WORD32 au4_idx_c[8], idx_c;
2677
2678
5.25M
        WORD32 remaining_rows;
2679
5.25M
        WORD32 remaining_cols;
2680
2681
5.25M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
2682
5.25M
        if(remaining_cols <= SAO_SHIFT_CTB)
2683
190k
        {
2684
190k
            sao_wd_luma += remaining_cols;
2685
190k
        }
2686
5.25M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
2687
5.25M
        if(remaining_cols <= 2 * SAO_SHIFT_CTB)
2688
190k
        {
2689
190k
            sao_wd_chroma += remaining_cols;
2690
190k
        }
2691
2692
5.25M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2693
5.25M
        if(remaining_rows <= SAO_SHIFT_CTB)
2694
487k
        {
2695
487k
            sao_ht_luma += remaining_rows;
2696
487k
        }
2697
5.25M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2698
5.25M
        if(remaining_rows <= SAO_SHIFT_CTB)
2699
487k
        {
2700
487k
            sao_ht_chroma += remaining_rows;
2701
487k
        }
2702
2703
5.25M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2704
5.25M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2705
5.25M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2706
5.25M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2707
2708
5.25M
        if((0 != sao_wd_luma) && (0 != sao_ht_luma))
2709
5.25M
        {
2710
5.25M
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2711
415k
            {
2712
415k
                if(0 == ps_sao->b3_y_type_idx)
2713
344k
                {
2714
                    /* Update left, top and top-left */
2715
11.1M
                    for(row = 0; row < sao_ht_luma; row++)
2716
10.7M
                    {
2717
10.7M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2718
10.7M
                    }
2719
344k
                    pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2720
2721
344k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2722
2723
344k
                    pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2724
2725
344k
                }
2726
2727
71.3k
                else if(1 == ps_sao->b3_y_type_idx)
2728
35.3k
                {
2729
35.3k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2730
35.3k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2731
35.3k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2732
35.3k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2733
2734
35.3k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2735
35.3k
                                                                              src_strd,
2736
35.3k
                                                                              pu1_src_left_luma,
2737
35.3k
                                                                              pu1_src_top_luma,
2738
35.3k
                                                                              pu1_sao_src_top_left_luma_curr_ctb,
2739
35.3k
                                                                              ps_sao->b5_y_band_pos,
2740
35.3k
                                                                              ai1_offset_y,
2741
35.3k
                                                                              sao_wd_luma,
2742
35.3k
                                                                              sao_ht_luma
2743
35.3k
                                                                             );
2744
35.3k
                }
2745
2746
36.0k
                else // if(2 <= ps_sao->b3_y_type_idx)
2747
36.0k
                {
2748
36.0k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2749
36.0k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2750
36.0k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2751
36.0k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2752
2753
324k
                    for(i = 0; i < 8; i++)
2754
288k
                    {
2755
288k
                        au1_avail_luma[i] = 255;
2756
288k
                        au1_tile_slice_boundary[i] = 0;
2757
288k
                        au4_idx_c[i] = 0;
2758
288k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2759
288k
                    }
2760
                    /******************************************************************
2761
                     * Derive the current CTB's neighbour pixels' slice indices.
2762
                     *
2763
                     *
2764
                     *          ____________
2765
                     *         |    |       |
2766
                     *         |    | C_T   |
2767
                     *         |____|_______|____
2768
                     *         |    |       |    |
2769
                     *         | C_L|   C   | C_R|
2770
                     *         |____|_______|    |
2771
                     *              |  C_D       |
2772
                     *              |            |
2773
                     *              |____________|
2774
                     *
2775
                     *****************************************************************/
2776
2777
                    /*In case of slices*/
2778
36.0k
                    {
2779
36.0k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2780
17.4k
                        {
2781
17.4k
                            ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2782
17.4k
                            ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2783
2784
17.4k
                            ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2785
17.4k
                            ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2786
2787
17.4k
                            ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2788
17.4k
                            ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2789
2790
17.4k
                            ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
2791
17.4k
                            ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
2792
2793
17.4k
                            ctbx_c = ps_sao_ctxt->i4_ctb_x;
2794
17.4k
                            ctby_c = ps_sao_ctxt->i4_ctb_y;
2795
2796
17.4k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2797
8.74k
                            {
2798
8.74k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
2799
1.14k
                                {
2800
1.14k
                                    au4_idx_c[6] = -1;
2801
1.14k
                                    au4_idx_c[0] = -1;
2802
1.14k
                                    au4_idx_c[4] = -1;
2803
1.14k
                                }
2804
7.60k
                                else
2805
7.60k
                                {
2806
7.60k
                                    au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2807
7.60k
                                }
2808
2809
8.74k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2810
433
                                {
2811
433
                                    au4_idx_c[2] = -1;
2812
433
                                    au4_idx_c[5] = -1;
2813
433
                                    au4_idx_c[4] = -1;
2814
433
                                }
2815
8.30k
                                else
2816
8.30k
                                {
2817
8.30k
                                    au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2818
8.30k
                                    au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2819
8.30k
                                }
2820
8.74k
                                idx_c   = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2821
8.74k
                                au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2822
8.74k
                                au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2823
2824
8.74k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
2825
1.14k
                                {
2826
1.14k
                                    au4_ilf_across_tile_slice_enable[6] = 0;
2827
1.14k
                                    au4_ilf_across_tile_slice_enable[0] = 0;
2828
1.14k
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2829
1.14k
                                }
2830
7.60k
                                else
2831
7.60k
                                {
2832
7.60k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2833
7.60k
                                    au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
2834
7.60k
                                }
2835
8.74k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2836
433
                                {
2837
433
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2838
433
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2839
433
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2840
433
                                }
2841
8.30k
                                else
2842
8.30k
                                {
2843
8.30k
                                    au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2844
8.30k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2845
8.30k
                                }
2846
8.74k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2847
8.74k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2848
8.74k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2849
2850
8.74k
                                if(au4_idx_c[6] < idx_c)
2851
1.78k
                                {
2852
1.78k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2853
1.78k
                                }
2854
2855
                                /*
2856
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2857
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2858
                                 */
2859
78.6k
                                for(i = 0; i < 8; i++)
2860
69.9k
                                {
2861
                                    /*Sets the edges that lie on the slice/tile boundary*/
2862
69.9k
                                    if(au4_idx_c[i] != idx_c)
2863
19.6k
                                    {
2864
19.6k
                                        au1_tile_slice_boundary[i] = 1;
2865
19.6k
                                    }
2866
50.2k
                                    else
2867
50.2k
                                    {
2868
50.2k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2869
50.2k
                                    }
2870
69.9k
                                }
2871
                                /*Reset indices*/
2872
78.6k
                                for(i = 0; i < 8; i++)
2873
69.9k
                                {
2874
69.9k
                                    au4_idx_c[i] = 0;
2875
69.9k
                                }
2876
8.74k
                            }
2877
2878
17.4k
                            if(ps_pps->i1_tiles_enabled_flag)
2879
8.84k
                            {
2880
                                /* Calculate availability flags at tile boundary */
2881
8.84k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2882
4.11k
                                {
2883
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2884
4.11k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2885
3.95k
                                    {
2886
3.95k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
2887
449
                                        {
2888
449
                                            au4_idx_c[6] = -1;
2889
449
                                            au4_idx_c[0] = -1;
2890
449
                                            au4_idx_c[4] = -1;
2891
449
                                        }
2892
3.50k
                                        else
2893
3.50k
                                        {
2894
3.50k
                                            au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2895
3.50k
                                        }
2896
2897
3.95k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2898
1.16k
                                        {
2899
1.16k
                                            au4_idx_c[2] = -1;
2900
1.16k
                                            au4_idx_c[5] = -1;
2901
1.16k
                                            au4_idx_c[4] = -1;
2902
1.16k
                                        }
2903
2.78k
                                        else
2904
2.78k
                                        {
2905
2.78k
                                            au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2906
2.78k
                                            au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2907
2.78k
                                        }
2908
3.95k
                                        idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2909
3.95k
                                        au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2910
3.95k
                                        au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2911
2912
35.5k
                                        for(i = 0; i < 8; i++)
2913
31.6k
                                        {
2914
                                            /*Sets the edges that lie on the slice/tile boundary*/
2915
31.6k
                                            if(au4_idx_c[i] != idx_c)
2916
13.4k
                                            {
2917
13.4k
                                                au1_tile_slice_boundary[i] |= 1;
2918
13.4k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2919
13.4k
                                            }
2920
31.6k
                                        }
2921
3.95k
                                    }
2922
4.11k
                                }
2923
8.84k
                            }
2924
2925
157k
                            for(i = 0; i < 8; i++)
2926
139k
                            {
2927
                                /*Marks a neighbour as unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
2928
139k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2929
16.4k
                                {
2930
16.4k
                                    au1_avail_luma[i] = 0;
2931
16.4k
                                }
2932
139k
                            }
2933
2934
17.4k
                        }
2935
36.0k
                    }
2936
36.0k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
2937
4.61k
                    {
2938
4.61k
                        au1_avail_luma[0] = 0;
2939
4.61k
                        au1_avail_luma[4] = 0;
2940
4.61k
                        au1_avail_luma[6] = 0;
2941
4.61k
                    }
2942
2943
36.0k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
2944
4.04k
                    {
2945
4.04k
                        au1_avail_luma[1] = 0;
2946
4.04k
                        au1_avail_luma[5] = 0;
2947
4.04k
                        au1_avail_luma[7] = 0;
2948
4.04k
                    }
2949
2950
36.0k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2951
8.72k
                    {
2952
8.72k
                        au1_avail_luma[2] = 0;
2953
8.72k
                        au1_avail_luma[4] = 0;
2954
8.72k
                        au1_avail_luma[5] = 0;
2955
8.72k
                    }
2956
2957
36.0k
                    if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2958
3.99k
                    {
2959
3.99k
                        au1_avail_luma[3] = 0;
2960
3.99k
                        au1_avail_luma[6] = 0;
2961
3.99k
                        au1_avail_luma[7] = 0;
2962
3.99k
                    }
2963
2964
36.0k
                    {
2965
36.0k
                        au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
2966
36.0k
                        u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
2967
2968
36.0k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2969
36.0k
                                                                          src_strd,
2970
36.0k
                                                                          pu1_src_left_luma,
2971
36.0k
                                                                          pu1_src_top_luma,
2972
36.0k
                                                                          pu1_sao_src_top_left_luma_curr_ctb,
2973
36.0k
                                                                          au1_src_top_right,
2974
36.0k
                                                                          &u1_sao_src_top_left_luma_bot_left,
2975
36.0k
                                                                          au1_avail_luma,
2976
36.0k
                                                                          ai1_offset_y,
2977
36.0k
                                                                          sao_wd_luma,
2978
36.0k
                                                                          sao_ht_luma);
2979
36.0k
                    }
2980
36.0k
                    pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2981
36.0k
                    pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
2982
36.0k
                }
2983
415k
            }
2984
4.83M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2985
927k
            {
2986
                /* Update left, top and top-left */
2987
20.4M
                for(row = 0; row < sao_ht_luma; row++)
2988
19.5M
                {
2989
19.5M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2990
19.5M
                }
2991
927k
                pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2992
2993
927k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2994
2995
927k
                pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2996
927k
            }
2997
5.25M
        }
2998
2999
5.25M
        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && (0 != sao_wd_chroma) && (0 != sao_ht_chroma))
3000
4.72M
        {
3001
4.72M
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
3002
264k
            {
3003
264k
                if(0 == ps_sao->b3_cb_type_idx)
3004
226k
                {
3005
4.06M
                    for(row = 0; row < sao_ht_chroma; row++)
3006
3.83M
                    {
3007
3.83M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3008
3.83M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3009
3.83M
                    }
3010
226k
                    pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3011
226k
                    pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3012
3013
226k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3014
3015
226k
                    pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3016
226k
                    pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3017
226k
                }
3018
3019
37.8k
                else if(1 == ps_sao->b3_cb_type_idx)
3020
16.6k
                {
3021
16.6k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3022
16.6k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3023
16.6k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3024
16.6k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3025
3026
16.6k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3027
16.6k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3028
16.6k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3029
16.6k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3030
3031
16.6k
                    if(chroma_yuv420sp_vu)
3032
479
                    {
3033
479
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3034
479
                                                                                    src_strd,
3035
479
                                                                                    pu1_src_left_chroma,
3036
479
                                                                                    pu1_src_top_chroma,
3037
479
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
3038
479
                                                                                    ps_sao->b5_cr_band_pos,
3039
479
                                                                                    ps_sao->b5_cb_band_pos,
3040
479
                                                                                    ai1_offset_cr,
3041
479
                                                                                    ai1_offset_cb,
3042
479
                                                                                    sao_wd_chroma,
3043
479
                                                                                    sao_ht_chroma
3044
479
                                                                                   );
3045
479
                    }
3046
16.2k
                    else
3047
16.2k
                    {
3048
16.2k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3049
16.2k
                                                                                    src_strd,
3050
16.2k
                                                                                    pu1_src_left_chroma,
3051
16.2k
                                                                                    pu1_src_top_chroma,
3052
16.2k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
3053
16.2k
                                                                                    ps_sao->b5_cb_band_pos,
3054
16.2k
                                                                                    ps_sao->b5_cr_band_pos,
3055
16.2k
                                                                                    ai1_offset_cb,
3056
16.2k
                                                                                    ai1_offset_cr,
3057
16.2k
                                                                                    sao_wd_chroma,
3058
16.2k
                                                                                    sao_ht_chroma
3059
16.2k
                                                                                   );
3060
16.2k
                    }
3061
16.6k
                }
3062
3063
21.1k
                else // if(2 <= ps_sao->b3_cb_type_idx)
3064
21.1k
                {
3065
21.1k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3066
21.1k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3067
21.1k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3068
21.1k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3069
3070
21.1k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3071
21.1k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3072
21.1k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3073
21.1k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3074
3075
190k
                    for(i = 0; i < 8; i++)
3076
169k
                    {
3077
169k
                        au1_avail_chroma[i] = 255;
3078
169k
                        au1_tile_slice_boundary[i] = 0;
3079
169k
                        au4_idx_c[i] = 0;
3080
169k
                        au4_ilf_across_tile_slice_enable[i] = 1;
3081
169k
                    }
3082
21.1k
                    {
3083
21.1k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3084
10.3k
                        {
3085
10.3k
                            ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
3086
10.3k
                            ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
3087
3088
10.3k
                            ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
3089
10.3k
                            ctby_c_l = ps_sao_ctxt->i4_ctb_y;
3090
3091
10.3k
                            ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
3092
10.3k
                            ctby_c_r = ps_sao_ctxt->i4_ctb_y;
3093
3094
10.3k
                            ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
3095
10.3k
                            ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
3096
3097
10.3k
                            ctbx_c = ps_sao_ctxt->i4_ctb_x;
3098
10.3k
                            ctby_c = ps_sao_ctxt->i4_ctb_y;
3099
3100
10.3k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
3101
2.37k
                            {
3102
2.37k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
3103
235
                                {
3104
235
                                    au4_idx_c[0] = -1;
3105
235
                                    au4_idx_c[4] = -1;
3106
235
                                    au4_idx_c[6] = -1;
3107
235
                                }
3108
2.14k
                                else
3109
2.14k
                                {
3110
2.14k
                                    au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3111
2.14k
                                }
3112
3113
2.37k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
3114
141
                                {
3115
141
                                    au4_idx_c[2] = -1;
3116
141
                                    au4_idx_c[4] = -1;
3117
141
                                    au4_idx_c[5] = -1;
3118
141
                                }
3119
2.23k
                                else
3120
2.23k
                                {
3121
2.23k
                                    au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3122
2.23k
                                    au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3123
2.23k
                                }
3124
2.37k
                                idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3125
2.37k
                                au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3126
2.37k
                                au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3127
3128
2.37k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
3129
235
                                {
3130
235
                                    au4_ilf_across_tile_slice_enable[0] = 0;
3131
235
                                    au4_ilf_across_tile_slice_enable[4] = 0;
3132
235
                                    au4_ilf_across_tile_slice_enable[6] = 0;
3133
235
                                }
3134
2.14k
                                else
3135
2.14k
                                {
3136
2.14k
                                    au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
3137
2.14k
                                    au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3138
2.14k
                                }
3139
3140
2.37k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
3141
141
                                {
3142
141
                                    au4_ilf_across_tile_slice_enable[2] = 0;
3143
141
                                    au4_ilf_across_tile_slice_enable[4] = 0;
3144
141
                                    au4_ilf_across_tile_slice_enable[5] = 0;
3145
141
                                }
3146
2.23k
                                else
3147
2.23k
                                {
3148
2.23k
                                    au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3149
2.23k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
3150
2.23k
                                }
3151
3152
2.37k
                                au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
3153
2.37k
                                au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
3154
2.37k
                                au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
3155
3156
2.37k
                                if(idx_c > au4_idx_c[6])
3157
416
                                {
3158
416
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3159
416
                                }
3160
3161
                                /*
3162
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
3163
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
3164
                                 */
3165
21.3k
                                for(i = 0; i < 8; i++)
3166
19.0k
                                {
3167
                                    /*Sets the edges that lie on the slice/tile boundary*/
3168
19.0k
                                    if(au4_idx_c[i] != idx_c)
3169
6.22k
                                    {
3170
6.22k
                                        au1_tile_slice_boundary[i] = 1;
3171
6.22k
                                    }
3172
12.7k
                                    else
3173
12.7k
                                    {
3174
12.7k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
3175
12.7k
                                    }
3176
19.0k
                                }
3177
                                /*Reset indices*/
3178
21.3k
                                for(i = 0; i < 8; i++)
3179
19.0k
                                {
3180
19.0k
                                    au4_idx_c[i] = 0;
3181
19.0k
                                }
3182
2.37k
                            }
3183
3184
10.3k
                            if(ps_pps->i1_tiles_enabled_flag)
3185
8.04k
                            {
3186
                                /* Calculate availability flags at slice boundary */
3187
8.04k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
3188
3.45k
                                {
3189
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
3190
3.45k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
3191
3.31k
                                    {
3192
3.31k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
3193
662
                                        {
3194
662
                                            au4_idx_c[6] = -1;
3195
662
                                            au4_idx_c[0] = -1;
3196
662
                                            au4_idx_c[4] = -1;
3197
662
                                        }
3198
2.65k
                                        else
3199
2.65k
                                        {
3200
2.65k
                                            au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3201
2.65k
                                        }
3202
3203
3.31k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
3204
1.03k
                                        {
3205
1.03k
                                            au4_idx_c[2] = -1;
3206
1.03k
                                            au4_idx_c[5] = -1;
3207
1.03k
                                            au4_idx_c[4] = -1;
3208
1.03k
                                        }
3209
2.28k
                                        else
3210
2.28k
                                        {
3211
2.28k
                                            au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3212
2.28k
                                            au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3213
2.28k
                                        }
3214
3.31k
                                        idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3215
3.31k
                                        au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3216
3.31k
                                        au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3217
3218
29.8k
                                        for(i = 0; i < 8; i++)
3219
26.5k
                                        {
3220
                                            /*Sets the edges that lie on the slice/tile boundary*/
3221
26.5k
                                            if(au4_idx_c[i] != idx_c)
3222
10.6k
                                            {
3223
10.6k
                                                au1_tile_slice_boundary[i] |= 1;
3224
10.6k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
3225
10.6k
                                            }
3226
26.5k
                                        }
3227
3.31k
                                    }
3228
3.45k
                                }
3229
8.04k
                            }
3230
3231
93.3k
                            for(i = 0; i < 8; i++)
3232
83.0k
                            {
3233
                                /*Sets the edges that lie on the slice/tile boundary*/
3234
83.0k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
3235
11.3k
                                {
3236
11.3k
                                    au1_avail_chroma[i] = 0;
3237
11.3k
                                }
3238
83.0k
                            }
3239
10.3k
                        }
3240
21.1k
                    }
3241
3242
21.1k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
3243
3.20k
                    {
3244
3.20k
                        au1_avail_chroma[0] = 0;
3245
3.20k
                        au1_avail_chroma[4] = 0;
3246
3.20k
                        au1_avail_chroma[6] = 0;
3247
3.20k
                    }
3248
3249
21.1k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
3250
2.65k
                    {
3251
2.65k
                        au1_avail_chroma[1] = 0;
3252
2.65k
                        au1_avail_chroma[5] = 0;
3253
2.65k
                        au1_avail_chroma[7] = 0;
3254
2.65k
                    }
3255
3256
21.1k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
3257
4.83k
                    {
3258
4.83k
                        au1_avail_chroma[2] = 0;
3259
4.83k
                        au1_avail_chroma[4] = 0;
3260
4.83k
                        au1_avail_chroma[5] = 0;
3261
4.83k
                    }
3262
3263
21.1k
                    if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
3264
3.29k
                    {
3265
3.29k
                        au1_avail_chroma[3] = 0;
3266
3.29k
                        au1_avail_chroma[6] = 0;
3267
3.29k
                        au1_avail_chroma[7] = 0;
3268
3.29k
                    }
3269
3270
21.1k
                    {
3271
21.1k
                        au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
3272
21.1k
                        au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
3273
3274
21.1k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
3275
21.1k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
3276
3277
21.1k
                        if(chroma_yuv420sp_vu)
3278
1.06k
                        {
3279
1.06k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3280
1.06k
                                                                                 src_strd,
3281
1.06k
                                                                                 pu1_src_left_chroma,
3282
1.06k
                                                                                 pu1_src_top_chroma,
3283
1.06k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
3284
1.06k
                                                                                 au1_src_top_right,
3285
1.06k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
3286
1.06k
                                                                                 au1_avail_chroma,
3287
1.06k
                                                                                 ai1_offset_cr,
3288
1.06k
                                                                                 ai1_offset_cb,
3289
1.06k
                                                                                 sao_wd_chroma,
3290
1.06k
                                                                                 sao_ht_chroma);
3291
1.06k
                        }
3292
20.1k
                        else
3293
20.1k
                        {
3294
20.1k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3295
20.1k
                                                                                 src_strd,
3296
20.1k
                                                                                 pu1_src_left_chroma,
3297
20.1k
                                                                                 pu1_src_top_chroma,
3298
20.1k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
3299
20.1k
                                                                                 au1_src_top_right,
3300
20.1k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
3301
20.1k
                                                                                 au1_avail_chroma,
3302
20.1k
                                                                                 ai1_offset_cb,
3303
20.1k
                                                                                 ai1_offset_cr,
3304
20.1k
                                                                                 sao_wd_chroma,
3305
20.1k
                                                                                 sao_ht_chroma);
3306
20.1k
                        }
3307
21.1k
                    }
3308
3309
21.1k
                }
3310
264k
                pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3311
264k
                pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3312
3313
264k
                pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
3314
264k
                pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
3315
264k
            }
3316
4.46M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3317
545k
            {
3318
7.08M
                for(row = 0; row < sao_ht_chroma; row++)
3319
6.54M
                {
3320
6.54M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3321
6.54M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3322
6.54M
                }
3323
545k
                pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3324
545k
                pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3325
3326
545k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3327
3328
545k
                pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3329
545k
                pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3330
545k
            }
3331
3332
4.72M
        }
3333
5.25M
    }
3334
3335
3336
3337
3338
/* If no loop filter is enabled copy the backed up values */
3339
5.25M
    {
3340
        /* Luma */
3341
5.25M
        if(no_loop_filter_enabled_luma)
3342
152k
        {
3343
152k
            UWORD32 u4_no_loop_filter_flag;
3344
152k
            WORD32 loop_filter_bit_pos;
3345
152k
            WORD32 log2_min_cu = 3;
3346
152k
            WORD32 min_cu = (1 << log2_min_cu);
3347
152k
            UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
3348
152k
            WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
3349
152k
            WORD32 sao_blk_wd = ctb_size;
3350
152k
            WORD32 remaining_rows;
3351
152k
            WORD32 remaining_cols;
3352
3353
152k
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3354
152k
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3355
152k
            if(remaining_rows <= SAO_SHIFT_CTB)
3356
13.8k
                sao_blk_ht += remaining_rows;
3357
152k
            if(remaining_cols <= SAO_SHIFT_CTB)
3358
4.10k
                sao_blk_wd += remaining_cols;
3359
3360
152k
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
3361
152k
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3362
3363
152k
            pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
3364
3365
152k
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3366
152k
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3367
152k
            if(ps_sao_ctxt->i4_ctb_x > 0)
3368
145k
                loop_filter_bit_pos -= 1;
3369
3370
152k
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3371
152k
                            (loop_filter_bit_pos >> 3);
3372
3373
152k
            for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
3374
1.29M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3375
1.14M
            {
3376
1.14M
                WORD32 tmp_wd = sao_blk_wd;
3377
3378
1.14M
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3379
1.14M
                                (loop_filter_bit_pos & 7);
3380
1.14M
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3381
3382
1.14M
                if(u4_no_loop_filter_flag)
3383
1.00M
                {
3384
2.18M
                    while(tmp_wd > 0)
3385
1.18M
                    {
3386
1.18M
                        if(CTZ(u4_no_loop_filter_flag))
3387
155k
                        {
3388
155k
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3389
155k
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3390
155k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3391
155k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3392
155k
                        }
3393
1.02M
                        else
3394
1.02M
                        {
3395
9.08M
                            for(row = 0; row < min_cu; row++)
3396
8.06M
                            {
3397
422M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3398
414M
                                {
3399
414M
                                    pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
3400
414M
                                }
3401
8.06M
                            }
3402
1.02M
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3403
1.02M
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3404
1.02M
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3405
1.02M
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3406
1.02M
                        }
3407
1.18M
                    }
3408
3409
1.00M
                    pu1_src_tmp_luma -= sao_blk_wd;
3410
1.00M
                    pu1_src_backup_luma -= sao_blk_wd;
3411
1.00M
                }
3412
3413
1.14M
                pu1_src_tmp_luma += (src_strd << log2_min_cu);
3414
1.14M
                pu1_src_backup_luma += (backup_strd << log2_min_cu);
3415
1.14M
            }
3416
152k
        }
3417
3418
        /* Chroma */
3419
5.25M
        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && no_loop_filter_enabled_chroma)
3420
152k
        {
3421
152k
            UWORD32 u4_no_loop_filter_flag;
3422
152k
            WORD32 loop_filter_bit_pos;
3423
152k
            WORD32 log2_min_cu = 3;
3424
152k
            WORD32 min_cu = (1 << log2_min_cu);
3425
152k
            UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
3426
152k
            WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
3427
152k
            WORD32 sao_blk_wd = ctb_size;
3428
152k
            WORD32 remaining_rows;
3429
152k
            WORD32 remaining_cols;
3430
3431
152k
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3432
152k
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3433
152k
            if(remaining_rows <= 2 * SAO_SHIFT_CTB)
3434
13.9k
                sao_blk_ht += remaining_rows;
3435
152k
            if(remaining_cols <= 2 * SAO_SHIFT_CTB)
3436
4.09k
                sao_blk_wd += remaining_cols;
3437
3438
152k
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
3439
152k
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3440
3441
152k
            pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
3442
3443
152k
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3444
152k
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3445
152k
            if(ps_sao_ctxt->i4_ctb_x > 0)
3446
145k
                loop_filter_bit_pos -= 2;
3447
3448
152k
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3449
152k
                            (loop_filter_bit_pos >> 3);
3450
3451
152k
            for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
3452
1.27M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3453
1.12M
            {
3454
1.12M
                WORD32 tmp_wd = sao_blk_wd;
3455
3456
1.12M
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3457
1.12M
                                (loop_filter_bit_pos & 7);
3458
1.12M
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3459
3460
1.12M
                if(u4_no_loop_filter_flag)
3461
1.00M
                {
3462
2.17M
                    while(tmp_wd > 0)
3463
1.17M
                    {
3464
1.17M
                        if(CTZ(u4_no_loop_filter_flag))
3465
150k
                        {
3466
150k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3467
150k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3468
150k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3469
150k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3470
150k
                        }
3471
1.02M
                        else
3472
1.02M
                        {
3473
5.07M
                            for(row = 0; row < min_cu / 2; row++)
3474
4.05M
                            {
3475
225M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3476
221M
                                {
3477
221M
                                    pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
3478
221M
                                }
3479
4.05M
                            }
3480
3481
1.02M
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3482
1.02M
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3483
1.02M
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3484
1.02M
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3485
1.02M
                        }
3486
1.17M
                    }
3487
3488
1.00M
                    pu1_src_tmp_chroma -= sao_blk_wd;
3489
1.00M
                    pu1_src_backup_chroma -= sao_blk_wd;
3490
1.00M
                }
3491
3492
1.12M
                pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
3493
1.12M
                pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
3494
1.12M
            }
3495
152k
        }
3496
5.25M
    }
3497
3498
5.25M
}
3499