Coverage Report

Created: 2026-01-17 06:19

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/decoder/ihevcd_sao.c
Line
Count
Source
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevcd_sao.c
22
 *
23
 * @brief
24
 *  Contains function definitions for sample adaptive offset process
25
 *
26
 * @author
27
 *  Srinivas T
28
 *
29
 * @par List of Functions:
30
 *
31
 * @remarks
32
 *  None
33
 *
34
 *******************************************************************************
35
 */
36
37
#include <stdio.h>
38
#include <stddef.h>
39
#include <stdlib.h>
40
#include <string.h>
41
#include <assert.h>
42
43
#include "ihevc_typedefs.h"
44
#include "iv.h"
45
#include "ivd.h"
46
#include "ihevcd_cxa.h"
47
#include "ithread.h"
48
49
#include "ihevc_defs.h"
50
#include "ihevc_debug.h"
51
#include "ihevc_defs.h"
52
#include "ihevc_structs.h"
53
#include "ihevc_macros.h"
54
#include "ihevc_platform_macros.h"
55
#include "ihevc_cabac_tables.h"
56
#include "ihevc_sao.h"
57
#include "ihevc_mem_fns.h"
58
59
#include "ihevc_error.h"
60
#include "ihevc_common_tables.h"
61
62
#include "ihevcd_trace.h"
63
#include "ihevcd_defs.h"
64
#include "ihevcd_function_selector.h"
65
#include "ihevcd_structs.h"
66
#include "ihevcd_error.h"
67
#include "ihevcd_nal.h"
68
#include "ihevcd_bitstream.h"
69
#include "ihevcd_job_queue.h"
70
#include "ihevcd_utils.h"
71
72
#include "ihevc_deblk.h"
73
#include "ihevc_deblk_tables.h"
74
#include "ihevcd_profile.h"
75
#include "ihevcd_sao.h"
76
#include "ihevcd_debug.h"
77
78
151M
#define SAO_SHIFT_CTB    8
79
80
/**
81
 * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions)
82
 */
83
void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
84
0
{
85
0
    codec_t *ps_codec = ps_sao_ctxt->ps_codec;
86
0
    UWORD8 *pu1_src_luma;
87
0
    UWORD8 *pu1_src_chroma;
88
0
    WORD32 src_strd;
89
0
    WORD32 ctb_size;
90
0
    WORD32 log2_ctb_size;
91
0
    sps_t *ps_sps;
92
0
    sao_t *ps_sao;
93
0
    WORD32 row, col;
94
0
    UWORD8 au1_avail_luma[8];
95
0
    UWORD8 au1_avail_chroma[8];
96
0
    WORD32 i;
97
0
    UWORD8 *pu1_src_top_luma;
98
0
    UWORD8 *pu1_src_top_chroma;
99
0
    UWORD8 *pu1_src_left_luma;
100
0
    UWORD8 *pu1_src_left_chroma;
101
0
    UWORD8 au1_src_top_right[2];
102
0
    UWORD8 au1_src_bot_left[2];
103
0
    UWORD8 *pu1_no_loop_filter_flag;
104
0
    WORD32 loop_filter_strd;
105
106
    /* Only first 5 values are used, but arrays are large
107
     enough so that SIMD functions can read 64 bits at a time */
108
0
    WORD8 ai1_offset_y[8] = {0};
109
0
    WORD8 ai1_offset_cb[8] = {0};
110
0
    WORD8 ai1_offset_cr[8] = {0};
111
112
0
    PROFILE_DISABLE_SAO();
113
114
0
    ps_sps = ps_sao_ctxt->ps_sps;
115
0
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
116
0
    ctb_size = (1 << log2_ctb_size);
117
0
    src_strd = ps_sao_ctxt->ps_codec->i4_strd;
118
0
    pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
119
0
    pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
120
121
0
    ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
122
0
    loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
123
124
    /* Current CTB */
125
0
    {
126
0
        WORD32 sao_wd_luma;
127
0
        WORD32 sao_wd_chroma;
128
0
        WORD32 sao_ht_luma;
129
0
        WORD32 sao_ht_chroma;
130
131
0
        WORD32 remaining_rows;
132
0
        WORD32 remaining_cols;
133
134
0
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
135
0
        sao_wd_luma = MIN(ctb_size, remaining_cols);
136
0
        sao_wd_chroma = MIN(ctb_size, remaining_cols);
137
138
0
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
139
0
        sao_ht_luma = MIN(ctb_size, remaining_rows);
140
0
        sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
141
142
0
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
143
0
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
144
0
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
145
0
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
146
147
0
        pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
148
0
                        ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
149
0
                        ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
150
151
0
        ai1_offset_y[1] = ps_sao->b4_y_offset_1;
152
0
        ai1_offset_y[2] = ps_sao->b4_y_offset_2;
153
0
        ai1_offset_y[3] = ps_sao->b4_y_offset_3;
154
0
        ai1_offset_y[4] = ps_sao->b4_y_offset_4;
155
156
0
        ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
157
0
        ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
158
0
        ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
159
0
        ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
160
161
0
        ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
162
0
        ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
163
0
        ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
164
0
        ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
165
166
0
        for(i = 0; i < 8; i++)
167
0
        {
168
0
            au1_avail_luma[i] = 255;
169
0
            au1_avail_chroma[i] = 255;
170
0
        }
171
172
173
0
        if(0 == ps_sao_ctxt->i4_ctb_x)
174
0
        {
175
0
            au1_avail_luma[0] = 0;
176
0
            au1_avail_luma[4] = 0;
177
0
            au1_avail_luma[6] = 0;
178
179
0
            au1_avail_chroma[0] = 0;
180
0
            au1_avail_chroma[4] = 0;
181
0
            au1_avail_chroma[6] = 0;
182
0
        }
183
184
0
        if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
185
0
        {
186
0
            au1_avail_luma[1] = 0;
187
0
            au1_avail_luma[5] = 0;
188
0
            au1_avail_luma[7] = 0;
189
190
0
            au1_avail_chroma[1] = 0;
191
0
            au1_avail_chroma[5] = 0;
192
0
            au1_avail_chroma[7] = 0;
193
0
        }
194
195
0
        if(0 == ps_sao_ctxt->i4_ctb_y)
196
0
        {
197
0
            au1_avail_luma[2] = 0;
198
0
            au1_avail_luma[4] = 0;
199
0
            au1_avail_luma[5] = 0;
200
201
0
            au1_avail_chroma[2] = 0;
202
0
            au1_avail_chroma[4] = 0;
203
0
            au1_avail_chroma[5] = 0;
204
0
        }
205
206
0
        if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
207
0
        {
208
0
            au1_avail_luma[3] = 0;
209
0
            au1_avail_luma[6] = 0;
210
0
            au1_avail_luma[7] = 0;
211
212
0
            au1_avail_chroma[3] = 0;
213
0
            au1_avail_chroma[6] = 0;
214
0
            au1_avail_chroma[7] = 0;
215
0
        }
216
217
218
0
        if(0 == ps_sao->b3_y_type_idx)
219
0
        {
220
            /* Update left, top and top-left */
221
0
            for(row = 0; row < sao_ht_luma; row++)
222
0
            {
223
0
                pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
224
0
            }
225
0
            ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
226
227
0
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
228
229
0
        }
230
0
        else
231
0
        {
232
0
            UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
233
0
            UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
234
0
            WORD32 tmp_strd = MAX_CTB_SIZE + 2;
235
0
            WORD32 no_loop_filter_enabled = 0;
236
237
            /* Check the loop filter flags and copy the original values for back up */
238
0
            {
239
0
                UWORD32 u4_no_loop_filter_flag;
240
0
                WORD32 min_cu = 8;
241
0
                UWORD8 *pu1_src_tmp = pu1_src_luma;
242
243
0
                for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
244
0
                {
245
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
246
0
                                    ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
247
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
248
249
0
                    if(u4_no_loop_filter_flag)
250
0
                    {
251
0
                        WORD32 tmp_wd = sao_wd_luma;
252
0
                        no_loop_filter_enabled = 1;
253
0
                        while(tmp_wd > 0)
254
0
                        {
255
0
                            if(CTZ(u4_no_loop_filter_flag))
256
0
                            {
257
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
258
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
259
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
260
0
                                tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
261
0
                            }
262
0
                            else
263
0
                            {
264
0
                                for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
265
0
                                {
266
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
267
0
                                    {
268
0
                                        pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
269
0
                                    }
270
0
                                }
271
272
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
273
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
274
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
275
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
276
0
                            }
277
0
                        }
278
279
0
                        pu1_src_tmp -= sao_wd_luma;
280
0
                    }
281
282
0
                    pu1_src_tmp += min_cu * src_strd;
283
0
                    pu1_src_copy += min_cu * tmp_strd;
284
0
                }
285
0
            }
286
287
0
            if(1 == ps_sao->b3_y_type_idx)
288
0
            {
289
0
                ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
290
0
                                                                          src_strd,
291
0
                                                                          pu1_src_left_luma,
292
0
                                                                          pu1_src_top_luma,
293
0
                                                                          ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
294
0
                                                                          ps_sao->b5_y_band_pos,
295
0
                                                                          ai1_offset_y,
296
0
                                                                          sao_wd_luma,
297
0
                                                                          sao_ht_luma);
298
0
            }
299
0
            else // if(2 <= ps_sao->b3_y_type_idx)
300
0
            {
301
0
                au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
302
0
                au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
303
0
                ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
304
0
                                                                  src_strd,
305
0
                                                                  pu1_src_left_luma,
306
0
                                                                  pu1_src_top_luma,
307
0
                                                                  ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
308
0
                                                                  au1_src_top_right,
309
0
                                                                  au1_src_bot_left,
310
0
                                                                  au1_avail_luma,
311
0
                                                                  ai1_offset_y,
312
0
                                                                  sao_wd_luma,
313
0
                                                                  sao_ht_luma);
314
0
            }
315
316
            /* Check the loop filter flags and copy the original values back if they are set */
317
0
            if(no_loop_filter_enabled)
318
0
            {
319
0
                UWORD32 u4_no_loop_filter_flag;
320
0
                WORD32 min_cu = 8;
321
0
                UWORD8 *pu1_src_tmp = pu1_src_luma;
322
323
0
                for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
324
0
                {
325
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
326
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
327
328
0
                    if(u4_no_loop_filter_flag)
329
0
                    {
330
0
                        WORD32 tmp_wd = sao_wd_luma;
331
0
                        while(tmp_wd > 0)
332
0
                        {
333
0
                            if(CTZ(u4_no_loop_filter_flag))
334
0
                            {
335
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
336
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
337
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
338
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
339
0
                            }
340
0
                            else
341
0
                            {
342
0
                                for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
343
0
                                {
344
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
345
0
                                    {
346
0
                                        pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
347
0
                                    }
348
0
                                }
349
350
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
351
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
352
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
353
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
354
0
                            }
355
0
                        }
356
357
0
                        pu1_src_tmp -= sao_wd_luma;
358
0
                    }
359
360
0
                    pu1_src_tmp += min_cu * src_strd;
361
0
                    pu1_src_copy += min_cu * tmp_strd;
362
0
                }
363
0
            }
364
365
0
        }
366
367
0
        if(0 == ps_sao->b3_cb_type_idx)
368
0
        {
369
0
            for(row = 0; row < sao_ht_chroma; row++)
370
0
            {
371
0
                pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
372
0
                pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
373
0
            }
374
0
            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
375
0
            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
376
377
0
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
378
0
        }
379
0
        else
380
0
        {
381
0
            UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
382
0
            UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
383
0
            WORD32 tmp_strd = MAX_CTB_SIZE + 4;
384
0
            WORD32 no_loop_filter_enabled = 0;
385
386
            /* Check the loop filter flags and copy the original values for back up */
387
0
            {
388
0
                UWORD32 u4_no_loop_filter_flag;
389
0
                WORD32 min_cu = 4;
390
0
                UWORD8 *pu1_src_tmp = pu1_src_chroma;
391
392
0
                for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
393
0
                {
394
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
395
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
396
397
0
                    if(u4_no_loop_filter_flag)
398
0
                    {
399
0
                        WORD32 tmp_wd = sao_wd_chroma;
400
0
                        no_loop_filter_enabled = 1;
401
0
                        while(tmp_wd > 0)
402
0
                        {
403
0
                            if(CTZ(u4_no_loop_filter_flag))
404
0
                            {
405
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
406
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
407
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
408
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
409
0
                            }
410
0
                            else
411
0
                            {
412
0
                                for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
413
0
                                {
414
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
415
0
                                    {
416
0
                                        pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
417
0
                                    }
418
0
                                }
419
420
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
421
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
422
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
423
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
424
0
                            }
425
0
                        }
426
427
0
                        pu1_src_tmp -= sao_wd_chroma;
428
0
                    }
429
430
0
                    pu1_src_tmp += min_cu * src_strd;
431
0
                    pu1_src_copy += min_cu * tmp_strd;
432
0
                }
433
0
            }
434
435
0
            if(1 == ps_sao->b3_cb_type_idx)
436
0
            {
437
0
                ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
438
0
                                                                            src_strd,
439
0
                                                                            pu1_src_left_chroma,
440
0
                                                                            pu1_src_top_chroma,
441
0
                                                                            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
442
0
                                                                            ps_sao->b5_cb_band_pos,
443
0
                                                                            ps_sao->b5_cr_band_pos,
444
0
                                                                            ai1_offset_cb,
445
0
                                                                            ai1_offset_cr,
446
0
                                                                            sao_wd_chroma,
447
0
                                                                            sao_ht_chroma
448
0
                                                                           );
449
0
            }
450
0
            else // if(2 <= ps_sao->b3_cb_type_idx)
451
0
            {
452
0
                au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
453
0
                au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
454
0
                au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
455
0
                au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
456
0
                ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
457
0
                                                                     src_strd,
458
0
                                                                     pu1_src_left_chroma,
459
0
                                                                     pu1_src_top_chroma,
460
0
                                                                     ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
461
0
                                                                     au1_src_top_right,
462
0
                                                                     au1_src_bot_left,
463
0
                                                                     au1_avail_chroma,
464
0
                                                                     ai1_offset_cb,
465
0
                                                                     ai1_offset_cr,
466
0
                                                                     sao_wd_chroma,
467
0
                                                                     sao_ht_chroma);
468
0
            }
469
470
            /* Check the loop filter flags and copy the original values back if they are set */
471
0
            if(no_loop_filter_enabled)
472
0
            {
473
0
                UWORD32 u4_no_loop_filter_flag;
474
0
                WORD32 min_cu = 4;
475
0
                UWORD8 *pu1_src_tmp = pu1_src_chroma;
476
477
0
                for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
478
0
                {
479
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
480
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
481
482
0
                    if(u4_no_loop_filter_flag)
483
0
                    {
484
0
                        WORD32 tmp_wd = sao_wd_chroma;
485
0
                        while(tmp_wd > 0)
486
0
                        {
487
0
                            if(CTZ(u4_no_loop_filter_flag))
488
0
                            {
489
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
490
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
491
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
492
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
493
0
                            }
494
0
                            else
495
0
                            {
496
0
                                for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
497
0
                                {
498
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
499
0
                                    {
500
0
                                        pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
501
0
                                    }
502
0
                                }
503
504
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
505
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
506
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
507
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
508
0
                            }
509
0
                        }
510
511
0
                        pu1_src_tmp -= sao_wd_chroma;
512
0
                    }
513
514
0
                    pu1_src_tmp += min_cu * src_strd;
515
0
                    pu1_src_copy += min_cu * tmp_strd;
516
0
                }
517
0
            }
518
519
0
        }
520
521
0
    }
522
0
}
523
524
void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
525
3.91M
{
526
3.91M
    codec_t *ps_codec = ps_sao_ctxt->ps_codec;
527
3.91M
    UWORD8 *pu1_src_luma;
528
3.91M
    UWORD8 *pu1_src_chroma;
529
3.91M
    WORD32 src_strd;
530
3.91M
    WORD32 ctb_size;
531
3.91M
    WORD32 log2_ctb_size;
532
3.91M
    sps_t *ps_sps;
533
3.91M
    sao_t *ps_sao;
534
3.91M
    pps_t *ps_pps;
535
3.91M
    slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
536
3.91M
    tile_t *ps_tile;
537
3.91M
    UWORD16 *pu1_slice_idx;
538
3.91M
    UWORD16 *pu1_tile_idx;
539
3.91M
    WORD32 row, col;
540
3.91M
    UWORD8 au1_avail_luma[8];
541
3.91M
    UWORD8 au1_avail_chroma[8];
542
3.91M
    UWORD8 au1_tile_slice_boundary[8];
543
3.91M
    UWORD8 au4_ilf_across_tile_slice_enable[8];
544
3.91M
    WORD32 i;
545
3.91M
    UWORD8 *pu1_src_top_luma;
546
3.91M
    UWORD8 *pu1_src_top_chroma;
547
3.91M
    UWORD8 *pu1_src_left_luma;
548
3.91M
    UWORD8 *pu1_src_left_chroma;
549
3.91M
    UWORD8 au1_src_top_right[2];
550
3.91M
    UWORD8 au1_src_bot_left[2];
551
3.91M
    UWORD8 *pu1_no_loop_filter_flag;
552
3.91M
    UWORD8 *pu1_src_backup_luma;
553
3.91M
    UWORD8 *pu1_src_backup_chroma;
554
3.91M
    WORD32 backup_strd;
555
3.91M
    WORD32 loop_filter_strd;
556
557
3.91M
    WORD32 no_loop_filter_enabled_luma = 0;
558
3.91M
    WORD32 no_loop_filter_enabled_chroma = 0;
559
3.91M
    UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
560
3.91M
    UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
561
3.91M
    UWORD8 *pu1_sao_src_luma_top_left_ctb;
562
3.91M
    UWORD8 *pu1_sao_src_chroma_top_left_ctb;
563
3.91M
    UWORD8 *pu1_sao_src_top_left_luma_top_right;
564
3.91M
    UWORD8 *pu1_sao_src_top_left_chroma_top_right;
565
3.91M
    UWORD8  u1_sao_src_top_left_luma_bot_left;
566
3.91M
    UWORD8  *pu1_sao_src_top_left_luma_bot_left;
567
3.91M
    UWORD8 *au1_sao_src_top_left_chroma_bot_left;
568
3.91M
    UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
569
    /* Only first 5 values are used, but arrays are large
570
     enough so that SIMD functions can read 64 bits at a time */
571
3.91M
    WORD8 ai1_offset_y[8] = {0};
572
3.91M
    WORD8 ai1_offset_cb[8] = {0};
573
3.91M
    WORD8 ai1_offset_cr[8] = {0};
574
3.91M
    WORD32  chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
575
576
3.91M
    PROFILE_DISABLE_SAO();
577
578
3.91M
    ps_sps = ps_sao_ctxt->ps_sps;
579
3.91M
    ps_pps = ps_sao_ctxt->ps_pps;
580
3.91M
    ps_tile = ps_sao_ctxt->ps_tile;
581
582
3.91M
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
583
3.91M
    ctb_size = (1 << log2_ctb_size);
584
3.91M
    src_strd = ps_sao_ctxt->ps_codec->i4_strd;
585
3.91M
    ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
586
3.91M
    ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
587
588
3.91M
    pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
589
3.91M
    pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
590
3.91M
    pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
591
3.91M
    pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
592
593
    /* Stores the left value for each row of CTBs - needed for column tiles */
594
3.91M
    pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
595
3.91M
    pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
596
3.91M
    pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
597
3.91M
    pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
598
3.91M
    u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
599
3.91M
    pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
600
3.91M
    au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
601
3.91M
    pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
602
3.91M
    pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
603
3.91M
    pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
604
605
3.91M
    ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
606
3.91M
    loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
607
3.91M
    backup_strd = 2 * MAX_CTB_SIZE;
608
609
3.91M
    DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
610
611
3.91M
    {
612
        /* Check the loop filter flags and copy the original values for back up */
613
        /* Luma */
614
615
        /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs
616
         * can belong to different slice with their own sao_enable flag */
617
3.91M
        {
618
3.91M
            UWORD32 u4_no_loop_filter_flag;
619
3.91M
            WORD32 loop_filter_bit_pos;
620
3.91M
            WORD32 log2_min_cu = 3;
621
3.91M
            WORD32 min_cu = (1 << log2_min_cu);
622
3.91M
            UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
623
3.91M
            WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
624
3.91M
            WORD32 sao_blk_wd = ctb_size;
625
3.91M
            WORD32 remaining_rows;
626
3.91M
            WORD32 remaining_cols;
627
628
3.91M
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
629
3.91M
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
630
3.91M
            if(remaining_rows <= SAO_SHIFT_CTB)
631
394k
                sao_blk_ht += remaining_rows;
632
3.91M
            if(remaining_cols <= SAO_SHIFT_CTB)
633
132k
                sao_blk_wd += remaining_cols;
634
635
3.91M
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
636
3.91M
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
637
638
3.91M
            pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
639
640
3.91M
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
641
3.91M
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
642
3.91M
            if(ps_sao_ctxt->i4_ctb_x > 0)
643
3.78M
                loop_filter_bit_pos -= 1;
644
645
3.91M
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
646
3.91M
                            (loop_filter_bit_pos >> 3);
647
648
3.91M
            for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
649
31.2M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
650
27.3M
            {
651
27.3M
                WORD32 tmp_wd = sao_blk_wd;
652
653
27.3M
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
654
27.3M
                                (loop_filter_bit_pos & 7);
655
27.3M
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
656
657
27.3M
                if(u4_no_loop_filter_flag)
658
872k
                {
659
872k
                    no_loop_filter_enabled_luma = 1;
660
1.87M
                    while(tmp_wd > 0)
661
1.00M
                    {
662
1.00M
                        if(CTZ(u4_no_loop_filter_flag))
663
120k
                        {
664
120k
                            pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
665
120k
                            pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
666
120k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
667
120k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
668
120k
                        }
669
884k
                        else
670
884k
                        {
671
7.74M
                            for(row = 0; row < min_cu; row++)
672
6.86M
                            {
673
338M
                                for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
674
331M
                                {
675
331M
                                    pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
676
331M
                                }
677
6.86M
                            }
678
884k
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
679
884k
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
680
884k
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
681
884k
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
682
884k
                        }
683
1.00M
                    }
684
685
872k
                    pu1_src_tmp_luma -= sao_blk_wd;
686
872k
                    pu1_src_backup_luma -= sao_blk_wd;
687
872k
                }
688
689
27.3M
                pu1_src_tmp_luma += (src_strd << log2_min_cu);
690
27.3M
                pu1_src_backup_luma += (backup_strd << log2_min_cu);
691
27.3M
            }
692
3.91M
        }
693
694
        /* Chroma */
695
696
3.91M
        if (CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc)
697
3.91M
        {
698
3.91M
            UWORD32 u4_no_loop_filter_flag;
699
3.91M
            WORD32 loop_filter_bit_pos;
700
3.91M
            WORD32 log2_min_cu = 3;
701
3.91M
            WORD32 min_cu = (1 << log2_min_cu);
702
3.91M
            UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
703
3.91M
            WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
704
3.91M
            WORD32 sao_blk_wd = ctb_size;
705
3.91M
            WORD32 remaining_rows;
706
3.91M
            WORD32 remaining_cols;
707
708
3.91M
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
709
3.91M
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
710
3.91M
            if(remaining_rows <= 2 * SAO_SHIFT_CTB)
711
394k
                sao_blk_ht += remaining_rows;
712
3.91M
            if(remaining_cols <= 2 * SAO_SHIFT_CTB)
713
132k
                sao_blk_wd += remaining_cols;
714
715
3.91M
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
716
3.91M
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
717
718
3.91M
            pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
719
720
3.91M
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
721
3.91M
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
722
3.91M
            if(ps_sao_ctxt->i4_ctb_x > 0)
723
3.78M
                loop_filter_bit_pos -= 2;
724
725
3.91M
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
726
3.91M
                            (loop_filter_bit_pos >> 3);
727
728
3.91M
            for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
729
31.2M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
730
27.3M
            {
731
27.3M
                WORD32 tmp_wd = sao_blk_wd;
732
733
27.3M
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
734
27.3M
                                (loop_filter_bit_pos & 7);
735
27.3M
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
736
737
27.3M
                if(u4_no_loop_filter_flag)
738
873k
                {
739
873k
                    no_loop_filter_enabled_chroma = 1;
740
1.87M
                    while(tmp_wd > 0)
741
998k
                    {
742
998k
                        if(CTZ(u4_no_loop_filter_flag))
743
115k
                        {
744
115k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
745
115k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
746
115k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
747
115k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
748
115k
                        }
749
882k
                        else
750
882k
                        {
751
4.38M
                            for(row = 0; row < min_cu / 2; row++)
752
3.50M
                            {
753
200M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
754
197M
                                {
755
197M
                                    pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
756
197M
                                }
757
3.50M
                            }
758
759
882k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
760
882k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
761
882k
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
762
882k
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
763
882k
                        }
764
998k
                    }
765
766
873k
                    pu1_src_tmp_chroma -= sao_blk_wd;
767
873k
                    pu1_src_backup_chroma -= sao_blk_wd;
768
873k
                }
769
770
27.3M
                pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
771
27.3M
                pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
772
27.3M
            }
773
3.91M
        }
774
3.91M
    }
775
776
3.91M
    DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
777
778
    /* Top-left CTB */
779
3.91M
    if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
780
3.40M
    {
781
3.40M
        WORD32 sao_wd_luma = SAO_SHIFT_CTB;
782
3.40M
        WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
783
3.40M
        WORD32 sao_ht_luma = SAO_SHIFT_CTB;
784
3.40M
        WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
785
786
3.40M
        WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
787
3.40M
        WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
788
3.40M
        WORD32 au4_idx_tl[8], idx_tl;
789
790
3.40M
        slice_header_t *ps_slice_hdr_top_left;
791
3.40M
        {
792
3.40M
            WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
793
3.40M
                                        (ps_sao_ctxt->i4_ctb_x - 1);
794
3.40M
            ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx];
795
3.40M
        }
796
797
798
3.40M
        pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
799
3.40M
        pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
800
3.40M
        ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
801
3.40M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
802
3.40M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
803
3.40M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
804
3.40M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
805
806
3.40M
        if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag)
807
155k
        {
808
155k
            if(0 == ps_sao->b3_y_type_idx)
809
106k
            {
810
                /* Update left, top and top-left */
811
955k
                for(row = 0; row < sao_ht_luma; row++)
812
849k
                {
813
849k
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
814
849k
                }
815
106k
                pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
816
817
106k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
818
819
820
106k
            }
821
822
49.0k
            else if(1 == ps_sao->b3_y_type_idx)
823
25.8k
            {
824
25.8k
                ai1_offset_y[1] = ps_sao->b4_y_offset_1;
825
25.8k
                ai1_offset_y[2] = ps_sao->b4_y_offset_2;
826
25.8k
                ai1_offset_y[3] = ps_sao->b4_y_offset_3;
827
25.8k
                ai1_offset_y[4] = ps_sao->b4_y_offset_4;
828
829
25.8k
                ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
830
25.8k
                                                                          src_strd,
831
25.8k
                                                                          pu1_src_left_luma,
832
25.8k
                                                                          pu1_src_top_luma,
833
25.8k
                                                                          pu1_sao_src_luma_top_left_ctb,
834
25.8k
                                                                          ps_sao->b5_y_band_pos,
835
25.8k
                                                                          ai1_offset_y,
836
25.8k
                                                                          sao_wd_luma,
837
25.8k
                                                                          sao_ht_luma
838
25.8k
                                                                         );
839
25.8k
            }
840
841
23.2k
            else // if(2 <= ps_sao->b3_y_type_idx)
842
23.2k
            {
843
23.2k
                ai1_offset_y[1] = ps_sao->b4_y_offset_1;
844
23.2k
                ai1_offset_y[2] = ps_sao->b4_y_offset_2;
845
23.2k
                ai1_offset_y[3] = ps_sao->b4_y_offset_3;
846
23.2k
                ai1_offset_y[4] = ps_sao->b4_y_offset_4;
847
848
208k
                for(i = 0; i < 8; i++)
849
185k
                {
850
185k
                    au1_avail_luma[i] = 255;
851
185k
                    au1_tile_slice_boundary[i] = 0;
852
185k
                    au4_idx_tl[i] = 0;
853
185k
                    au4_ilf_across_tile_slice_enable[i] = 1;
854
185k
                }
855
856
                /******************************************************************
857
                 * Derive the  Top-left CTB's neighbor pixel's slice indices.
858
                 *
859
                 *          TL_T
860
                 *       4  _2__5________
861
                 *     0   |    |       |
862
                 *    TL_L | TL | 1 TL_R|
863
                 *         |____|_______|____
864
                 *        6|TL_D|7      |    |
865
                 *         | 3  |       |    |
866
                 *         |____|_______|    |
867
                 *              |            |
868
                 *              |            |
869
                 *              |____________|
870
                 *
871
                 *****************************************************************/
872
873
                /* Slice/tile boundary handling below is entered only for clips with multiple slices or tiles */
874
23.2k
                {
875
23.2k
                    if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
876
13.9k
                    {
877
13.9k
                        {
878
                            /*Assuming that sao shift is uniform along x and y directions*/
879
13.9k
                            if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
880
0
                            {
881
0
                                ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
882
0
                                ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
883
0
                            }
884
13.9k
                            else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
885
13.9k
                            {
886
13.9k
                                ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
887
13.9k
                                ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
888
13.9k
                            }
889
13.9k
                            ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
890
13.9k
                            ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
891
892
13.9k
                            ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
893
13.9k
                            ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
894
895
13.9k
                            ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
896
13.9k
                            ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
897
898
13.9k
                            ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
899
13.9k
                            ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
900
13.9k
                        }
901
902
13.9k
                        if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
903
4.96k
                        {
904
                            /*Calculate slice indices for neighbor pixels*/
905
4.96k
                            idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
906
4.96k
                            au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
907
4.96k
                            au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
908
4.96k
                            au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
909
4.96k
                            au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
910
4.96k
                            au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
911
912
4.96k
                            if((0 == (1 << log2_ctb_size) - sao_wd_luma))
913
0
                            {
914
0
                                if(ps_sao_ctxt->i4_ctb_x == 1)
915
0
                                {
916
0
                                    au4_idx_tl[6] = -1;
917
0
                                    au4_idx_tl[4] = -1;
918
0
                                }
919
0
                                else
920
0
                                {
921
0
                                    au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
922
0
                                }
923
0
                                if(ps_sao_ctxt->i4_ctb_y == 1)
924
0
                                {
925
0
                                    au4_idx_tl[5] = -1;
926
0
                                    au4_idx_tl[4] = -1;
927
0
                                }
928
0
                                else
929
0
                                {
930
0
                                    au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
931
0
                                    au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
932
0
                                }
933
0
                                au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
934
0
                            }
935
936
                            /* Verify that the neighbor ctbs don't cross pic boundary.
937
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
938
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags.
939
                             * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
940
                             * the respective pixel's flags are checked
941
                             */
942
943
4.96k
                            if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
944
0
                            {
945
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
946
0
                                au4_ilf_across_tile_slice_enable[6] = 0;
947
0
                            }
948
4.96k
                            else
949
4.96k
                            {
950
4.96k
                                au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
951
4.96k
                            }
952
4.96k
                            if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
953
0
                            {
954
0
                                au4_ilf_across_tile_slice_enable[5] = 0;
955
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
956
0
                            }
957
4.96k
                            else
958
4.96k
                            {
959
4.96k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
960
4.96k
                                au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
961
4.96k
                            }
962
4.96k
                            au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
963
4.96k
                            au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
964
4.96k
                            au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
965
4.96k
                            au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
966
4.96k
                            au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
967
968
4.96k
                            if(au4_idx_tl[5] > idx_tl)
969
468
                            {
970
468
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
971
468
                            }
972
973
                            /*
974
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
975
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags.
976
                             * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
977
                             * the respective pixel's flags are checked
978
                             */
979
44.6k
                            for(i = 0; i < 8; i++)
980
39.7k
                            {
981
                                /*Sets the edges that lie on the slice/tile boundary*/
982
39.7k
                                if(au4_idx_tl[i] != idx_tl)
983
10.3k
                                {
984
10.3k
                                    au1_tile_slice_boundary[i] = 1;
985
10.3k
                                }
986
29.3k
                                else
987
29.3k
                                {
988
29.3k
                                    au4_ilf_across_tile_slice_enable[i] = 1;
989
29.3k
                                }
990
39.7k
                            }
991
992
4.96k
                            ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
993
4.96k
                        }
994
995
13.9k
                        if(ps_pps->i1_tiles_enabled_flag)
996
9.00k
                        {
997
                            /* Calculate availability flags at tile boundary */
998
9.00k
                            if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
999
4.77k
                            {
1000
                                /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1001
4.77k
                                if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1002
4.50k
                                {
1003
                                    /*Set the boundary arrays*/
1004
                                    /*Calculate tile indices for neighbor pixels*/
1005
4.50k
                                    idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1006
4.50k
                                    au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1007
4.50k
                                    au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1008
4.50k
                                    au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1009
4.50k
                                    au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1010
4.50k
                                    au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1011
1012
4.50k
                                    if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1013
0
                                    {
1014
0
                                        if(ps_sao_ctxt->i4_ctb_x == 1)
1015
0
                                        {
1016
0
                                            au4_idx_tl[6] = -1;
1017
0
                                            au4_idx_tl[4] = -1;
1018
0
                                        }
1019
0
                                        else
1020
0
                                        {
1021
0
                                            au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1022
0
                                        }
1023
0
                                        if(ps_sao_ctxt->i4_ctb_y == 1)
1024
0
                                        {
1025
0
                                            au4_idx_tl[5] = -1;
1026
0
                                            au4_idx_tl[4] = -1;
1027
0
                                        }
1028
0
                                        else
1029
0
                                        {
1030
0
                                            au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1031
0
                                            au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1032
0
                                        }
1033
0
                                        au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1034
0
                                    }
1035
40.5k
                                    for(i = 0; i < 8; i++)
1036
36.0k
                                    {
1037
                                        /*Sets the edges that lie on the tile boundary*/
1038
36.0k
                                        if(au4_idx_tl[i] != idx_tl)
1039
14.0k
                                        {
1040
14.0k
                                            au1_tile_slice_boundary[i] |= 1;
1041
14.0k
                                            au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1042
14.0k
                                        }
1043
36.0k
                                    }
1044
4.50k
                                }
1045
4.77k
                            }
1046
9.00k
                        }
1047
1048
1049
                        /*Set availability flags based on tile and slice boundaries*/
1050
125k
                        for(i = 0; i < 8; i++)
1051
111k
                        {
1052
                            /*Sets the edges that lie on the slice/tile boundary*/
1053
111k
                            if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1054
14.1k
                            {
1055
14.1k
                                au1_avail_luma[i] = 0;
1056
14.1k
                            }
1057
111k
                        }
1058
13.9k
                    }
1059
23.2k
                }
1060
1061
23.2k
                if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
1062
0
                {
1063
0
                    au1_avail_luma[0] = 0;
1064
0
                    au1_avail_luma[4] = 0;
1065
0
                    au1_avail_luma[6] = 0;
1066
0
                }
1067
1068
23.2k
                if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1069
0
                {
1070
0
                    au1_avail_luma[1] = 0;
1071
0
                    au1_avail_luma[5] = 0;
1072
0
                    au1_avail_luma[7] = 0;
1073
0
                }
1074
                //y==1 case
1075
23.2k
                if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
1076
0
                {
1077
0
                    au1_avail_luma[2] = 0;
1078
0
                    au1_avail_luma[4] = 0;
1079
0
                    au1_avail_luma[5] = 0;
1080
0
                }
1081
23.2k
                if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1082
0
                {
1083
0
                    au1_avail_luma[3] = 0;
1084
0
                    au1_avail_luma[6] = 0;
1085
0
                    au1_avail_luma[7] = 0;
1086
0
                }
1087
1088
23.2k
                {
1089
23.2k
                    au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
1090
23.2k
                    u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
1091
23.2k
                    ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1092
23.2k
                                                                      src_strd,
1093
23.2k
                                                                      pu1_src_left_luma,
1094
23.2k
                                                                      pu1_src_top_luma,
1095
23.2k
                                                                      pu1_sao_src_luma_top_left_ctb,
1096
23.2k
                                                                      au1_src_top_right,
1097
23.2k
                                                                      &u1_sao_src_top_left_luma_bot_left,
1098
23.2k
                                                                      au1_avail_luma,
1099
23.2k
                                                                      ai1_offset_y,
1100
23.2k
                                                                      sao_wd_luma,
1101
23.2k
                                                                      sao_ht_luma);
1102
23.2k
                }
1103
23.2k
            }
1104
1105
155k
        }
1106
3.24M
        else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1107
419k
        {
1108
            /* Update left, top and top-left */
1109
3.77M
            for(row = 0; row < sao_ht_luma; row++)
1110
3.35M
            {
1111
3.35M
                pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1112
3.35M
            }
1113
419k
            pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1114
1115
419k
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1116
419k
        }
1117
1118
3.40M
        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && ps_slice_hdr_top_left->i1_slice_sao_chroma_flag)
1119
184k
        {
1120
184k
            if(0 == ps_sao->b3_cb_type_idx)
1121
151k
            {
1122
1.35M
                for(row = 0; row < sao_ht_chroma; row++)
1123
1.20M
                {
1124
1.20M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1125
1.20M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1126
1.20M
                }
1127
151k
                pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1128
151k
                pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1129
1130
151k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1131
1132
151k
            }
1133
1134
33.4k
            else if(1 == ps_sao->b3_cb_type_idx)
1135
16.2k
            {
1136
16.2k
                ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1137
16.2k
                ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1138
16.2k
                ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1139
16.2k
                ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1140
1141
16.2k
                ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1142
16.2k
                ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1143
16.2k
                ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1144
16.2k
                ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1145
1146
16.2k
                if(chroma_yuv420sp_vu)
1147
652
                {
1148
652
                    ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1149
652
                                                                                src_strd,
1150
652
                                                                                pu1_src_left_chroma,
1151
652
                                                                                pu1_src_top_chroma,
1152
652
                                                                                pu1_sao_src_chroma_top_left_ctb,
1153
652
                                                                                ps_sao->b5_cr_band_pos,
1154
652
                                                                                ps_sao->b5_cb_band_pos,
1155
652
                                                                                ai1_offset_cr,
1156
652
                                                                                ai1_offset_cb,
1157
652
                                                                                sao_wd_chroma,
1158
652
                                                                                sao_ht_chroma
1159
652
                                                                               );
1160
652
                }
1161
15.5k
                else
1162
15.5k
                {
1163
15.5k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1164
15.5k
                                                                                src_strd,
1165
15.5k
                                                                                pu1_src_left_chroma,
1166
15.5k
                                                                                pu1_src_top_chroma,
1167
15.5k
                                                                                pu1_sao_src_chroma_top_left_ctb,
1168
15.5k
                                                                                ps_sao->b5_cb_band_pos,
1169
15.5k
                                                                                ps_sao->b5_cr_band_pos,
1170
15.5k
                                                                                ai1_offset_cb,
1171
15.5k
                                                                                ai1_offset_cr,
1172
15.5k
                                                                                sao_wd_chroma,
1173
15.5k
                                                                                sao_ht_chroma
1174
15.5k
                                                                               );
1175
15.5k
                }
1176
16.2k
            }
1177
1178
17.2k
            else // if(2 <= ps_sao->b3_cb_type_idx)
1179
17.2k
            {
1180
17.2k
                ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1181
17.2k
                ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1182
17.2k
                ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1183
17.2k
                ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1184
1185
17.2k
                ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1186
17.2k
                ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1187
17.2k
                ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1188
17.2k
                ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1189
155k
                for(i = 0; i < 8; i++)
1190
138k
                {
1191
138k
                    au1_avail_chroma[i] = 255;
1192
138k
                    au1_tile_slice_boundary[i] = 0;
1193
138k
                    au4_idx_tl[i] = 0;
1194
138k
                    au4_ilf_across_tile_slice_enable[i] = 1;
1195
138k
                }
1196
                /*In case of slices*/
1197
17.2k
                {
1198
17.2k
                    if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1199
10.8k
                    {
1200
10.8k
                        if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
1201
881
                        {
1202
881
                            ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
1203
881
                            ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
1204
881
                        }
1205
9.93k
                        else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
1206
9.37k
                        {
1207
9.37k
                            ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
1208
9.37k
                            ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
1209
9.37k
                        }
1210
10.8k
                        ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
1211
10.8k
                        ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
1212
1213
10.8k
                        ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
1214
10.8k
                        ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
1215
1216
10.8k
                        ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
1217
10.8k
                        ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
1218
1219
10.8k
                        ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
1220
10.8k
                        ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
1221
1222
10.8k
                        if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1223
2.29k
                        {
1224
1225
2.29k
                            idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1226
2.29k
                            au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1227
2.29k
                            au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1228
2.29k
                            au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1229
2.29k
                            au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1230
2.29k
                            au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1231
1232
2.29k
                            if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
1233
752
                            {
1234
752
                                if(ps_sao_ctxt->i4_ctb_x == 1)
1235
176
                                {
1236
176
                                    au4_idx_tl[6] = -1;
1237
176
                                    au4_idx_tl[4] = -1;
1238
176
                                }
1239
576
                                else
1240
576
                                {
1241
576
                                    au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1242
576
                                }
1243
752
                                if(ps_sao_ctxt->i4_ctb_y == 1)
1244
27
                                {
1245
27
                                    au4_idx_tl[5] = -1;
1246
27
                                    au4_idx_tl[4] = -1;
1247
27
                                }
1248
725
                                else
1249
725
                                {
1250
725
                                    au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1251
725
                                    au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1252
725
                                }
1253
752
                                au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1254
752
                            }
1255
1256
                            /* Verify that the neighbor ctbs don't cross pic boundary
1257
                             * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
1258
2.29k
                            if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
1259
176
                            {
1260
176
                                au4_ilf_across_tile_slice_enable[4] = 0;
1261
176
                                au4_ilf_across_tile_slice_enable[6] = 0;
1262
176
                            }
1263
2.11k
                            else
1264
2.11k
                            {
1265
2.11k
                                au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1266
2.11k
                            }
1267
2.29k
                            if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
1268
27
                            {
1269
27
                                au4_ilf_across_tile_slice_enable[5] = 0;
1270
27
                                au4_ilf_across_tile_slice_enable[4] = 0;
1271
27
                            }
1272
2.26k
                            else
1273
2.26k
                            {
1274
2.26k
                                au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1275
2.26k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1276
2.26k
                            }
1277
2.29k
                            au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1278
2.29k
                            au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1279
2.29k
                            au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1280
2.29k
                            au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1281
2.29k
                            au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1282
                            /*
1283
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1284
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags
1285
                             */
1286
20.6k
                            for(i = 0; i < 8; i++)
1287
18.3k
                            {
1288
                                /*Sets the edges that lie on the slice/tile boundary*/
1289
18.3k
                                if(au4_idx_tl[i] != idx_tl)
1290
3.71k
                                {
1291
3.71k
                                    au1_tile_slice_boundary[i] = 1;
1292
3.71k
                                }
1293
14.6k
                                else
1294
14.6k
                                {
1295
14.6k
                                    au4_ilf_across_tile_slice_enable[i] = 1;
1296
14.6k
                                }
1297
18.3k
                            }
1298
1299
                            /*Reset indices*/
1300
20.6k
                            for(i = 0; i < 8; i++)
1301
18.3k
                            {
1302
18.3k
                                au4_idx_tl[i] = 0;
1303
18.3k
                            }
1304
2.29k
                        }
1305
10.8k
                        if(ps_pps->i1_tiles_enabled_flag)
1306
8.52k
                        {
1307
                            /* Calculate availability flags at slice boundary */
1308
8.52k
                            if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1309
3.31k
                            {
1310
                                /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1311
3.31k
                                if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1312
3.17k
                                {
1313
                                    /*Set the boundary arrays*/
1314
                                    /*Calculate tile indices for neighbor pixels*/
1315
3.17k
                                    idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1316
3.17k
                                    au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1317
3.17k
                                    au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1318
3.17k
                                    au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1319
3.17k
                                    au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1320
3.17k
                                    au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1321
1322
3.17k
                                    if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1323
0
                                    {
1324
0
                                        if(ps_sao_ctxt->i4_ctb_x == 1)
1325
0
                                        {
1326
0
                                            au4_idx_tl[6] = -1;
1327
0
                                            au4_idx_tl[4] = -1;
1328
0
                                        }
1329
0
                                        else
1330
0
                                        {
1331
0
                                            au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1332
0
                                        }
1333
0
                                        if(ps_sao_ctxt->i4_ctb_y == 1)
1334
0
                                        {
1335
0
                                            au4_idx_tl[5] = -1;
1336
0
                                            au4_idx_tl[4] = -1;
1337
0
                                        }
1338
0
                                        else
1339
0
                                        {
1340
0
                                            au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1341
0
                                            au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1342
0
                                        }
1343
0
                                        au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1344
0
                                    }
1345
28.5k
                                    for(i = 0; i < 8; i++)
1346
25.4k
                                    {
1347
                                        /*Sets the edges that lie on the tile boundary*/
1348
25.4k
                                        if(au4_idx_tl[i] != idx_tl)
1349
9.67k
                                        {
1350
9.67k
                                            au1_tile_slice_boundary[i] |= 1;
1351
9.67k
                                            au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1352
9.67k
                                        }
1353
25.4k
                                    }
1354
3.17k
                                }
1355
3.31k
                            }
1356
8.52k
                        }
1357
1358
97.3k
                        for(i = 0; i < 8; i++)
1359
86.5k
                        {
1360
                            /*Sets the edges that lie on the slice/tile boundary*/
1361
86.5k
                            if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1362
10.1k
                            {
1363
10.1k
                                au1_avail_chroma[i] = 0;
1364
10.1k
                            }
1365
86.5k
                        }
1366
10.8k
                    }
1367
17.2k
                }
1368
1369
17.2k
                if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
1370
579
                {
1371
579
                    au1_avail_chroma[0] = 0;
1372
579
                    au1_avail_chroma[4] = 0;
1373
579
                    au1_avail_chroma[6] = 0;
1374
579
                }
1375
17.2k
                if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1376
0
                {
1377
0
                    au1_avail_chroma[1] = 0;
1378
0
                    au1_avail_chroma[5] = 0;
1379
0
                    au1_avail_chroma[7] = 0;
1380
0
                }
1381
1382
17.2k
                if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1383
293
                {
1384
293
                    au1_avail_chroma[2] = 0;
1385
293
                    au1_avail_chroma[4] = 0;
1386
293
                    au1_avail_chroma[5] = 0;
1387
293
                }
1388
17.2k
                if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1389
0
                {
1390
0
                    au1_avail_chroma[3] = 0;
1391
0
                    au1_avail_chroma[6] = 0;
1392
0
                    au1_avail_chroma[7] = 0;
1393
0
                }
1394
1395
17.2k
                {
1396
17.2k
                    au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
1397
17.2k
                    au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
1398
17.2k
                    au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
1399
17.2k
                    au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
1400
17.2k
                    if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
1401
1.66k
                    {
1402
1.66k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1403
1.66k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1404
1.66k
                    }
1405
1406
17.2k
                    if(chroma_yuv420sp_vu)
1407
1.05k
                    {
1408
1.05k
                        ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1409
1.05k
                                                                             src_strd,
1410
1.05k
                                                                             pu1_src_left_chroma,
1411
1.05k
                                                                             pu1_src_top_chroma,
1412
1.05k
                                                                             pu1_sao_src_chroma_top_left_ctb,
1413
1.05k
                                                                             au1_src_top_right,
1414
1.05k
                                                                             au1_sao_src_top_left_chroma_bot_left,
1415
1.05k
                                                                             au1_avail_chroma,
1416
1.05k
                                                                             ai1_offset_cr,
1417
1.05k
                                                                             ai1_offset_cb,
1418
1.05k
                                                                             sao_wd_chroma,
1419
1.05k
                                                                             sao_ht_chroma);
1420
1.05k
                    }
1421
16.2k
                    else
1422
16.2k
                    {
1423
16.2k
                        ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1424
16.2k
                                                                             src_strd,
1425
16.2k
                                                                             pu1_src_left_chroma,
1426
16.2k
                                                                             pu1_src_top_chroma,
1427
16.2k
                                                                             pu1_sao_src_chroma_top_left_ctb,
1428
16.2k
                                                                             au1_src_top_right,
1429
16.2k
                                                                             au1_sao_src_top_left_chroma_bot_left,
1430
16.2k
                                                                             au1_avail_chroma,
1431
16.2k
                                                                             ai1_offset_cb,
1432
16.2k
                                                                             ai1_offset_cr,
1433
16.2k
                                                                             sao_wd_chroma,
1434
16.2k
                                                                             sao_ht_chroma);
1435
16.2k
                    }
1436
17.2k
                }
1437
17.2k
            }
1438
184k
        }
1439
3.21M
        else if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && ((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag)))
1440
383k
        {
1441
3.45M
            for(row = 0; row < sao_ht_chroma; row++)
1442
3.07M
            {
1443
3.07M
                pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1444
3.07M
                pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1445
3.07M
            }
1446
383k
            pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1447
383k
            pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1448
1449
383k
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1450
383k
        }
1451
1452
3.40M
        pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
1453
3.40M
        pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
1454
3.40M
        ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
1455
3.40M
    }
1456
1457
1458
    /* Top CTB */
1459
3.91M
    if((ps_sao_ctxt->i4_ctb_y > 0))
1460
3.52M
    {
1461
3.52M
        WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
1462
3.52M
        WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
1463
3.52M
        WORD32 sao_ht_luma = SAO_SHIFT_CTB;
1464
3.52M
        WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
1465
1466
3.52M
        WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
1467
3.52M
        WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
1468
3.52M
        WORD32 au4_idx_t[8], idx_t;
1469
1470
3.52M
        WORD32 remaining_cols;
1471
1472
3.52M
        slice_header_t *ps_slice_hdr_top;
1473
3.52M
        {
1474
3.52M
            WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
1475
3.52M
                                        (ps_sao_ctxt->i4_ctb_x);
1476
3.52M
            ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx];
1477
3.52M
        }
1478
1479
3.52M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
1480
3.52M
        if(remaining_cols <= SAO_SHIFT_CTB)
1481
121k
        {
1482
121k
            sao_wd_luma += remaining_cols;
1483
121k
        }
1484
3.52M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
1485
3.52M
        if(remaining_cols <= 2 * SAO_SHIFT_CTB)
1486
121k
        {
1487
121k
            sao_wd_chroma += remaining_cols;
1488
121k
        }
1489
1490
3.52M
        pu1_src_luma -= (sao_ht_luma * src_strd);
1491
3.52M
        pu1_src_chroma -= (sao_ht_chroma * src_strd);
1492
3.52M
        ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
1493
3.52M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1494
3.52M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1495
3.52M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
1496
3.52M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
1497
1498
3.52M
        if(0 != sao_wd_luma)
1499
3.52M
        {
1500
3.52M
            if(ps_slice_hdr_top->i1_slice_sao_luma_flag)
1501
170k
            {
1502
170k
                if(0 == ps_sao->b3_y_type_idx)
1503
115k
                {
1504
                    /* Update left, top and top-left */
1505
1.04M
                    for(row = 0; row < sao_ht_luma; row++)
1506
927k
                    {
1507
927k
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1508
927k
                    }
1509
115k
                    pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1510
1511
115k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1512
1513
115k
                }
1514
1515
54.1k
                else if(1 == ps_sao->b3_y_type_idx)
1516
28.1k
                {
1517
28.1k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1518
28.1k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1519
28.1k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1520
28.1k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1521
1522
28.1k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1523
28.1k
                                                                              src_strd,
1524
28.1k
                                                                              pu1_src_left_luma,
1525
28.1k
                                                                              pu1_src_top_luma,
1526
28.1k
                                                                              pu1_sao_src_luma_top_left_ctb,
1527
28.1k
                                                                              ps_sao->b5_y_band_pos,
1528
28.1k
                                                                              ai1_offset_y,
1529
28.1k
                                                                              sao_wd_luma,
1530
28.1k
                                                                              sao_ht_luma
1531
28.1k
                                                                             );
1532
28.1k
                }
1533
1534
25.9k
                else // if(2 <= ps_sao->b3_y_type_idx)
1535
25.9k
                {
1536
25.9k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1537
25.9k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1538
25.9k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1539
25.9k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1540
1541
25.9k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
1542
25.9k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
1543
25.9k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
1544
1545
233k
                    for(i = 0; i < 8; i++)
1546
207k
                    {
1547
1548
207k
                        au4_ilf_across_tile_slice_enable[i] = 1;
1549
207k
                    }
1550
                    /******************************************************************
1551
                     * Derive the  Top-left CTB's neighbor pixel's slice indices.
1552
                     *
1553
                     *               T_T
1554
                     *          ____________
1555
                     *         |    |       |
1556
                     *         | T_L|  T    |T_R
1557
                     *         |    | ______|____
1558
                     *         |    |  T_D  |    |
1559
                     *         |    |       |    |
1560
                     *         |____|_______|    |
1561
                     *              |            |
1562
                     *              |            |
1563
                     *              |____________|
1564
                     *
1565
                     *****************************************************************/
1566
1567
                    /*In case of slices*/
1568
25.9k
                    {
1569
25.9k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1570
15.2k
                        {
1571
1572
15.2k
                            ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1573
15.2k
                            ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1574
1575
15.2k
                            ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1576
15.2k
                            ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1577
1578
15.2k
                            ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1579
15.2k
                            ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1580
1581
15.2k
                            ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1582
15.2k
                            ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1583
1584
15.2k
                            ctbx_t = ps_sao_ctxt->i4_ctb_x;
1585
15.2k
                            ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1586
1587
15.2k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1588
5.30k
                            {
1589
                                /*Calculate neighbor ctb slice indices*/
1590
5.30k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1591
753
                                {
1592
753
                                    au4_idx_t[0] = -1;
1593
753
                                    au4_idx_t[6] = -1;
1594
753
                                    au4_idx_t[4] = -1;
1595
753
                                }
1596
4.55k
                                else
1597
4.55k
                                {
1598
4.55k
                                    au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1599
4.55k
                                    au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1600
4.55k
                                }
1601
5.30k
                                idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1602
5.30k
                                au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1603
5.30k
                                au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1604
5.30k
                                au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1605
1606
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1607
5.30k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1608
753
                                {
1609
753
                                    au4_ilf_across_tile_slice_enable[4] = 0;
1610
753
                                    au4_ilf_across_tile_slice_enable[6] = 0;
1611
753
                                    au4_ilf_across_tile_slice_enable[0] = 0;
1612
753
                                }
1613
4.55k
                                else
1614
4.55k
                                {
1615
4.55k
                                    au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1616
4.55k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1617
4.55k
                                }
1618
1619
1620
1621
5.30k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1622
5.30k
                                au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1623
5.30k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1624
5.30k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1625
5.30k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1626
1627
5.30k
                                if(au4_idx_t[6] < idx_t)
1628
763
                                {
1629
763
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1630
763
                                }
1631
1632
                                /*
1633
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1634
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1635
                                 */
1636
1637
47.7k
                                for(i = 0; i < 8; i++)
1638
42.4k
                                {
1639
                                    /*Sets the edges that lie on the slice/tile boundary*/
1640
42.4k
                                    if(au4_idx_t[i] != idx_t)
1641
12.3k
                                    {
1642
12.3k
                                        au1_tile_slice_boundary[i] = 1;
1643
                                        /*Check for slice flag at such boundaries*/
1644
12.3k
                                    }
1645
30.0k
                                    else
1646
30.0k
                                    {
1647
30.0k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
1648
30.0k
                                    }
1649
42.4k
                                }
1650
                                /*Reset indices*/
1651
47.7k
                                for(i = 0; i < 8; i++)
1652
42.4k
                                {
1653
42.4k
                                    au4_idx_t[i] = 0;
1654
42.4k
                                }
1655
5.30k
                            }
1656
1657
15.2k
                            if(ps_pps->i1_tiles_enabled_flag)
1658
9.96k
                            {
1659
                                /* Calculate availability flags at tile boundary */
1660
9.96k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1661
5.56k
                                {
1662
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1663
5.56k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1664
5.26k
                                    {
1665
                                        /*Calculate neighbor ctb tile indices*/
1666
5.26k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
1667
904
                                        {
1668
904
                                            au4_idx_t[0] = -1;
1669
904
                                            au4_idx_t[6] = -1;
1670
904
                                            au4_idx_t[4] = -1;
1671
904
                                        }
1672
4.36k
                                        else
1673
4.36k
                                        {
1674
4.36k
                                            au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1675
4.36k
                                            au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1676
4.36k
                                        }
1677
5.26k
                                        idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1678
5.26k
                                        au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1679
5.26k
                                        au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1680
5.26k
                                        au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1681
1682
47.4k
                                        for(i = 0; i < 8; i++)
1683
42.1k
                                        {
1684
                                            /*Sets the edges that lie on the tile boundary*/
1685
42.1k
                                            if(au4_idx_t[i] != idx_t)
1686
17.5k
                                            {
1687
17.5k
                                                au1_tile_slice_boundary[i] |= 1;
1688
17.5k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1689
17.5k
                                            }
1690
42.1k
                                        }
1691
5.26k
                                    }
1692
5.56k
                                }
1693
9.96k
                            }
1694
1695
137k
                            for(i = 0; i < 8; i++)
1696
122k
                            {
1697
                                /*Mark a neighbor unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
1698
122k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1699
19.3k
                                {
1700
19.3k
                                    au1_avail_luma[i] = 0;
1701
19.3k
                                }
1702
122k
                            }
1703
15.2k
                        }
1704
25.9k
                    }
1705
1706
1707
25.9k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
1708
3.21k
                    {
1709
3.21k
                        au1_avail_luma[0] = 0;
1710
3.21k
                        au1_avail_luma[4] = 0;
1711
3.21k
                        au1_avail_luma[6] = 0;
1712
3.21k
                    }
1713
1714
25.9k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
1715
2.76k
                    {
1716
2.76k
                        au1_avail_luma[1] = 0;
1717
2.76k
                        au1_avail_luma[5] = 0;
1718
2.76k
                        au1_avail_luma[7] = 0;
1719
2.76k
                    }
1720
1721
25.9k
                    if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
1722
0
                    {
1723
0
                        au1_avail_luma[2] = 0;
1724
0
                        au1_avail_luma[4] = 0;
1725
0
                        au1_avail_luma[5] = 0;
1726
0
                    }
1727
1728
25.9k
                    if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1729
0
                    {
1730
0
                        au1_avail_luma[3] = 0;
1731
0
                        au1_avail_luma[6] = 0;
1732
0
                        au1_avail_luma[7] = 0;
1733
0
                    }
1734
1735
25.9k
                    {
1736
25.9k
                        au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
1737
25.9k
                        u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
1738
25.9k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1739
25.9k
                                                                          src_strd,
1740
25.9k
                                                                          pu1_src_left_luma,
1741
25.9k
                                                                          pu1_src_top_luma,
1742
25.9k
                                                                          pu1_sao_src_luma_top_left_ctb,
1743
25.9k
                                                                          au1_src_top_right,
1744
25.9k
                                                                          &u1_sao_src_top_left_luma_bot_left,
1745
25.9k
                                                                          au1_avail_luma,
1746
25.9k
                                                                          ai1_offset_y,
1747
25.9k
                                                                          sao_wd_luma,
1748
25.9k
                                                                          sao_ht_luma);
1749
25.9k
                    }
1750
25.9k
                }
1751
170k
            }
1752
3.35M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1753
434k
            {
1754
                /* Update left, top and top-left */
1755
3.91M
                for(row = 0; row < sao_ht_luma; row++)
1756
3.47M
                {
1757
3.47M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1758
3.47M
                }
1759
434k
                pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1760
1761
434k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1762
434k
            }
1763
3.52M
        }
1764
1765
3.52M
        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && 0 != sao_wd_chroma)
1766
3.49M
        {
1767
3.49M
            if(ps_slice_hdr_top->i1_slice_sao_chroma_flag)
1768
184k
            {
1769
184k
                if(0 == ps_sao->b3_cb_type_idx)
1770
150k
                {
1771
1772
1.35M
                    for(row = 0; row < sao_ht_chroma; row++)
1773
1.20M
                    {
1774
1.20M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1775
1.20M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1776
1.20M
                    }
1777
150k
                    pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1778
150k
                    pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1779
1780
150k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1781
1782
150k
                }
1783
1784
33.4k
                else if(1 == ps_sao->b3_cb_type_idx)
1785
15.8k
                {
1786
15.8k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1787
15.8k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1788
15.8k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1789
15.8k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1790
1791
15.8k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1792
15.8k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1793
15.8k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1794
15.8k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1795
1796
15.8k
                    if(chroma_yuv420sp_vu)
1797
729
                    {
1798
729
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1799
729
                                                                                    src_strd,
1800
729
                                                                                    pu1_src_left_chroma,
1801
729
                                                                                    pu1_src_top_chroma,
1802
729
                                                                                    pu1_sao_src_chroma_top_left_ctb,
1803
729
                                                                                    ps_sao->b5_cr_band_pos,
1804
729
                                                                                    ps_sao->b5_cb_band_pos,
1805
729
                                                                                    ai1_offset_cr,
1806
729
                                                                                    ai1_offset_cb,
1807
729
                                                                                    sao_wd_chroma,
1808
729
                                                                                    sao_ht_chroma
1809
729
                                                                                   );
1810
729
                    }
1811
15.1k
                    else
1812
15.1k
                    {
1813
15.1k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1814
15.1k
                                                                                    src_strd,
1815
15.1k
                                                                                    pu1_src_left_chroma,
1816
15.1k
                                                                                    pu1_src_top_chroma,
1817
15.1k
                                                                                    pu1_sao_src_chroma_top_left_ctb,
1818
15.1k
                                                                                    ps_sao->b5_cb_band_pos,
1819
15.1k
                                                                                    ps_sao->b5_cr_band_pos,
1820
15.1k
                                                                                    ai1_offset_cb,
1821
15.1k
                                                                                    ai1_offset_cr,
1822
15.1k
                                                                                    sao_wd_chroma,
1823
15.1k
                                                                                    sao_ht_chroma
1824
15.1k
                                                                                   );
1825
15.1k
                    }
1826
15.8k
                }
1827
17.5k
                else // if(2 <= ps_sao->b3_cb_type_idx)
1828
17.5k
                {
1829
17.5k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1830
17.5k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1831
17.5k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1832
17.5k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1833
1834
17.5k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1835
17.5k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1836
17.5k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1837
17.5k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1838
1839
158k
                    for(i = 0; i < 8; i++)
1840
140k
                    {
1841
140k
                        au1_avail_chroma[i] = 255;
1842
140k
                        au1_tile_slice_boundary[i] = 0;
1843
140k
                        au4_idx_t[i] = 0;
1844
140k
                        au4_ilf_across_tile_slice_enable[i] = 1;
1845
140k
                    }
1846
1847
17.5k
                    {
1848
17.5k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1849
10.5k
                        {
1850
10.5k
                            ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1851
10.5k
                            ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1852
1853
10.5k
                            ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1854
10.5k
                            ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1855
1856
10.5k
                            ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1857
10.5k
                            ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1858
1859
10.5k
                            ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1860
10.5k
                            ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1861
1862
10.5k
                            ctbx_t = ps_sao_ctxt->i4_ctb_x;
1863
10.5k
                            ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1864
1865
10.5k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1866
1.78k
                            {
1867
1.78k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1868
190
                                {
1869
190
                                    au4_idx_t[0] = -1;
1870
190
                                    au4_idx_t[6] = -1;
1871
190
                                    au4_idx_t[4] = -1;
1872
190
                                }
1873
1.59k
                                else
1874
1.59k
                                {
1875
1.59k
                                    au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1876
1.59k
                                    au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1877
1.59k
                                }
1878
1.78k
                                idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1879
1.78k
                                au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1880
1.78k
                                au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1881
1.78k
                                au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1882
1883
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1884
1885
1.78k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1886
190
                                {
1887
190
                                    au4_ilf_across_tile_slice_enable[4] = 0;
1888
190
                                    au4_ilf_across_tile_slice_enable[6] = 0;
1889
190
                                    au4_ilf_across_tile_slice_enable[0] = 0;
1890
190
                                }
1891
1.59k
                                else
1892
1.59k
                                {
1893
1.59k
                                    au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1894
1.59k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1895
1.59k
                                }
1896
1897
1.78k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1898
1.78k
                                au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1899
1.78k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1900
1.78k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1901
1.78k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1902
1903
1.78k
                                if(idx_t > au4_idx_t[6])
1904
200
                                {
1905
200
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1906
200
                                }
1907
1908
                                /*
1909
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1910
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1911
                                 */
1912
16.0k
                                for(i = 0; i < 8; i++)
1913
14.2k
                                {
1914
                                    /*Sets the edges that lie on the slice/tile boundary*/
1915
14.2k
                                    if(au4_idx_t[i] != idx_t)
1916
3.84k
                                    {
1917
3.84k
                                        au1_tile_slice_boundary[i] = 1;
1918
3.84k
                                    }
1919
10.4k
                                    else
1920
10.4k
                                    {
1921
                                        /*Indicates that the neighbour belongs to same/dependent slice*/
1922
10.4k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
1923
10.4k
                                    }
1924
14.2k
                                }
1925
                                /*Reset indices*/
1926
16.0k
                                for(i = 0; i < 8; i++)
1927
14.2k
                                {
1928
14.2k
                                    au4_idx_t[i] = 0;
1929
14.2k
                                }
1930
1.78k
                            }
1931
10.5k
                            if(ps_pps->i1_tiles_enabled_flag)
1932
8.73k
                            {
1933
                                /* Calculate availability flags at tile boundary */
1934
8.73k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1935
3.30k
                                {
1936
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1937
3.30k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1938
3.14k
                                    {
1939
                                        /*Calculate neighbor ctb tile indices*/
1940
3.14k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
1941
755
                                        {
1942
755
                                            au4_idx_t[0] = -1;
1943
755
                                            au4_idx_t[6] = -1;
1944
755
                                            au4_idx_t[4] = -1;
1945
755
                                        }
1946
2.39k
                                        else
1947
2.39k
                                        {
1948
2.39k
                                            au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1949
2.39k
                                            au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1950
2.39k
                                        }
1951
3.14k
                                        idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1952
3.14k
                                        au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1953
3.14k
                                        au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1954
3.14k
                                        au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1955
1956
28.3k
                                        for(i = 0; i < 8; i++)
1957
25.1k
                                        {
1958
                                            /*Sets the edges that lie on the tile boundary*/
1959
25.1k
                                            if(au4_idx_t[i] != idx_t)
1960
10.3k
                                            {
1961
10.3k
                                                au1_tile_slice_boundary[i] |= 1;
1962
10.3k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1963
10.3k
                                            }
1964
25.1k
                                        }
1965
3.14k
                                    }
1966
3.30k
                                }
1967
8.73k
                            }
1968
94.5k
                            for(i = 0; i < 8; i++)
1969
84.0k
                            {
1970
                                /*Mark a neighbor unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
1971
84.0k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1972
10.7k
                                {
1973
10.7k
                                    au1_avail_chroma[i] = 0;
1974
10.7k
                                }
1975
84.0k
                            }
1976
1977
10.5k
                        }
1978
17.5k
                    }
1979
17.5k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
1980
2.39k
                    {
1981
2.39k
                        au1_avail_chroma[0] = 0;
1982
2.39k
                        au1_avail_chroma[4] = 0;
1983
2.39k
                        au1_avail_chroma[6] = 0;
1984
2.39k
                    }
1985
1986
17.5k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
1987
1.98k
                    {
1988
1.98k
                        au1_avail_chroma[1] = 0;
1989
1.98k
                        au1_avail_chroma[5] = 0;
1990
1.98k
                        au1_avail_chroma[7] = 0;
1991
1.98k
                    }
1992
1993
17.5k
                    if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1994
13
                    {
1995
13
                        au1_avail_chroma[2] = 0;
1996
13
                        au1_avail_chroma[4] = 0;
1997
13
                        au1_avail_chroma[5] = 0;
1998
13
                    }
1999
2000
17.5k
                    if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
2001
0
                    {
2002
0
                        au1_avail_chroma[3] = 0;
2003
0
                        au1_avail_chroma[6] = 0;
2004
0
                        au1_avail_chroma[7] = 0;
2005
0
                    }
2006
2007
17.5k
                    {
2008
17.5k
                        au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
2009
17.5k
                        au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
2010
17.5k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2011
17.5k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2012
2013
17.5k
                        if(chroma_yuv420sp_vu)
2014
1.14k
                        {
2015
1.14k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2016
1.14k
                                                                                 src_strd,
2017
1.14k
                                                                                 pu1_src_left_chroma,
2018
1.14k
                                                                                 pu1_src_top_chroma,
2019
1.14k
                                                                                 pu1_sao_src_chroma_top_left_ctb,
2020
1.14k
                                                                                 au1_src_top_right,
2021
1.14k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
2022
1.14k
                                                                                 au1_avail_chroma,
2023
1.14k
                                                                                 ai1_offset_cr,
2024
1.14k
                                                                                 ai1_offset_cb,
2025
1.14k
                                                                                 sao_wd_chroma,
2026
1.14k
                                                                                 sao_ht_chroma);
2027
1.14k
                        }
2028
16.3k
                        else
2029
16.3k
                        {
2030
16.3k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2031
16.3k
                                                                                 src_strd,
2032
16.3k
                                                                                 pu1_src_left_chroma,
2033
16.3k
                                                                                 pu1_src_top_chroma,
2034
16.3k
                                                                                 pu1_sao_src_chroma_top_left_ctb,
2035
16.3k
                                                                                 au1_src_top_right,
2036
16.3k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
2037
16.3k
                                                                                 au1_avail_chroma,
2038
16.3k
                                                                                 ai1_offset_cb,
2039
16.3k
                                                                                 ai1_offset_cr,
2040
16.3k
                                                                                 sao_wd_chroma,
2041
16.3k
                                                                                 sao_ht_chroma);
2042
16.3k
                        }
2043
17.5k
                    }
2044
2045
17.5k
                }
2046
184k
            }
2047
3.31M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2048
397k
            {
2049
3.57M
                for(row = 0; row < sao_ht_chroma; row++)
2050
3.17M
                {
2051
3.17M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2052
3.17M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2053
3.17M
                }
2054
397k
                pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2055
397k
                pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2056
2057
397k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2058
397k
            }
2059
3.49M
        }
2060
2061
3.52M
        pu1_src_luma += sao_ht_luma * src_strd;
2062
3.52M
        pu1_src_chroma += sao_ht_chroma * src_strd;
2063
3.52M
        ps_sao += (ps_sps->i2_pic_wd_in_ctb);
2064
3.52M
    }
2065
2066
    /* Left CTB */
2067
3.91M
    if(ps_sao_ctxt->i4_ctb_x > 0)
2068
3.78M
    {
2069
3.78M
        WORD32 sao_wd_luma = SAO_SHIFT_CTB;
2070
3.78M
        WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
2071
3.78M
        WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2072
3.78M
        WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2073
2074
3.78M
        WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
2075
3.78M
        WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
2076
3.78M
        WORD32 au4_idx_l[8], idx_l;
2077
2078
3.78M
        WORD32 remaining_rows;
2079
3.78M
        slice_header_t *ps_slice_hdr_left;
2080
3.78M
        {
2081
3.78M
            WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb +
2082
3.78M
                                        (ps_sao_ctxt->i4_ctb_x - 1);
2083
3.78M
            ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx];
2084
3.78M
        }
2085
2086
3.78M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2087
3.78M
        if(remaining_rows <= SAO_SHIFT_CTB)
2088
383k
        {
2089
383k
            sao_ht_luma += remaining_rows;
2090
383k
        }
2091
3.78M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2092
3.78M
        if(remaining_rows <= SAO_SHIFT_CTB)
2093
383k
        {
2094
383k
            sao_ht_chroma += remaining_rows;
2095
383k
        }
2096
2097
3.78M
        pu1_src_luma -= sao_wd_luma;
2098
3.78M
        pu1_src_chroma -= sao_wd_chroma;
2099
3.78M
        ps_sao -= 1;
2100
3.78M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
2101
3.78M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
2102
3.78M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2103
3.78M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2104
2105
2106
3.78M
        if(0 != sao_ht_luma)
2107
3.78M
        {
2108
3.78M
            if(ps_slice_hdr_left->i1_slice_sao_luma_flag)
2109
177k
            {
2110
177k
                if(0 == ps_sao->b3_y_type_idx)
2111
122k
                {
2112
                    /* Update left, top and top-left */
2113
5.44M
                    for(row = 0; row < sao_ht_luma; row++)
2114
5.32M
                    {
2115
5.32M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2116
5.32M
                    }
2117
                    /*Update in next location*/
2118
122k
                    pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2119
2120
122k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2121
2122
122k
                }
2123
2124
55.4k
                else if(1 == ps_sao->b3_y_type_idx)
2125
29.3k
                {
2126
29.3k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2127
29.3k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2128
29.3k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2129
29.3k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2130
2131
29.3k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2132
29.3k
                                                                              src_strd,
2133
29.3k
                                                                              pu1_src_left_luma,
2134
29.3k
                                                                              pu1_src_top_luma,
2135
29.3k
                                                                              pu1_sao_src_top_left_luma_curr_ctb,
2136
29.3k
                                                                              ps_sao->b5_y_band_pos,
2137
29.3k
                                                                              ai1_offset_y,
2138
29.3k
                                                                              sao_wd_luma,
2139
29.3k
                                                                              sao_ht_luma
2140
29.3k
                                                                             );
2141
29.3k
                }
2142
2143
26.0k
                else // if(2 <= ps_sao->b3_y_type_idx)
2144
26.0k
                {
2145
26.0k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2146
26.0k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2147
26.0k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2148
26.0k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2149
2150
234k
                    for(i = 0; i < 8; i++)
2151
208k
                    {
2152
208k
                        au1_avail_luma[i] = 255;
2153
208k
                        au1_tile_slice_boundary[i] = 0;
2154
208k
                        au4_idx_l[i] = 0;
2155
208k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2156
208k
                    }
2157
                    /******************************************************************
2158
                     * Derive the Left CTB's neighbour pixel's slice indices.
2159
                     *
2160
                     *
2161
                     *          ____________
2162
                     *         |    |       |
2163
                     *         | L_T|       |
2164
                     *         |____|_______|____
2165
                     *         |    |       |    |
2166
                     *     L_L |  L |  L_R  |    |
2167
                     *         |____|_______|    |
2168
                     *              |            |
2169
                     *          L_D |            |
2170
                     *              |____________|
2171
                     *
2172
                     *****************************************************************/
2173
2174
                    /*In case of slices or tiles*/
2175
26.0k
                    {
2176
26.0k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2177
14.2k
                        {
2178
14.2k
                            ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2179
14.2k
                            ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2180
2181
14.2k
                            ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2182
14.2k
                            ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2183
2184
14.2k
                            ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2185
14.2k
                            ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2186
2187
14.2k
                            ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2188
14.2k
                            ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2189
2190
14.2k
                            ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2191
14.2k
                            ctby_l = ps_sao_ctxt->i4_ctb_y;
2192
2193
14.2k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2194
4.46k
                            {
2195
4.46k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2196
355
                                {
2197
355
                                    au4_idx_l[2] = -1;
2198
355
                                    au4_idx_l[4] = -1;
2199
355
                                    au4_idx_l[5] = -1;
2200
355
                                }
2201
4.10k
                                else
2202
4.10k
                                {
2203
4.10k
                                    au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2204
4.10k
                                    au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2205
4.10k
                                }
2206
4.46k
                                idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2207
4.46k
                                au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2208
4.46k
                                au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2209
4.46k
                                au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2210
2211
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
2212
4.46k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2213
355
                                {
2214
355
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2215
355
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2216
355
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2217
355
                                }
2218
4.10k
                                else
2219
4.10k
                                {
2220
4.10k
                                    au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2221
4.10k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2222
2223
4.10k
                                }
2224
                                //TODO: ILF flag checks for [0] and [6] are missing.
2225
4.46k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2226
4.46k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2227
4.46k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2228
2229
4.46k
                                if(idx_l < au4_idx_l[5])
2230
0
                                {
2231
0
                                    au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2232
0
                                }
2233
2234
                                /*
2235
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2236
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2237
                                 */
2238
40.1k
                                for(i = 0; i < 8; i++)
2239
35.6k
                                {
2240
                                    /*Sets the edges that lie on the slice/tile boundary*/
2241
35.6k
                                    if(au4_idx_l[i] != idx_l)
2242
10.3k
                                    {
2243
10.3k
                                        au1_tile_slice_boundary[i] = 1;
2244
10.3k
                                    }
2245
25.3k
                                    else
2246
25.3k
                                    {
2247
25.3k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2248
25.3k
                                    }
2249
35.6k
                                }
2250
                                /*Reset indices*/
2251
40.1k
                                for(i = 0; i < 8; i++)
2252
35.6k
                                {
2253
35.6k
                                    au4_idx_l[i] = 0;
2254
35.6k
                                }
2255
4.46k
                            }
2256
2257
14.2k
                            if(ps_pps->i1_tiles_enabled_flag)
2258
9.78k
                            {
2259
                                /* Calculate availability flags at tile boundary */
2260
9.78k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2261
3.88k
                                {
2262
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2263
3.88k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2264
3.65k
                                    {
2265
3.65k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2266
325
                                        {
2267
325
                                            au4_idx_l[2] = -1;
2268
325
                                            au4_idx_l[4] = -1;
2269
325
                                            au4_idx_l[5] = -1;
2270
325
                                        }
2271
3.32k
                                        else
2272
3.32k
                                        {
2273
3.32k
                                            au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2274
3.32k
                                            au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2275
3.32k
                                        }
2276
2277
3.65k
                                        idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2278
3.65k
                                        au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2279
3.65k
                                        au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2280
3.65k
                                        au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2281
2282
32.8k
                                        for(i = 0; i < 8; i++)
2283
29.2k
                                        {
2284
                                            /*Sets the edges that lie on the slice/tile boundary*/
2285
29.2k
                                            if(au4_idx_l[i] != idx_l)
2286
11.5k
                                            {
2287
11.5k
                                                au1_tile_slice_boundary[i] |= 1;
2288
11.5k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
2289
11.5k
                                            }
2290
29.2k
                                        }
2291
3.65k
                                    }
2292
3.88k
                                }
2293
9.78k
                            }
2294
2295
128k
                            for(i = 0; i < 8; i++)
2296
113k
                            {
2297
                                /*Sets the edges that lie on the slice/tile boundary*/
2298
113k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2299
12.7k
                                {
2300
12.7k
                                    au1_avail_luma[i] = 0;
2301
12.7k
                                }
2302
113k
                            }
2303
14.2k
                        }
2304
26.0k
                    }
2305
26.0k
                    if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
2306
0
                    {
2307
0
                        au1_avail_luma[0] = 0;
2308
0
                        au1_avail_luma[4] = 0;
2309
0
                        au1_avail_luma[6] = 0;
2310
0
                    }
2311
26.0k
                    if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2312
0
                    {
2313
0
                        au1_avail_luma[1] = 0;
2314
0
                        au1_avail_luma[5] = 0;
2315
0
                        au1_avail_luma[7] = 0;
2316
0
                    }
2317
2318
26.0k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2319
6.55k
                    {
2320
6.55k
                        au1_avail_luma[2] = 0;
2321
6.55k
                        au1_avail_luma[4] = 0;
2322
6.55k
                        au1_avail_luma[5] = 0;
2323
6.55k
                    }
2324
2325
26.0k
                    if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2326
2.81k
                    {
2327
2.81k
                        au1_avail_luma[3] = 0;
2328
2.81k
                        au1_avail_luma[6] = 0;
2329
2.81k
                        au1_avail_luma[7] = 0;
2330
2.81k
                    }
2331
2332
26.0k
                    {
2333
26.0k
                        au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
2334
26.0k
                        u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
2335
26.0k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2336
26.0k
                                                                          src_strd,
2337
26.0k
                                                                          pu1_src_left_luma,
2338
26.0k
                                                                          pu1_src_top_luma,
2339
26.0k
                                                                          pu1_sao_src_top_left_luma_curr_ctb,
2340
26.0k
                                                                          au1_src_top_right,
2341
26.0k
                                                                          &u1_sao_src_top_left_luma_bot_left,
2342
26.0k
                                                                          au1_avail_luma,
2343
26.0k
                                                                          ai1_offset_y,
2344
26.0k
                                                                          sao_wd_luma,
2345
26.0k
                                                                          sao_ht_luma);
2346
26.0k
                    }
2347
2348
26.0k
                }
2349
177k
            }
2350
3.60M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2351
443k
            {
2352
                /* Update left, top and top-left */
2353
14.3M
                for(row = 0; row < sao_ht_luma; row++)
2354
13.9M
                {
2355
13.9M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2356
13.9M
                }
2357
                /*Update in next location*/
2358
443k
                pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2359
2360
443k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2361
443k
            }
2362
3.78M
        }
2363
2364
3.78M
        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && 0 != sao_ht_chroma)
2365
3.76M
        {
2366
3.76M
            if(ps_slice_hdr_left->i1_slice_sao_chroma_flag)
2367
190k
            {
2368
190k
                if(0 == ps_sao->b3_cb_type_idx)
2369
155k
                {
2370
3.10M
                    for(row = 0; row < sao_ht_chroma; row++)
2371
2.95M
                    {
2372
2.95M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2373
2.95M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2374
2.95M
                    }
2375
155k
                    pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2376
155k
                    pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2377
2378
155k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2379
155k
                }
2380
2381
34.5k
                else if(1 == ps_sao->b3_cb_type_idx)
2382
16.4k
                {
2383
16.4k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2384
16.4k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2385
16.4k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2386
16.4k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2387
2388
16.4k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2389
16.4k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2390
16.4k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2391
16.4k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2392
2393
16.4k
                    if(chroma_yuv420sp_vu)
2394
679
                    {
2395
679
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2396
679
                                                                                    src_strd,
2397
679
                                                                                    pu1_src_left_chroma,
2398
679
                                                                                    pu1_src_top_chroma,
2399
679
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2400
679
                                                                                    ps_sao->b5_cr_band_pos,
2401
679
                                                                                    ps_sao->b5_cb_band_pos,
2402
679
                                                                                    ai1_offset_cr,
2403
679
                                                                                    ai1_offset_cb,
2404
679
                                                                                    sao_wd_chroma,
2405
679
                                                                                    sao_ht_chroma
2406
679
                                                                                   );
2407
679
                    }
2408
15.7k
                    else
2409
15.7k
                    {
2410
15.7k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2411
15.7k
                                                                                    src_strd,
2412
15.7k
                                                                                    pu1_src_left_chroma,
2413
15.7k
                                                                                    pu1_src_top_chroma,
2414
15.7k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2415
15.7k
                                                                                    ps_sao->b5_cb_band_pos,
2416
15.7k
                                                                                    ps_sao->b5_cr_band_pos,
2417
15.7k
                                                                                    ai1_offset_cb,
2418
15.7k
                                                                                    ai1_offset_cr,
2419
15.7k
                                                                                    sao_wd_chroma,
2420
15.7k
                                                                                    sao_ht_chroma
2421
15.7k
                                                                                   );
2422
15.7k
                    }
2423
16.4k
                }
2424
2425
18.1k
                else // if(2 <= ps_sao->b3_cb_type_idx)
2426
18.1k
                {
2427
18.1k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2428
18.1k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2429
18.1k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2430
18.1k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2431
2432
18.1k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2433
18.1k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2434
18.1k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2435
18.1k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2436
2437
163k
                    for(i = 0; i < 8; i++)
2438
145k
                    {
2439
145k
                        au1_avail_chroma[i] = 255;
2440
145k
                        au1_tile_slice_boundary[i] = 0;
2441
145k
                        au4_idx_l[i] = 0;
2442
145k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2443
145k
                    }
2444
                    /*In case of slices or tiles*/
2445
18.1k
                    {
2446
18.1k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2447
10.1k
                        {
2448
10.1k
                            ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2449
10.1k
                            ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2450
2451
10.1k
                            ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2452
10.1k
                            ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2453
2454
10.1k
                            ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2455
10.1k
                            ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2456
2457
10.1k
                            ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2458
10.1k
                            ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2459
2460
10.1k
                            ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2461
10.1k
                            ctby_l = ps_sao_ctxt->i4_ctb_y;
2462
2463
10.1k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2464
1.42k
                            {
2465
1.42k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2466
33
                                {
2467
33
                                    au4_idx_l[2] = -1;
2468
33
                                    au4_idx_l[4] = -1;
2469
33
                                    au4_idx_l[5] = -1;
2470
33
                                }
2471
1.39k
                                else
2472
1.39k
                                {
2473
1.39k
                                    au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2474
1.39k
                                    au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2475
1.39k
                                }
2476
1.42k
                                idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2477
1.42k
                                au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2478
1.42k
                                au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2479
1.42k
                                au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2480
2481
                                /*Verify that the neighbour ctbs don't cross pic boundary.*/
2482
1.42k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2483
33
                                {
2484
33
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2485
33
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2486
33
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2487
33
                                }
2488
1.39k
                                else
2489
1.39k
                                {
2490
1.39k
                                    au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2491
1.39k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2492
1.39k
                                }
2493
2494
1.42k
                                if(au4_idx_l[5] > idx_l)
2495
0
                                {
2496
0
                                    au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2497
0
                                }
2498
2499
                                //  au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2500
1.42k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2501
1.42k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2502
1.42k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2503
                                /*
2504
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2505
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2506
                                 */
2507
12.8k
                                for(i = 0; i < 8; i++)
2508
11.3k
                                {
2509
                                    /*Sets the edges that lie on the slice/tile boundary*/
2510
11.3k
                                    if(au4_idx_l[i] != idx_l)
2511
3.45k
                                    {
2512
3.45k
                                        au1_tile_slice_boundary[i] = 1;
2513
3.45k
                                    }
2514
7.93k
                                    else
2515
7.93k
                                    {
2516
7.93k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2517
7.93k
                                    }
2518
11.3k
                                }
2519
                                /*Reset indices*/
2520
12.8k
                                for(i = 0; i < 8; i++)
2521
11.3k
                                {
2522
11.3k
                                    au4_idx_l[i] = 0;
2523
11.3k
                                }
2524
1.42k
                            }
2525
10.1k
                            if(ps_pps->i1_tiles_enabled_flag)
2526
8.77k
                            {
2527
                                /* Calculate availability flags at tile boundary */
2528
8.77k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2529
3.68k
                                {
2530
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2531
3.68k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2532
3.54k
                                    {
2533
3.54k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2534
1.25k
                                        {
2535
1.25k
                                            au4_idx_l[2] = -1;
2536
1.25k
                                            au4_idx_l[4] = -1;
2537
1.25k
                                            au4_idx_l[5] = -1;
2538
1.25k
                                        }
2539
2.28k
                                        else
2540
2.28k
                                        {
2541
2.28k
                                            au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2542
2.28k
                                            au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2543
2.28k
                                        }
2544
2545
3.54k
                                        idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2546
3.54k
                                        au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2547
3.54k
                                        au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2548
3.54k
                                        au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2549
2550
31.8k
                                        for(i = 0; i < 8; i++)
2551
28.3k
                                        {
2552
                                            /*Sets the edges that lie on the slice/tile boundary*/
2553
28.3k
                                            if(au4_idx_l[i] != idx_l)
2554
11.0k
                                            {
2555
11.0k
                                                au1_tile_slice_boundary[i] |= 1;
2556
11.0k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2557
11.0k
                                            }
2558
28.3k
                                        }
2559
3.54k
                                    }
2560
3.68k
                                }
2561
8.77k
                            }
2562
91.7k
                            for(i = 0; i < 8; i++)
2563
81.5k
                            {
2564
                                /*Mark a neighbour unavailable when its edge lies on a slice/tile boundary and filtering across that boundary is disabled*/
2565
81.5k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2566
11.1k
                                {
2567
11.1k
                                    au1_avail_chroma[i] = 0;
2568
11.1k
                                }
2569
81.5k
                            }
2570
10.1k
                        }
2571
18.1k
                    }
2572
18.1k
                    if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
2573
15
                    {
2574
15
                        au1_avail_chroma[0] = 0;
2575
15
                        au1_avail_chroma[4] = 0;
2576
15
                        au1_avail_chroma[6] = 0;
2577
15
                    }
2578
2579
18.1k
                    if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2580
0
                    {
2581
0
                        au1_avail_chroma[1] = 0;
2582
0
                        au1_avail_chroma[5] = 0;
2583
0
                        au1_avail_chroma[7] = 0;
2584
0
                    }
2585
2586
18.1k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2587
4.19k
                    {
2588
4.19k
                        au1_avail_chroma[2] = 0;
2589
4.19k
                        au1_avail_chroma[4] = 0;
2590
4.19k
                        au1_avail_chroma[5] = 0;
2591
4.19k
                    }
2592
2593
18.1k
                    if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
2594
2.54k
                    {
2595
2.54k
                        au1_avail_chroma[3] = 0;
2596
2.54k
                        au1_avail_chroma[6] = 0;
2597
2.54k
                        au1_avail_chroma[7] = 0;
2598
2.54k
                    }
2599
2600
18.1k
                    {
2601
18.1k
                        au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
2602
18.1k
                        au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
2603
18.1k
                        au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
2604
18.1k
                        au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
2605
                        //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2606
                        //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2607
18.1k
                        if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
2608
30
                        {
2609
30
                            au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
2610
30
                            au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
2611
30
                        }
2612
2613
2614
18.1k
                        if(chroma_yuv420sp_vu)
2615
1.08k
                        {
2616
1.08k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2617
1.08k
                                                                                 src_strd,
2618
1.08k
                                                                                 pu1_src_left_chroma,
2619
1.08k
                                                                                 pu1_src_top_chroma,
2620
1.08k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2621
1.08k
                                                                                 au1_src_top_right,
2622
1.08k
                                                                                 au1_src_bot_left,
2623
1.08k
                                                                                 au1_avail_chroma,
2624
1.08k
                                                                                 ai1_offset_cr,
2625
1.08k
                                                                                 ai1_offset_cb,
2626
1.08k
                                                                                 sao_wd_chroma,
2627
1.08k
                                                                                 sao_ht_chroma);
2628
1.08k
                        }
2629
17.0k
                        else
2630
17.0k
                        {
2631
17.0k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2632
17.0k
                                                                                 src_strd,
2633
17.0k
                                                                                 pu1_src_left_chroma,
2634
17.0k
                                                                                 pu1_src_top_chroma,
2635
17.0k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2636
17.0k
                                                                                 au1_src_top_right,
2637
17.0k
                                                                                 au1_src_bot_left,
2638
17.0k
                                                                                 au1_avail_chroma,
2639
17.0k
                                                                                 ai1_offset_cb,
2640
17.0k
                                                                                 ai1_offset_cr,
2641
17.0k
                                                                                 sao_wd_chroma,
2642
17.0k
                                                                                 sao_ht_chroma);
2643
17.0k
                        }
2644
18.1k
                    }
2645
2646
18.1k
                }
2647
190k
            }
2648
3.57M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2649
405k
            {
2650
5.14M
                for(row = 0; row < sao_ht_chroma; row++)
2651
4.73M
                {
2652
4.73M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2653
4.73M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2654
4.73M
                }
2655
405k
                pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2656
405k
                pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2657
2658
405k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2659
405k
            }
2660
2661
3.76M
        }
2662
3.78M
        pu1_src_luma += sao_wd_luma;
2663
3.78M
        pu1_src_chroma += sao_wd_chroma;
2664
3.78M
        ps_sao += 1;
2665
3.78M
    }
2666
2667
2668
    /* Current CTB */
2669
3.91M
    {
2670
3.91M
        WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
2671
3.91M
        WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
2672
3.91M
        WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2673
3.91M
        WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2674
3.91M
        WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
2675
3.91M
        WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
2676
3.91M
        WORD32 au4_idx_c[8], idx_c;
2677
2678
3.91M
        WORD32 remaining_rows;
2679
3.91M
        WORD32 remaining_cols;
2680
2681
3.91M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
2682
3.91M
        if(remaining_cols <= SAO_SHIFT_CTB)
2683
132k
        {
2684
132k
            sao_wd_luma += remaining_cols;
2685
132k
        }
2686
3.91M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
2687
3.91M
        if(remaining_cols <= 2 * SAO_SHIFT_CTB)
2688
132k
        {
2689
132k
            sao_wd_chroma += remaining_cols;
2690
132k
        }
2691
2692
3.91M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2693
3.91M
        if(remaining_rows <= SAO_SHIFT_CTB)
2694
394k
        {
2695
394k
            sao_ht_luma += remaining_rows;
2696
394k
        }
2697
3.91M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2698
3.91M
        if(remaining_rows <= SAO_SHIFT_CTB)
2699
394k
        {
2700
394k
            sao_ht_chroma += remaining_rows;
2701
394k
        }
2702
2703
3.91M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2704
3.91M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2705
3.91M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2706
3.91M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2707
2708
3.91M
        if((0 != sao_wd_luma) && (0 != sao_ht_luma))
2709
3.91M
        {
2710
3.91M
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2711
194k
            {
2712
194k
                if(0 == ps_sao->b3_y_type_idx)
2713
133k
                {
2714
                    /* Update left, top and top-left */
2715
5.95M
                    for(row = 0; row < sao_ht_luma; row++)
2716
5.81M
                    {
2717
5.81M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2718
5.81M
                    }
2719
133k
                    pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2720
2721
133k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2722
2723
133k
                    pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2724
2725
133k
                }
2726
2727
61.1k
                else if(1 == ps_sao->b3_y_type_idx)
2728
31.9k
                {
2729
31.9k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2730
31.9k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2731
31.9k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2732
31.9k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2733
2734
31.9k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2735
31.9k
                                                                              src_strd,
2736
31.9k
                                                                              pu1_src_left_luma,
2737
31.9k
                                                                              pu1_src_top_luma,
2738
31.9k
                                                                              pu1_sao_src_top_left_luma_curr_ctb,
2739
31.9k
                                                                              ps_sao->b5_y_band_pos,
2740
31.9k
                                                                              ai1_offset_y,
2741
31.9k
                                                                              sao_wd_luma,
2742
31.9k
                                                                              sao_ht_luma
2743
31.9k
                                                                             );
2744
31.9k
                }
2745
2746
29.1k
                else // if(2 <= ps_sao->b3_y_type_idx)
2747
29.1k
                {
2748
29.1k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2749
29.1k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2750
29.1k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2751
29.1k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2752
2753
262k
                    for(i = 0; i < 8; i++)
2754
233k
                    {
2755
233k
                        au1_avail_luma[i] = 255;
2756
233k
                        au1_tile_slice_boundary[i] = 0;
2757
233k
                        au4_idx_c[i] = 0;
2758
233k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2759
233k
                    }
2760
                    /******************************************************************
2761
                     * Derive the current CTB's neighbour pixel's slice indices.
2762
                     *
2763
                     *
2764
                     *          ____________
2765
                     *         |    |       |
2766
                     *         |    | C_T   |
2767
                     *         |____|_______|____
2768
                     *         |    |       |    |
2769
                     *         | C_L|   C   | C_R|
2770
                     *         |____|_______|    |
2771
                     *              |  C_D       |
2772
                     *              |            |
2773
                     *              |____________|
2774
                     *
2775
                     *****************************************************************/
2776
2777
                    /*In case of slices*/
2778
29.1k
                    {
2779
29.1k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2780
15.5k
                        {
2781
15.5k
                            ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2782
15.5k
                            ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2783
2784
15.5k
                            ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2785
15.5k
                            ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2786
2787
15.5k
                            ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2788
15.5k
                            ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2789
2790
15.5k
                            ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
2791
15.5k
                            ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
2792
2793
15.5k
                            ctbx_c = ps_sao_ctxt->i4_ctb_x;
2794
15.5k
                            ctby_c = ps_sao_ctxt->i4_ctb_y;
2795
2796
15.5k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2797
4.71k
                            {
2798
4.71k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
2799
595
                                {
2800
595
                                    au4_idx_c[6] = -1;
2801
595
                                    au4_idx_c[0] = -1;
2802
595
                                    au4_idx_c[4] = -1;
2803
595
                                }
2804
4.11k
                                else
2805
4.11k
                                {
2806
4.11k
                                    au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2807
4.11k
                                }
2808
2809
4.71k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2810
303
                                {
2811
303
                                    au4_idx_c[2] = -1;
2812
303
                                    au4_idx_c[5] = -1;
2813
303
                                    au4_idx_c[4] = -1;
2814
303
                                }
2815
4.40k
                                else
2816
4.40k
                                {
2817
4.40k
                                    au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2818
4.40k
                                    au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2819
4.40k
                                }
2820
4.71k
                                idx_c   = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2821
4.71k
                                au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2822
4.71k
                                au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2823
2824
4.71k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
2825
595
                                {
2826
595
                                    au4_ilf_across_tile_slice_enable[6] = 0;
2827
595
                                    au4_ilf_across_tile_slice_enable[0] = 0;
2828
595
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2829
595
                                }
2830
4.11k
                                else
2831
4.11k
                                {
2832
4.11k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2833
4.11k
                                    au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
2834
4.11k
                                }
2835
4.71k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2836
303
                                {
2837
303
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2838
303
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2839
303
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2840
303
                                }
2841
4.40k
                                else
2842
4.40k
                                {
2843
4.40k
                                    au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2844
4.40k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2845
4.40k
                                }
2846
4.71k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2847
4.71k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2848
4.71k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2849
2850
4.71k
                                if(au4_idx_c[6] < idx_c)
2851
1.04k
                                {
2852
1.04k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2853
1.04k
                                }
2854
2855
                                /*
2856
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2857
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2858
                                 */
2859
42.4k
                                for(i = 0; i < 8; i++)
2860
37.6k
                                {
2861
                                    /*Sets the edges that lie on the slice/tile boundary*/
2862
37.6k
                                    if(au4_idx_c[i] != idx_c)
2863
11.7k
                                    {
2864
11.7k
                                        au1_tile_slice_boundary[i] = 1;
2865
11.7k
                                    }
2866
25.9k
                                    else
2867
25.9k
                                    {
2868
25.9k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2869
25.9k
                                    }
2870
37.6k
                                }
2871
                                /*Reset indices*/
2872
42.4k
                                for(i = 0; i < 8; i++)
2873
37.6k
                                {
2874
37.6k
                                    au4_idx_c[i] = 0;
2875
37.6k
                                }
2876
4.71k
                            }
2877
2878
15.5k
                            if(ps_pps->i1_tiles_enabled_flag)
2879
10.8k
                            {
2880
                                /* Calculate availability flags at tile boundary */
2881
10.8k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2882
4.45k
                                {
2883
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2884
4.45k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2885
4.20k
                                    {
2886
4.20k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
2887
488
                                        {
2888
488
                                            au4_idx_c[6] = -1;
2889
488
                                            au4_idx_c[0] = -1;
2890
488
                                            au4_idx_c[4] = -1;
2891
488
                                        }
2892
3.71k
                                        else
2893
3.71k
                                        {
2894
3.71k
                                            au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2895
3.71k
                                        }
2896
2897
4.20k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2898
365
                                        {
2899
365
                                            au4_idx_c[2] = -1;
2900
365
                                            au4_idx_c[5] = -1;
2901
365
                                            au4_idx_c[4] = -1;
2902
365
                                        }
2903
3.83k
                                        else
2904
3.83k
                                        {
2905
3.83k
                                            au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2906
3.83k
                                            au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2907
3.83k
                                        }
2908
4.20k
                                        idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2909
4.20k
                                        au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2910
4.20k
                                        au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2911
2912
37.8k
                                        for(i = 0; i < 8; i++)
2913
33.6k
                                        {
2914
                                            /*Sets the edges that lie on the slice/tile boundary*/
2915
33.6k
                                            if(au4_idx_c[i] != idx_c)
2916
13.5k
                                            {
2917
13.5k
                                                au1_tile_slice_boundary[i] |= 1;
2918
13.5k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2919
13.5k
                                            }
2920
33.6k
                                        }
2921
4.20k
                                    }
2922
4.45k
                                }
2923
10.8k
                            }
2924
2925
139k
                            for(i = 0; i < 8; i++)
2926
124k
                            {
2927
                                /*Mark a neighbour unavailable when its edge lies on a slice/tile boundary and filtering across that boundary is disabled*/
2928
124k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2929
15.2k
                                {
2930
15.2k
                                    au1_avail_luma[i] = 0;
2931
15.2k
                                }
2932
124k
                            }
2933
2934
15.5k
                        }
2935
29.1k
                    }
2936
29.1k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
2937
3.58k
                    {
2938
3.58k
                        au1_avail_luma[0] = 0;
2939
3.58k
                        au1_avail_luma[4] = 0;
2940
3.58k
                        au1_avail_luma[6] = 0;
2941
3.58k
                    }
2942
2943
29.1k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
2944
3.12k
                    {
2945
3.12k
                        au1_avail_luma[1] = 0;
2946
3.12k
                        au1_avail_luma[5] = 0;
2947
3.12k
                        au1_avail_luma[7] = 0;
2948
3.12k
                    }
2949
2950
29.1k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2951
7.51k
                    {
2952
7.51k
                        au1_avail_luma[2] = 0;
2953
7.51k
                        au1_avail_luma[4] = 0;
2954
7.51k
                        au1_avail_luma[5] = 0;
2955
7.51k
                    }
2956
2957
29.1k
                    if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2958
3.16k
                    {
2959
3.16k
                        au1_avail_luma[3] = 0;
2960
3.16k
                        au1_avail_luma[6] = 0;
2961
3.16k
                        au1_avail_luma[7] = 0;
2962
3.16k
                    }
2963
2964
29.1k
                    {
2965
29.1k
                        au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
2966
29.1k
                        u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
2967
2968
29.1k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2969
29.1k
                                                                          src_strd,
2970
29.1k
                                                                          pu1_src_left_luma,
2971
29.1k
                                                                          pu1_src_top_luma,
2972
29.1k
                                                                          pu1_sao_src_top_left_luma_curr_ctb,
2973
29.1k
                                                                          au1_src_top_right,
2974
29.1k
                                                                          &u1_sao_src_top_left_luma_bot_left,
2975
29.1k
                                                                          au1_avail_luma,
2976
29.1k
                                                                          ai1_offset_y,
2977
29.1k
                                                                          sao_wd_luma,
2978
29.1k
                                                                          sao_ht_luma);
2979
29.1k
                    }
2980
29.1k
                    pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2981
29.1k
                    pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
2982
29.1k
                }
2983
194k
            }
2984
3.72M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2985
459k
            {
2986
                /* Update left, top and top-left */
2987
14.9M
                for(row = 0; row < sao_ht_luma; row++)
2988
14.4M
                {
2989
14.4M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2990
14.4M
                }
2991
459k
                pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2992
2993
459k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2994
2995
459k
                pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2996
459k
            }
2997
3.91M
        }
2998
2999
3.91M
        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && (0 != sao_wd_chroma) && (0 != sao_ht_chroma))
3000
3.88M
        {
3001
3.88M
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
3002
204k
            {
3003
204k
                if(0 == ps_sao->b3_cb_type_idx)
3004
166k
                {
3005
3.32M
                    for(row = 0; row < sao_ht_chroma; row++)
3006
3.16M
                    {
3007
3.16M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3008
3.16M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3009
3.16M
                    }
3010
166k
                    pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3011
166k
                    pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3012
3013
166k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3014
3015
166k
                    pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3016
166k
                    pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3017
166k
                }
3018
3019
38.0k
                else if(1 == ps_sao->b3_cb_type_idx)
3020
17.9k
                {
3021
17.9k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3022
17.9k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3023
17.9k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3024
17.9k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3025
3026
17.9k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3027
17.9k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3028
17.9k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3029
17.9k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3030
3031
17.9k
                    if(chroma_yuv420sp_vu)
3032
760
                    {
3033
760
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3034
760
                                                                                    src_strd,
3035
760
                                                                                    pu1_src_left_chroma,
3036
760
                                                                                    pu1_src_top_chroma,
3037
760
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
3038
760
                                                                                    ps_sao->b5_cr_band_pos,
3039
760
                                                                                    ps_sao->b5_cb_band_pos,
3040
760
                                                                                    ai1_offset_cr,
3041
760
                                                                                    ai1_offset_cb,
3042
760
                                                                                    sao_wd_chroma,
3043
760
                                                                                    sao_ht_chroma
3044
760
                                                                                   );
3045
760
                    }
3046
17.1k
                    else
3047
17.1k
                    {
3048
17.1k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3049
17.1k
                                                                                    src_strd,
3050
17.1k
                                                                                    pu1_src_left_chroma,
3051
17.1k
                                                                                    pu1_src_top_chroma,
3052
17.1k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
3053
17.1k
                                                                                    ps_sao->b5_cb_band_pos,
3054
17.1k
                                                                                    ps_sao->b5_cr_band_pos,
3055
17.1k
                                                                                    ai1_offset_cb,
3056
17.1k
                                                                                    ai1_offset_cr,
3057
17.1k
                                                                                    sao_wd_chroma,
3058
17.1k
                                                                                    sao_ht_chroma
3059
17.1k
                                                                                   );
3060
17.1k
                    }
3061
17.9k
                }
3062
3063
20.1k
                else // if(2 <= ps_sao->b3_cb_type_idx)
3064
20.1k
                {
3065
20.1k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3066
20.1k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3067
20.1k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3068
20.1k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3069
3070
20.1k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3071
20.1k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3072
20.1k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3073
20.1k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3074
3075
181k
                    for(i = 0; i < 8; i++)
3076
161k
                    {
3077
161k
                        au1_avail_chroma[i] = 255;
3078
161k
                        au1_tile_slice_boundary[i] = 0;
3079
161k
                        au4_idx_c[i] = 0;
3080
161k
                        au4_ilf_across_tile_slice_enable[i] = 1;
3081
161k
                    }
3082
20.1k
                    {
3083
20.1k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3084
11.2k
                        {
3085
11.2k
                            ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
3086
11.2k
                            ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
3087
3088
11.2k
                            ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
3089
11.2k
                            ctby_c_l = ps_sao_ctxt->i4_ctb_y;
3090
3091
11.2k
                            ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
3092
11.2k
                            ctby_c_r = ps_sao_ctxt->i4_ctb_y;
3093
3094
11.2k
                            ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
3095
11.2k
                            ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
3096
3097
11.2k
                            ctbx_c = ps_sao_ctxt->i4_ctb_x;
3098
11.2k
                            ctby_c = ps_sao_ctxt->i4_ctb_y;
3099
3100
11.2k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
3101
1.52k
                            {
3102
1.52k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
3103
162
                                {
3104
162
                                    au4_idx_c[0] = -1;
3105
162
                                    au4_idx_c[4] = -1;
3106
162
                                    au4_idx_c[6] = -1;
3107
162
                                }
3108
1.35k
                                else
3109
1.35k
                                {
3110
1.35k
                                    au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3111
1.35k
                                }
3112
3113
1.52k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
3114
38
                                {
3115
38
                                    au4_idx_c[2] = -1;
3116
38
                                    au4_idx_c[4] = -1;
3117
38
                                    au4_idx_c[5] = -1;
3118
38
                                }
3119
1.48k
                                else
3120
1.48k
                                {
3121
1.48k
                                    au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3122
1.48k
                                    au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3123
1.48k
                                }
3124
1.52k
                                idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3125
1.52k
                                au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3126
1.52k
                                au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3127
3128
1.52k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
3129
162
                                {
3130
162
                                    au4_ilf_across_tile_slice_enable[0] = 0;
3131
162
                                    au4_ilf_across_tile_slice_enable[4] = 0;
3132
162
                                    au4_ilf_across_tile_slice_enable[6] = 0;
3133
162
                                }
3134
1.35k
                                else
3135
1.35k
                                {
3136
1.35k
                                    au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
3137
1.35k
                                    au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3138
1.35k
                                }
3139
3140
1.52k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
3141
38
                                {
3142
38
                                    au4_ilf_across_tile_slice_enable[2] = 0;
3143
38
                                    au4_ilf_across_tile_slice_enable[4] = 0;
3144
38
                                    au4_ilf_across_tile_slice_enable[5] = 0;
3145
38
                                }
3146
1.48k
                                else
3147
1.48k
                                {
3148
1.48k
                                    au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3149
1.48k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
3150
1.48k
                                }
3151
3152
1.52k
                                au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
3153
1.52k
                                au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
3154
1.52k
                                au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
3155
3156
1.52k
                                if(idx_c > au4_idx_c[6])
3157
324
                                {
3158
324
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3159
324
                                }
3160
3161
                                /*
3162
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
3163
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
3164
                                 */
3165
13.6k
                                for(i = 0; i < 8; i++)
3166
12.1k
                                {
3167
                                    /*Sets the edges that lie on the slice/tile boundary*/
3168
12.1k
                                    if(au4_idx_c[i] != idx_c)
3169
4.07k
                                    {
3170
4.07k
                                        au1_tile_slice_boundary[i] = 1;
3171
4.07k
                                    }
3172
8.09k
                                    else
3173
8.09k
                                    {
3174
8.09k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
3175
8.09k
                                    }
3176
12.1k
                                }
3177
                                /*Reset indices*/
3178
13.6k
                                for(i = 0; i < 8; i++)
3179
12.1k
                                {
3180
12.1k
                                    au4_idx_c[i] = 0;
3181
12.1k
                                }
3182
1.52k
                            }
3183
3184
11.2k
                            if(ps_pps->i1_tiles_enabled_flag)
3185
9.68k
                            {
3186
                                /* Calculate availability flags at tile boundary */
3187
9.68k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
3188
3.88k
                                {
3189
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
3190
3.88k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
3191
3.73k
                                    {
3192
3.73k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
3193
617
                                        {
3194
617
                                            au4_idx_c[6] = -1;
3195
617
                                            au4_idx_c[0] = -1;
3196
617
                                            au4_idx_c[4] = -1;
3197
617
                                        }
3198
3.11k
                                        else
3199
3.11k
                                        {
3200
3.11k
                                            au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3201
3.11k
                                        }
3202
3203
3.73k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
3204
1.15k
                                        {
3205
1.15k
                                            au4_idx_c[2] = -1;
3206
1.15k
                                            au4_idx_c[5] = -1;
3207
1.15k
                                            au4_idx_c[4] = -1;
3208
1.15k
                                        }
3209
2.57k
                                        else
3210
2.57k
                                        {
3211
2.57k
                                            au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3212
2.57k
                                            au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3213
2.57k
                                        }
3214
3.73k
                                        idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3215
3.73k
                                        au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3216
3.73k
                                        au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3217
3218
33.5k
                                        for(i = 0; i < 8; i++)
3219
29.8k
                                        {
3220
                                            /*Sets the edges that lie on the slice/tile boundary*/
3221
29.8k
                                            if(au4_idx_c[i] != idx_c)
3222
11.7k
                                            {
3223
11.7k
                                                au1_tile_slice_boundary[i] |= 1;
3224
11.7k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
3225
11.7k
                                            }
3226
29.8k
                                        }
3227
3.73k
                                    }
3228
3.88k
                                }
3229
9.68k
                            }
3230
3231
100k
                            for(i = 0; i < 8; i++)
3232
89.6k
                            {
3233
                                /* Mark a neighbour as unavailable when it lies across a slice/tile boundary and in-loop filtering across that boundary is disabled */
3234
89.6k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
3235
12.1k
                                {
3236
12.1k
                                    au1_avail_chroma[i] = 0;
3237
12.1k
                                }
3238
89.6k
                            }
3239
11.2k
                        }
3240
20.1k
                    }
3241
3242
20.1k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
3243
2.64k
                    {
3244
2.64k
                        au1_avail_chroma[0] = 0;
3245
2.64k
                        au1_avail_chroma[4] = 0;
3246
2.64k
                        au1_avail_chroma[6] = 0;
3247
2.64k
                    }
3248
3249
20.1k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
3250
2.06k
                    {
3251
2.06k
                        au1_avail_chroma[1] = 0;
3252
2.06k
                        au1_avail_chroma[5] = 0;
3253
2.06k
                        au1_avail_chroma[7] = 0;
3254
2.06k
                    }
3255
3256
20.1k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
3257
4.55k
                    {
3258
4.55k
                        au1_avail_chroma[2] = 0;
3259
4.55k
                        au1_avail_chroma[4] = 0;
3260
4.55k
                        au1_avail_chroma[5] = 0;
3261
4.55k
                    }
3262
3263
20.1k
                    if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
3264
2.78k
                    {
3265
2.78k
                        au1_avail_chroma[3] = 0;
3266
2.78k
                        au1_avail_chroma[6] = 0;
3267
2.78k
                        au1_avail_chroma[7] = 0;
3268
2.78k
                    }
3269
3270
20.1k
                    {
3271
20.1k
                        au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
3272
20.1k
                        au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
3273
3274
20.1k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
3275
20.1k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
3276
3277
20.1k
                        if(chroma_yuv420sp_vu)
3278
1.17k
                        {
3279
1.17k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3280
1.17k
                                                                                 src_strd,
3281
1.17k
                                                                                 pu1_src_left_chroma,
3282
1.17k
                                                                                 pu1_src_top_chroma,
3283
1.17k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
3284
1.17k
                                                                                 au1_src_top_right,
3285
1.17k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
3286
1.17k
                                                                                 au1_avail_chroma,
3287
1.17k
                                                                                 ai1_offset_cr,
3288
1.17k
                                                                                 ai1_offset_cb,
3289
1.17k
                                                                                 sao_wd_chroma,
3290
1.17k
                                                                                 sao_ht_chroma);
3291
1.17k
                        }
3292
18.9k
                        else
3293
18.9k
                        {
3294
18.9k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3295
18.9k
                                                                                 src_strd,
3296
18.9k
                                                                                 pu1_src_left_chroma,
3297
18.9k
                                                                                 pu1_src_top_chroma,
3298
18.9k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
3299
18.9k
                                                                                 au1_src_top_right,
3300
18.9k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
3301
18.9k
                                                                                 au1_avail_chroma,
3302
18.9k
                                                                                 ai1_offset_cb,
3303
18.9k
                                                                                 ai1_offset_cr,
3304
18.9k
                                                                                 sao_wd_chroma,
3305
18.9k
                                                                                 sao_ht_chroma);
3306
18.9k
                        }
3307
20.1k
                    }
3308
3309
20.1k
                }
3310
204k
                pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3311
204k
                pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3312
3313
204k
                pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
3314
204k
                pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
3315
204k
            }
3316
3.68M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3317
419k
            {
3318
5.36M
                for(row = 0; row < sao_ht_chroma; row++)
3319
4.94M
                {
3320
4.94M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3321
4.94M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3322
4.94M
                }
3323
419k
                pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3324
419k
                pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3325
3326
419k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3327
3328
419k
                pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3329
419k
                pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3330
419k
            }
3331
3332
3.88M
        }
3333
3.91M
    }
3334
3335
3336
3337
3338
/* If no loop filter is enabled copy the backed up values */
3339
3.91M
    {
3340
        /* Luma */
3341
3.91M
        if(no_loop_filter_enabled_luma)
3342
129k
        {
3343
129k
            UWORD32 u4_no_loop_filter_flag;
3344
129k
            WORD32 loop_filter_bit_pos;
3345
129k
            WORD32 log2_min_cu = 3;
3346
129k
            WORD32 min_cu = (1 << log2_min_cu);
3347
129k
            UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
3348
129k
            WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
3349
129k
            WORD32 sao_blk_wd = ctb_size;
3350
129k
            WORD32 remaining_rows;
3351
129k
            WORD32 remaining_cols;
3352
3353
129k
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3354
129k
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3355
129k
            if(remaining_rows <= SAO_SHIFT_CTB)
3356
11.0k
                sao_blk_ht += remaining_rows;
3357
129k
            if(remaining_cols <= SAO_SHIFT_CTB)
3358
3.01k
                sao_blk_wd += remaining_cols;
3359
3360
129k
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
3361
129k
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3362
3363
129k
            pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
3364
3365
129k
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3366
129k
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3367
129k
            if(ps_sao_ctxt->i4_ctb_x > 0)
3368
124k
                loop_filter_bit_pos -= 1;
3369
3370
129k
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3371
129k
                            (loop_filter_bit_pos >> 3);
3372
3373
129k
            for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
3374
1.09M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3375
960k
            {
3376
960k
                WORD32 tmp_wd = sao_blk_wd;
3377
3378
960k
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3379
960k
                                (loop_filter_bit_pos & 7);
3380
960k
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3381
3382
960k
                if(u4_no_loop_filter_flag)
3383
873k
                {
3384
1.87M
                    while(tmp_wd > 0)
3385
1.00M
                    {
3386
1.00M
                        if(CTZ(u4_no_loop_filter_flag))
3387
120k
                        {
3388
120k
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3389
120k
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3390
120k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3391
120k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3392
120k
                        }
3393
885k
                        else
3394
885k
                        {
3395
7.83M
                            for(row = 0; row < min_cu; row++)
3396
6.94M
                            {
3397
368M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3398
361M
                                {
3399
361M
                                    pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
3400
361M
                                }
3401
6.94M
                            }
3402
885k
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3403
885k
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3404
885k
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3405
885k
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3406
885k
                        }
3407
1.00M
                    }
3408
3409
873k
                    pu1_src_tmp_luma -= sao_blk_wd;
3410
873k
                    pu1_src_backup_luma -= sao_blk_wd;
3411
873k
                }
3412
3413
960k
                pu1_src_tmp_luma += (src_strd << log2_min_cu);
3414
960k
                pu1_src_backup_luma += (backup_strd << log2_min_cu);
3415
960k
            }
3416
129k
        }
3417
3418
        /* Chroma */
3419
3.91M
        if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && no_loop_filter_enabled_chroma)
3420
128k
        {
3421
128k
            UWORD32 u4_no_loop_filter_flag;
3422
128k
            WORD32 loop_filter_bit_pos;
3423
128k
            WORD32 log2_min_cu = 3;
3424
128k
            WORD32 min_cu = (1 << log2_min_cu);
3425
128k
            UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
3426
128k
            WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
3427
128k
            WORD32 sao_blk_wd = ctb_size;
3428
128k
            WORD32 remaining_rows;
3429
128k
            WORD32 remaining_cols;
3430
3431
128k
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3432
128k
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3433
128k
            if(remaining_rows <= 2 * SAO_SHIFT_CTB)
3434
11.0k
                sao_blk_ht += remaining_rows;
3435
128k
            if(remaining_cols <= 2 * SAO_SHIFT_CTB)
3436
2.96k
                sao_blk_wd += remaining_cols;
3437
3438
128k
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
3439
128k
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3440
3441
128k
            pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
3442
3443
128k
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3444
128k
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3445
128k
            if(ps_sao_ctxt->i4_ctb_x > 0)
3446
123k
                loop_filter_bit_pos -= 2;
3447
3448
128k
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3449
128k
                            (loop_filter_bit_pos >> 3);
3450
3451
128k
            for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
3452
1.07M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3453
948k
            {
3454
948k
                WORD32 tmp_wd = sao_blk_wd;
3455
3456
948k
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3457
948k
                                (loop_filter_bit_pos & 7);
3458
948k
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3459
3460
948k
                if(u4_no_loop_filter_flag)
3461
872k
                {
3462
1.87M
                    while(tmp_wd > 0)
3463
997k
                    {
3464
997k
                        if(CTZ(u4_no_loop_filter_flag))
3465
115k
                        {
3466
115k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3467
115k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3468
115k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3469
115k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3470
115k
                        }
3471
882k
                        else
3472
882k
                        {
3473
4.38M
                            for(row = 0; row < min_cu / 2; row++)
3474
3.50M
                            {
3475
197M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3476
193M
                                {
3477
193M
                                    pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
3478
193M
                                }
3479
3.50M
                            }
3480
3481
882k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3482
882k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3483
882k
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3484
882k
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3485
882k
                        }
3486
997k
                    }
3487
3488
872k
                    pu1_src_tmp_chroma -= sao_blk_wd;
3489
872k
                    pu1_src_backup_chroma -= sao_blk_wd;
3490
872k
                }
3491
3492
948k
                pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
3493
948k
                pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
3494
948k
            }
3495
128k
        }
3496
3.91M
    }
3497
3498
3.91M
}
3499