Coverage Report

Created: 2025-10-13 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/decoder/ihevcd_sao.c
Line
Count
Source
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevcd_sao.c
22
 *
23
 * @brief
24
 *  Contains function definitions for sample adaptive offset process
25
 *
26
 * @author
27
 *  Srinivas T
28
 *
29
 * @par List of Functions:
30
 *
31
 * @remarks
32
 *  None
33
 *
34
 *******************************************************************************
35
 */
36
37
#include <stdio.h>
38
#include <stddef.h>
39
#include <stdlib.h>
40
#include <string.h>
41
#include <assert.h>
42
43
#include "ihevc_typedefs.h"
44
#include "iv.h"
45
#include "ivd.h"
46
#include "ihevcd_cxa.h"
47
#include "ithread.h"
48
49
#include "ihevc_defs.h"
50
#include "ihevc_debug.h"
51
#include "ihevc_defs.h"
52
#include "ihevc_structs.h"
53
#include "ihevc_macros.h"
54
#include "ihevc_platform_macros.h"
55
#include "ihevc_cabac_tables.h"
56
#include "ihevc_sao.h"
57
#include "ihevc_mem_fns.h"
58
59
#include "ihevc_error.h"
60
#include "ihevc_common_tables.h"
61
62
#include "ihevcd_trace.h"
63
#include "ihevcd_defs.h"
64
#include "ihevcd_function_selector.h"
65
#include "ihevcd_structs.h"
66
#include "ihevcd_error.h"
67
#include "ihevcd_nal.h"
68
#include "ihevcd_bitstream.h"
69
#include "ihevcd_job_queue.h"
70
#include "ihevcd_utils.h"
71
72
#include "ihevc_deblk.h"
73
#include "ihevc_deblk_tables.h"
74
#include "ihevcd_profile.h"
75
#include "ihevcd_sao.h"
76
#include "ihevcd_debug.h"
77
78
209M
/* Shift constant for SAO processing. NOTE(review): the uses of this macro are
 * not visible in this part of the file; presumably it is the 8-pixel x/y shift
 * of the SAO window relative to the CTB grid - confirm against
 * ihevcd_sao_shift_ctb(). */
#define SAO_SHIFT_CTB    8
79
80
/**
81
 * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions)
82
 */
83
void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
84
0
{
85
0
    codec_t *ps_codec = ps_sao_ctxt->ps_codec;
86
0
    UWORD8 *pu1_src_luma;
87
0
    UWORD8 *pu1_src_chroma;
88
0
    WORD32 src_strd;
89
0
    WORD32 ctb_size;
90
0
    WORD32 log2_ctb_size;
91
0
    sps_t *ps_sps;
92
0
    sao_t *ps_sao;
93
0
    WORD32 row, col;
94
0
    UWORD8 au1_avail_luma[8];
95
0
    UWORD8 au1_avail_chroma[8];
96
0
    WORD32 i;
97
0
    UWORD8 *pu1_src_top_luma;
98
0
    UWORD8 *pu1_src_top_chroma;
99
0
    UWORD8 *pu1_src_left_luma;
100
0
    UWORD8 *pu1_src_left_chroma;
101
0
    UWORD8 au1_src_top_right[2];
102
0
    UWORD8 au1_src_bot_left[2];
103
0
    UWORD8 *pu1_no_loop_filter_flag;
104
0
    WORD32 loop_filter_strd;
105
106
    /* Only first 5 values are used, but arrays are large
107
     enough so that SIMD functions can read 64 bits at a time */
108
0
    WORD8 ai1_offset_y[8] = {0};
109
0
    WORD8 ai1_offset_cb[8] = {0};
110
0
    WORD8 ai1_offset_cr[8] = {0};
111
112
0
    PROFILE_DISABLE_SAO();
113
114
0
    ps_sps = ps_sao_ctxt->ps_sps;
115
0
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
116
0
    ctb_size = (1 << log2_ctb_size);
117
0
    src_strd = ps_sao_ctxt->ps_codec->i4_strd;
118
0
    pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
119
0
    pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
120
121
0
    ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
122
0
    loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
123
124
    /* Current CTB */
125
0
    {
126
0
        WORD32 sao_wd_luma;
127
0
        WORD32 sao_wd_chroma;
128
0
        WORD32 sao_ht_luma;
129
0
        WORD32 sao_ht_chroma;
130
131
0
        WORD32 remaining_rows;
132
0
        WORD32 remaining_cols;
133
134
0
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
135
0
        sao_wd_luma = MIN(ctb_size, remaining_cols);
136
0
        sao_wd_chroma = MIN(ctb_size, remaining_cols);
137
138
0
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
139
0
        sao_ht_luma = MIN(ctb_size, remaining_rows);
140
0
        sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
141
142
0
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
143
0
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
144
0
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
145
0
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
146
147
0
        pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
148
0
                        ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
149
0
                        ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
150
151
0
        ai1_offset_y[1] = ps_sao->b4_y_offset_1;
152
0
        ai1_offset_y[2] = ps_sao->b4_y_offset_2;
153
0
        ai1_offset_y[3] = ps_sao->b4_y_offset_3;
154
0
        ai1_offset_y[4] = ps_sao->b4_y_offset_4;
155
156
0
        ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
157
0
        ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
158
0
        ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
159
0
        ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
160
161
0
        ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
162
0
        ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
163
0
        ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
164
0
        ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
165
166
0
        for(i = 0; i < 8; i++)
167
0
        {
168
0
            au1_avail_luma[i] = 255;
169
0
            au1_avail_chroma[i] = 255;
170
0
        }
171
172
173
0
        if(0 == ps_sao_ctxt->i4_ctb_x)
174
0
        {
175
0
            au1_avail_luma[0] = 0;
176
0
            au1_avail_luma[4] = 0;
177
0
            au1_avail_luma[6] = 0;
178
179
0
            au1_avail_chroma[0] = 0;
180
0
            au1_avail_chroma[4] = 0;
181
0
            au1_avail_chroma[6] = 0;
182
0
        }
183
184
0
        if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
185
0
        {
186
0
            au1_avail_luma[1] = 0;
187
0
            au1_avail_luma[5] = 0;
188
0
            au1_avail_luma[7] = 0;
189
190
0
            au1_avail_chroma[1] = 0;
191
0
            au1_avail_chroma[5] = 0;
192
0
            au1_avail_chroma[7] = 0;
193
0
        }
194
195
0
        if(0 == ps_sao_ctxt->i4_ctb_y)
196
0
        {
197
0
            au1_avail_luma[2] = 0;
198
0
            au1_avail_luma[4] = 0;
199
0
            au1_avail_luma[5] = 0;
200
201
0
            au1_avail_chroma[2] = 0;
202
0
            au1_avail_chroma[4] = 0;
203
0
            au1_avail_chroma[5] = 0;
204
0
        }
205
206
0
        if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
207
0
        {
208
0
            au1_avail_luma[3] = 0;
209
0
            au1_avail_luma[6] = 0;
210
0
            au1_avail_luma[7] = 0;
211
212
0
            au1_avail_chroma[3] = 0;
213
0
            au1_avail_chroma[6] = 0;
214
0
            au1_avail_chroma[7] = 0;
215
0
        }
216
217
218
0
        if(0 == ps_sao->b3_y_type_idx)
219
0
        {
220
            /* Update left, top and top-left */
221
0
            for(row = 0; row < sao_ht_luma; row++)
222
0
            {
223
0
                pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
224
0
            }
225
0
            ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
226
227
0
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
228
229
0
        }
230
0
        else
231
0
        {
232
0
            UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
233
0
            UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
234
0
            WORD32 tmp_strd = MAX_CTB_SIZE + 2;
235
0
            WORD32 no_loop_filter_enabled = 0;
236
237
            /* Check the loop filter flags and copy the original values for back up */
238
0
            {
239
0
                UWORD32 u4_no_loop_filter_flag;
240
0
                WORD32 min_cu = 8;
241
0
                UWORD8 *pu1_src_tmp = pu1_src_luma;
242
243
0
                for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
244
0
                {
245
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
246
0
                                    ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
247
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
248
249
0
                    if(u4_no_loop_filter_flag)
250
0
                    {
251
0
                        WORD32 tmp_wd = sao_wd_luma;
252
0
                        no_loop_filter_enabled = 1;
253
0
                        while(tmp_wd > 0)
254
0
                        {
255
0
                            if(CTZ(u4_no_loop_filter_flag))
256
0
                            {
257
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
258
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
259
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
260
0
                                tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
261
0
                            }
262
0
                            else
263
0
                            {
264
0
                                for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
265
0
                                {
266
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
267
0
                                    {
268
0
                                        pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
269
0
                                    }
270
0
                                }
271
272
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
273
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
274
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
275
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
276
0
                            }
277
0
                        }
278
279
0
                        pu1_src_tmp -= sao_wd_luma;
280
0
                    }
281
282
0
                    pu1_src_tmp += min_cu * src_strd;
283
0
                    pu1_src_copy += min_cu * tmp_strd;
284
0
                }
285
0
            }
286
287
0
            if(1 == ps_sao->b3_y_type_idx)
288
0
            {
289
0
                ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
290
0
                                                                          src_strd,
291
0
                                                                          pu1_src_left_luma,
292
0
                                                                          pu1_src_top_luma,
293
0
                                                                          ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
294
0
                                                                          ps_sao->b5_y_band_pos,
295
0
                                                                          ai1_offset_y,
296
0
                                                                          sao_wd_luma,
297
0
                                                                          sao_ht_luma);
298
0
            }
299
0
            else // if(2 <= ps_sao->b3_y_type_idx)
300
0
            {
301
0
                au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
302
0
                au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
303
0
                ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
304
0
                                                                  src_strd,
305
0
                                                                  pu1_src_left_luma,
306
0
                                                                  pu1_src_top_luma,
307
0
                                                                  ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
308
0
                                                                  au1_src_top_right,
309
0
                                                                  au1_src_bot_left,
310
0
                                                                  au1_avail_luma,
311
0
                                                                  ai1_offset_y,
312
0
                                                                  sao_wd_luma,
313
0
                                                                  sao_ht_luma);
314
0
            }
315
316
            /* Check the loop filter flags and copy the original values back if they are set */
317
0
            if(no_loop_filter_enabled)
318
0
            {
319
0
                UWORD32 u4_no_loop_filter_flag;
320
0
                WORD32 min_cu = 8;
321
0
                UWORD8 *pu1_src_tmp = pu1_src_luma;
322
323
0
                for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
324
0
                {
325
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
326
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
327
328
0
                    if(u4_no_loop_filter_flag)
329
0
                    {
330
0
                        WORD32 tmp_wd = sao_wd_luma;
331
0
                        while(tmp_wd > 0)
332
0
                        {
333
0
                            if(CTZ(u4_no_loop_filter_flag))
334
0
                            {
335
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
336
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
337
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
338
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
339
0
                            }
340
0
                            else
341
0
                            {
342
0
                                for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
343
0
                                {
344
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
345
0
                                    {
346
0
                                        pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
347
0
                                    }
348
0
                                }
349
350
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
351
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
352
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
353
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
354
0
                            }
355
0
                        }
356
357
0
                        pu1_src_tmp -= sao_wd_luma;
358
0
                    }
359
360
0
                    pu1_src_tmp += min_cu * src_strd;
361
0
                    pu1_src_copy += min_cu * tmp_strd;
362
0
                }
363
0
            }
364
365
0
        }
366
367
0
        if(0 == ps_sao->b3_cb_type_idx)
368
0
        {
369
0
            for(row = 0; row < sao_ht_chroma; row++)
370
0
            {
371
0
                pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
372
0
                pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
373
0
            }
374
0
            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
375
0
            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
376
377
0
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
378
0
        }
379
0
        else
380
0
        {
381
0
            UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
382
0
            UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
383
0
            WORD32 tmp_strd = MAX_CTB_SIZE + 4;
384
0
            WORD32 no_loop_filter_enabled = 0;
385
386
            /* Check the loop filter flags and copy the original values for back up */
387
0
            {
388
0
                UWORD32 u4_no_loop_filter_flag;
389
0
                WORD32 min_cu = 4;
390
0
                UWORD8 *pu1_src_tmp = pu1_src_chroma;
391
392
0
                for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
393
0
                {
394
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
395
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
396
397
0
                    if(u4_no_loop_filter_flag)
398
0
                    {
399
0
                        WORD32 tmp_wd = sao_wd_chroma;
400
0
                        no_loop_filter_enabled = 1;
401
0
                        while(tmp_wd > 0)
402
0
                        {
403
0
                            if(CTZ(u4_no_loop_filter_flag))
404
0
                            {
405
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
406
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
407
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
408
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
409
0
                            }
410
0
                            else
411
0
                            {
412
0
                                for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
413
0
                                {
414
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
415
0
                                    {
416
0
                                        pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
417
0
                                    }
418
0
                                }
419
420
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
421
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
422
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
423
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
424
0
                            }
425
0
                        }
426
427
0
                        pu1_src_tmp -= sao_wd_chroma;
428
0
                    }
429
430
0
                    pu1_src_tmp += min_cu * src_strd;
431
0
                    pu1_src_copy += min_cu * tmp_strd;
432
0
                }
433
0
            }
434
435
0
            if(1 == ps_sao->b3_cb_type_idx)
436
0
            {
437
0
                ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
438
0
                                                                            src_strd,
439
0
                                                                            pu1_src_left_chroma,
440
0
                                                                            pu1_src_top_chroma,
441
0
                                                                            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
442
0
                                                                            ps_sao->b5_cb_band_pos,
443
0
                                                                            ps_sao->b5_cr_band_pos,
444
0
                                                                            ai1_offset_cb,
445
0
                                                                            ai1_offset_cr,
446
0
                                                                            sao_wd_chroma,
447
0
                                                                            sao_ht_chroma
448
0
                                                                           );
449
0
            }
450
0
            else // if(2 <= ps_sao->b3_cb_type_idx)
451
0
            {
452
0
                au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
453
0
                au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
454
0
                au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
455
0
                au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
456
0
                ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
457
0
                                                                     src_strd,
458
0
                                                                     pu1_src_left_chroma,
459
0
                                                                     pu1_src_top_chroma,
460
0
                                                                     ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
461
0
                                                                     au1_src_top_right,
462
0
                                                                     au1_src_bot_left,
463
0
                                                                     au1_avail_chroma,
464
0
                                                                     ai1_offset_cb,
465
0
                                                                     ai1_offset_cr,
466
0
                                                                     sao_wd_chroma,
467
0
                                                                     sao_ht_chroma);
468
0
            }
469
470
            /* Check the loop filter flags and copy the original values back if they are set */
471
0
            if(no_loop_filter_enabled)
472
0
            {
473
0
                UWORD32 u4_no_loop_filter_flag;
474
0
                WORD32 min_cu = 4;
475
0
                UWORD8 *pu1_src_tmp = pu1_src_chroma;
476
477
0
                for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
478
0
                {
479
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
480
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
481
482
0
                    if(u4_no_loop_filter_flag)
483
0
                    {
484
0
                        WORD32 tmp_wd = sao_wd_chroma;
485
0
                        while(tmp_wd > 0)
486
0
                        {
487
0
                            if(CTZ(u4_no_loop_filter_flag))
488
0
                            {
489
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
490
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
491
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
492
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
493
0
                            }
494
0
                            else
495
0
                            {
496
0
                                for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
497
0
                                {
498
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
499
0
                                    {
500
0
                                        pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
501
0
                                    }
502
0
                                }
503
504
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
505
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
506
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
507
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
508
0
                            }
509
0
                        }
510
511
0
                        pu1_src_tmp -= sao_wd_chroma;
512
0
                    }
513
514
0
                    pu1_src_tmp += min_cu * src_strd;
515
0
                    pu1_src_copy += min_cu * tmp_strd;
516
0
                }
517
0
            }
518
519
0
        }
520
521
0
    }
522
0
}
523
524
void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
525
5.34M
{
526
5.34M
    codec_t *ps_codec = ps_sao_ctxt->ps_codec;
527
5.34M
    UWORD8 *pu1_src_luma;
528
5.34M
    UWORD8 *pu1_src_chroma;
529
5.34M
    WORD32 src_strd;
530
5.34M
    WORD32 ctb_size;
531
5.34M
    WORD32 log2_ctb_size;
532
5.34M
    sps_t *ps_sps;
533
5.34M
    sao_t *ps_sao;
534
5.34M
    pps_t *ps_pps;
535
5.34M
    slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
536
5.34M
    tile_t *ps_tile;
537
5.34M
    UWORD16 *pu1_slice_idx;
538
5.34M
    UWORD16 *pu1_tile_idx;
539
5.34M
    WORD32 row, col;
540
5.34M
    UWORD8 au1_avail_luma[8];
541
5.34M
    UWORD8 au1_avail_chroma[8];
542
5.34M
    UWORD8 au1_tile_slice_boundary[8];
543
5.34M
    UWORD8 au4_ilf_across_tile_slice_enable[8];
544
5.34M
    WORD32 i;
545
5.34M
    UWORD8 *pu1_src_top_luma;
546
5.34M
    UWORD8 *pu1_src_top_chroma;
547
5.34M
    UWORD8 *pu1_src_left_luma;
548
5.34M
    UWORD8 *pu1_src_left_chroma;
549
5.34M
    UWORD8 au1_src_top_right[2];
550
5.34M
    UWORD8 au1_src_bot_left[2];
551
5.34M
    UWORD8 *pu1_no_loop_filter_flag;
552
5.34M
    UWORD8 *pu1_src_backup_luma;
553
5.34M
    UWORD8 *pu1_src_backup_chroma;
554
5.34M
    WORD32 backup_strd;
555
5.34M
    WORD32 loop_filter_strd;
556
557
5.34M
    WORD32 no_loop_filter_enabled_luma = 0;
558
5.34M
    WORD32 no_loop_filter_enabled_chroma = 0;
559
5.34M
    UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
560
5.34M
    UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
561
5.34M
    UWORD8 *pu1_sao_src_luma_top_left_ctb;
562
5.34M
    UWORD8 *pu1_sao_src_chroma_top_left_ctb;
563
5.34M
    UWORD8 *pu1_sao_src_top_left_luma_top_right;
564
5.34M
    UWORD8 *pu1_sao_src_top_left_chroma_top_right;
565
5.34M
    UWORD8  u1_sao_src_top_left_luma_bot_left;
566
5.34M
    UWORD8  *pu1_sao_src_top_left_luma_bot_left;
567
5.34M
    UWORD8 *au1_sao_src_top_left_chroma_bot_left;
568
5.34M
    UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
569
    /* Only first 5 values are used, but arrays are large
570
     enough so that SIMD functions can read 64 bits at a time */
571
5.34M
    WORD8 ai1_offset_y[8] = {0};
572
5.34M
    WORD8 ai1_offset_cb[8] = {0};
573
5.34M
    WORD8 ai1_offset_cr[8] = {0};
574
5.34M
    WORD32  chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
575
576
5.34M
    PROFILE_DISABLE_SAO();
577
578
5.34M
    ps_sps = ps_sao_ctxt->ps_sps;
579
5.34M
    ps_pps = ps_sao_ctxt->ps_pps;
580
5.34M
    ps_tile = ps_sao_ctxt->ps_tile;
581
582
5.34M
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
583
5.34M
    ctb_size = (1 << log2_ctb_size);
584
5.34M
    src_strd = ps_sao_ctxt->ps_codec->i4_strd;
585
5.34M
    ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
586
5.34M
    ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
587
588
5.34M
    pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
589
5.34M
    pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
590
5.34M
    pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
591
5.34M
    pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
592
593
    /*Stores the left value for each row ctbs- Needed for column tiles*/
594
5.34M
    pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
595
5.34M
    pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
596
5.34M
    pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
597
5.34M
    pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
598
5.34M
    u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
599
5.34M
    pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
600
5.34M
    au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
601
5.34M
    pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
602
5.34M
    pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
603
5.34M
    pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
604
605
5.34M
    ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
606
5.34M
    loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
607
5.34M
    backup_strd = 2 * MAX_CTB_SIZE;
608
609
5.34M
    DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
610
611
5.34M
    {
612
        /* Check the loop filter flags and copy the original values for back up */
613
        /* Luma */
614
615
        /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs
616
         * can belong to different slice with their own sao_enable flag */
617
5.34M
        {
618
5.34M
            UWORD32 u4_no_loop_filter_flag;
619
5.34M
            WORD32 loop_filter_bit_pos;
620
5.34M
            WORD32 log2_min_cu = 3;
621
5.34M
            WORD32 min_cu = (1 << log2_min_cu);
622
5.34M
            UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
623
5.34M
            WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
624
5.34M
            WORD32 sao_blk_wd = ctb_size;
625
5.34M
            WORD32 remaining_rows;
626
5.34M
            WORD32 remaining_cols;
627
628
5.34M
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
629
5.34M
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
630
5.34M
            if(remaining_rows <= SAO_SHIFT_CTB)
631
317k
                sao_blk_ht += remaining_rows;
632
5.34M
            if(remaining_cols <= SAO_SHIFT_CTB)
633
143k
                sao_blk_wd += remaining_cols;
634
635
5.34M
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
636
5.34M
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
637
638
5.34M
            pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
639
640
5.34M
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
641
5.34M
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
642
5.34M
            if(ps_sao_ctxt->i4_ctb_x > 0)
643
5.19M
                loop_filter_bit_pos -= 1;
644
645
5.34M
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
646
5.34M
                            (loop_filter_bit_pos >> 3);
647
648
5.34M
            for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
649
30.3M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
650
25.0M
            {
651
25.0M
                WORD32 tmp_wd = sao_blk_wd;
652
653
25.0M
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
654
25.0M
                                (loop_filter_bit_pos & 7);
655
25.0M
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
656
657
25.0M
                if(u4_no_loop_filter_flag)
658
768k
                {
659
768k
                    no_loop_filter_enabled_luma = 1;
660
1.64M
                    while(tmp_wd > 0)
661
871k
                    {
662
871k
                        if(CTZ(u4_no_loop_filter_flag))
663
92.0k
                        {
664
92.0k
                            pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
665
92.0k
                            pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
666
92.0k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
667
92.0k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
668
92.0k
                        }
669
779k
                        else
670
779k
                        {
671
6.13M
                            for(row = 0; row < min_cu; row++)
672
5.35M
                            {
673
272M
                                for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
674
267M
                                {
675
267M
                                    pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
676
267M
                                }
677
5.35M
                            }
678
779k
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
679
779k
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
680
779k
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
681
779k
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
682
779k
                        }
683
871k
                    }
684
685
768k
                    pu1_src_tmp_luma -= sao_blk_wd;
686
768k
                    pu1_src_backup_luma -= sao_blk_wd;
687
768k
                }
688
689
25.0M
                pu1_src_tmp_luma += (src_strd << log2_min_cu);
690
25.0M
                pu1_src_backup_luma += (backup_strd << log2_min_cu);
691
25.0M
            }
692
5.34M
        }
693
694
        /* Chroma */
695
696
5.34M
        {
697
5.34M
            UWORD32 u4_no_loop_filter_flag;
698
5.34M
            WORD32 loop_filter_bit_pos;
699
5.34M
            WORD32 log2_min_cu = 3;
700
5.34M
            WORD32 min_cu = (1 << log2_min_cu);
701
5.34M
            UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
702
5.34M
            WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
703
5.34M
            WORD32 sao_blk_wd = ctb_size;
704
5.34M
            WORD32 remaining_rows;
705
5.34M
            WORD32 remaining_cols;
706
707
5.34M
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
708
5.34M
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
709
5.34M
            if(remaining_rows <= 2 * SAO_SHIFT_CTB)
710
317k
                sao_blk_ht += remaining_rows;
711
5.34M
            if(remaining_cols <= 2 * SAO_SHIFT_CTB)
712
143k
                sao_blk_wd += remaining_cols;
713
714
5.34M
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
715
5.34M
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
716
717
5.34M
            pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
718
719
5.34M
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
720
5.34M
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
721
5.34M
            if(ps_sao_ctxt->i4_ctb_x > 0)
722
5.19M
                loop_filter_bit_pos -= 2;
723
724
5.34M
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
725
5.34M
                            (loop_filter_bit_pos >> 3);
726
727
5.34M
            for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
728
30.3M
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
729
25.0M
            {
730
25.0M
                WORD32 tmp_wd = sao_blk_wd;
731
732
25.0M
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
733
25.0M
                                (loop_filter_bit_pos & 7);
734
25.0M
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
735
736
25.0M
                if(u4_no_loop_filter_flag)
737
781k
                {
738
781k
                    no_loop_filter_enabled_chroma = 1;
739
1.66M
                    while(tmp_wd > 0)
740
878k
                    {
741
878k
                        if(CTZ(u4_no_loop_filter_flag))
742
89.2k
                        {
743
89.2k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
744
89.2k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
745
89.2k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
746
89.2k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
747
89.2k
                        }
748
789k
                        else
749
789k
                        {
750
3.83M
                            for(row = 0; row < min_cu / 2; row++)
751
3.04M
                            {
752
175M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
753
172M
                                {
754
172M
                                    pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
755
172M
                                }
756
3.04M
                            }
757
758
789k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
759
789k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
760
789k
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
761
789k
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
762
789k
                        }
763
878k
                    }
764
765
781k
                    pu1_src_tmp_chroma -= sao_blk_wd;
766
781k
                    pu1_src_backup_chroma -= sao_blk_wd;
767
781k
                }
768
769
25.0M
                pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
770
25.0M
                pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
771
25.0M
            }
772
5.34M
        }
773
5.34M
    }
774
775
5.34M
    DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
776
777
    /* Top-left CTB */
778
5.34M
    if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
779
4.88M
    {
780
4.88M
        WORD32 sao_wd_luma = SAO_SHIFT_CTB;
781
4.88M
        WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
782
4.88M
        WORD32 sao_ht_luma = SAO_SHIFT_CTB;
783
4.88M
        WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
784
785
4.88M
        WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
786
4.88M
        WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
787
4.88M
        WORD32 au4_idx_tl[8], idx_tl;
788
789
4.88M
        slice_header_t *ps_slice_hdr_top_left;
790
4.88M
        {
791
4.88M
            WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
792
4.88M
                                        (ps_sao_ctxt->i4_ctb_x - 1);
793
4.88M
            ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx];
794
4.88M
        }
795
796
797
4.88M
        pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
798
4.88M
        pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
799
4.88M
        ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
800
4.88M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
801
4.88M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
802
4.88M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
803
4.88M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
804
805
4.88M
        if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag)
806
1.48M
        {
807
1.48M
            if(0 == ps_sao->b3_y_type_idx)
808
1.43M
            {
809
                /* Update left, top and top-left */
810
12.8M
                for(row = 0; row < sao_ht_luma; row++)
811
11.4M
                {
812
11.4M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
813
11.4M
                }
814
1.43M
                pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
815
816
1.43M
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
817
818
819
1.43M
            }
820
821
53.7k
            else if(1 == ps_sao->b3_y_type_idx)
822
27.5k
            {
823
27.5k
                ai1_offset_y[1] = ps_sao->b4_y_offset_1;
824
27.5k
                ai1_offset_y[2] = ps_sao->b4_y_offset_2;
825
27.5k
                ai1_offset_y[3] = ps_sao->b4_y_offset_3;
826
27.5k
                ai1_offset_y[4] = ps_sao->b4_y_offset_4;
827
828
27.5k
                ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
829
27.5k
                                                                          src_strd,
830
27.5k
                                                                          pu1_src_left_luma,
831
27.5k
                                                                          pu1_src_top_luma,
832
27.5k
                                                                          pu1_sao_src_luma_top_left_ctb,
833
27.5k
                                                                          ps_sao->b5_y_band_pos,
834
27.5k
                                                                          ai1_offset_y,
835
27.5k
                                                                          sao_wd_luma,
836
27.5k
                                                                          sao_ht_luma
837
27.5k
                                                                         );
838
27.5k
            }
839
840
26.1k
            else // if(2 <= ps_sao->b3_y_type_idx)
841
26.1k
            {
842
26.1k
                ai1_offset_y[1] = ps_sao->b4_y_offset_1;
843
26.1k
                ai1_offset_y[2] = ps_sao->b4_y_offset_2;
844
26.1k
                ai1_offset_y[3] = ps_sao->b4_y_offset_3;
845
26.1k
                ai1_offset_y[4] = ps_sao->b4_y_offset_4;
846
847
236k
                for(i = 0; i < 8; i++)
848
210k
                {
849
210k
                    au1_avail_luma[i] = 255;
850
210k
                    au1_tile_slice_boundary[i] = 0;
851
210k
                    au4_idx_tl[i] = 0;
852
210k
                    au4_ilf_across_tile_slice_enable[i] = 1;
853
210k
                }
854
855
                /******************************************************************
856
                 * Derive the  Top-left CTB's neighbor pixel's slice indices.
857
                 *
858
                 *          TL_T
859
                 *       4  _2__5________
860
                 *     0   |    |       |
861
                 *    TL_L | TL | 1 TL_R|
862
                 *         |____|_______|____
863
                 *        6|TL_D|7      |    |
864
                 *         | 3  |       |    |
865
                 *         |____|_______|    |
866
                 *              |            |
867
                 *              |            |
868
                 *              |____________|
869
                 *
870
                 *****************************************************************/
871
872
                /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/
873
26.1k
                {
874
26.1k
                    if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
875
16.6k
                    {
876
16.6k
                        {
877
                            /*Assuming that sao shift is uniform along x and y directions*/
878
16.6k
                            if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
879
0
                            {
880
0
                                ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
881
0
                                ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
882
0
                            }
883
16.6k
                            else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
884
16.6k
                            {
885
16.6k
                                ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
886
16.6k
                                ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
887
16.6k
                            }
888
16.6k
                            ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
889
16.6k
                            ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
890
891
16.6k
                            ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
892
16.6k
                            ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
893
894
16.6k
                            ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
895
16.6k
                            ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
896
897
16.6k
                            ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
898
16.6k
                            ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
899
16.6k
                        }
900
901
16.6k
                        if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
902
7.01k
                        {
903
                            /*Calculate slice indices for neighbor pixels*/
904
7.01k
                            idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
905
7.01k
                            au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
906
7.01k
                            au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
907
7.01k
                            au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
908
7.01k
                            au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
909
7.01k
                            au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
910
911
7.01k
                            if((0 == (1 << log2_ctb_size) - sao_wd_luma))
912
0
                            {
913
0
                                if(ps_sao_ctxt->i4_ctb_x == 1)
914
0
                                {
915
0
                                    au4_idx_tl[6] = -1;
916
0
                                    au4_idx_tl[4] = -1;
917
0
                                }
918
0
                                else
919
0
                                {
920
0
                                    au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
921
0
                                }
922
0
                                if(ps_sao_ctxt->i4_ctb_y == 1)
923
0
                                {
924
0
                                    au4_idx_tl[5] = -1;
925
0
                                    au4_idx_tl[4] = -1;
926
0
                                }
927
0
                                else
928
0
                                {
929
0
                                    au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
930
0
                                    au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
931
0
                                }
932
0
                                au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
933
0
                            }
934
935
                            /* Verify that the neighbor ctbs don't cross pic boundary.
936
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
937
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags.
938
                             * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
939
                             * the respective pixel's flags are checked
940
                             */
941
942
7.01k
                            if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
943
0
                            {
944
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
945
0
                                au4_ilf_across_tile_slice_enable[6] = 0;
946
0
                            }
947
7.01k
                            else
948
7.01k
                            {
949
7.01k
                                au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
950
7.01k
                            }
951
7.01k
                            if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
952
0
                            {
953
0
                                au4_ilf_across_tile_slice_enable[5] = 0;
954
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
955
0
                            }
956
7.01k
                            else
957
7.01k
                            {
958
7.01k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
959
7.01k
                                au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
960
7.01k
                            }
961
7.01k
                            au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
962
7.01k
                            au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
963
7.01k
                            au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
964
7.01k
                            au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
965
7.01k
                            au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
966
967
7.01k
                            if(au4_idx_tl[5] > idx_tl)
968
795
                            {
969
795
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
970
795
                            }
971
972
                            /*
973
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
974
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags.
975
                             * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
976
                             * the respective pixel's flags are checked
977
                             */
978
63.1k
                            for(i = 0; i < 8; i++)
979
56.1k
                            {
980
                                /*Sets the edges that lie on the slice/tile boundary*/
981
56.1k
                                if(au4_idx_tl[i] != idx_tl)
982
15.3k
                                {
983
15.3k
                                    au1_tile_slice_boundary[i] = 1;
984
15.3k
                                }
985
40.8k
                                else
986
40.8k
                                {
987
40.8k
                                    au4_ilf_across_tile_slice_enable[i] = 1;
988
40.8k
                                }
989
56.1k
                            }
990
991
7.01k
                            ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
992
7.01k
                        }
993
994
16.6k
                        if(ps_pps->i1_tiles_enabled_flag)
995
9.74k
                        {
996
                            /* Calculate availability flags at tile boundary */
997
9.74k
                            if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
998
4.94k
                            {
999
                                /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1000
4.94k
                                if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1001
4.51k
                                {
1002
                                    /*Set the boundary arrays*/
1003
                                    /*Calculate tile indices for neighbor pixels*/
1004
4.51k
                                    idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1005
4.51k
                                    au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1006
4.51k
                                    au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1007
4.51k
                                    au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1008
4.51k
                                    au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1009
4.51k
                                    au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1010
1011
4.51k
                                    if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1012
0
                                    {
1013
0
                                        if(ps_sao_ctxt->i4_ctb_x == 1)
1014
0
                                        {
1015
0
                                            au4_idx_tl[6] = -1;
1016
0
                                            au4_idx_tl[4] = -1;
1017
0
                                        }
1018
0
                                        else
1019
0
                                        {
1020
0
                                            au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1021
0
                                        }
1022
0
                                        if(ps_sao_ctxt->i4_ctb_y == 1)
1023
0
                                        {
1024
0
                                            au4_idx_tl[5] = -1;
1025
0
                                            au4_idx_tl[4] = -1;
1026
0
                                        }
1027
0
                                        else
1028
0
                                        {
1029
0
                                            au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1030
0
                                            au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1031
0
                                        }
1032
0
                                        au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1033
0
                                    }
1034
40.6k
                                    for(i = 0; i < 8; i++)
1035
36.1k
                                    {
1036
                                        /*Sets the edges that lie on the tile boundary*/
1037
36.1k
                                        if(au4_idx_tl[i] != idx_tl)
1038
14.5k
                                        {
1039
14.5k
                                            au1_tile_slice_boundary[i] |= 1;
1040
14.5k
                                            au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1041
14.5k
                                        }
1042
36.1k
                                    }
1043
4.51k
                                }
1044
4.94k
                            }
1045
9.74k
                        }
1046
1047
1048
                        /*Set availability flags based on tile and slice boundaries*/
1049
149k
                        for(i = 0; i < 8; i++)
1050
132k
                        {
1051
                            /* Mark neighbor i unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled */
1052
132k
                            if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1053
14.6k
                            {
1054
14.6k
                                au1_avail_luma[i] = 0;
1055
14.6k
                            }
1056
132k
                        }
1057
16.6k
                    }
1058
26.1k
                }
1059
1060
26.1k
                if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
1061
0
                {
1062
0
                    au1_avail_luma[0] = 0;
1063
0
                    au1_avail_luma[4] = 0;
1064
0
                    au1_avail_luma[6] = 0;
1065
0
                }
1066
1067
26.1k
                if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1068
0
                {
1069
0
                    au1_avail_luma[1] = 0;
1070
0
                    au1_avail_luma[5] = 0;
1071
0
                    au1_avail_luma[7] = 0;
1072
0
                }
1073
                //y==1 case
1074
26.1k
                if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
1075
0
                {
1076
0
                    au1_avail_luma[2] = 0;
1077
0
                    au1_avail_luma[4] = 0;
1078
0
                    au1_avail_luma[5] = 0;
1079
0
                }
1080
26.1k
                if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1081
0
                {
1082
0
                    au1_avail_luma[3] = 0;
1083
0
                    au1_avail_luma[6] = 0;
1084
0
                    au1_avail_luma[7] = 0;
1085
0
                }
1086
1087
26.1k
                {
1088
26.1k
                    au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
1089
26.1k
                    u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
1090
26.1k
                    ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1091
26.1k
                                                                      src_strd,
1092
26.1k
                                                                      pu1_src_left_luma,
1093
26.1k
                                                                      pu1_src_top_luma,
1094
26.1k
                                                                      pu1_sao_src_luma_top_left_ctb,
1095
26.1k
                                                                      au1_src_top_right,
1096
26.1k
                                                                      &u1_sao_src_top_left_luma_bot_left,
1097
26.1k
                                                                      au1_avail_luma,
1098
26.1k
                                                                      ai1_offset_y,
1099
26.1k
                                                                      sao_wd_luma,
1100
26.1k
                                                                      sao_ht_luma);
1101
26.1k
                }
1102
26.1k
            }
1103
1104
1.48M
        }
1105
3.40M
        else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1106
1.43M
        {
1107
            /* Update left, top and top-left */
1108
12.8M
            for(row = 0; row < sao_ht_luma; row++)
1109
11.4M
            {
1110
11.4M
                pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1111
11.4M
            }
1112
1.43M
            pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1113
1114
1.43M
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1115
1.43M
        }
1116
1117
4.88M
        if(ps_slice_hdr_top_left->i1_slice_sao_chroma_flag)
1118
1.50M
        {
1119
1.50M
            if(0 == ps_sao->b3_cb_type_idx)
1120
1.47M
            {
1121
13.2M
                for(row = 0; row < sao_ht_chroma; row++)
1122
11.7M
                {
1123
11.7M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1124
11.7M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1125
11.7M
                }
1126
1.47M
                pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1127
1.47M
                pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1128
1129
1.47M
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1130
1131
1.47M
            }
1132
1133
35.3k
            else if(1 == ps_sao->b3_cb_type_idx)
1134
16.4k
            {
1135
16.4k
                ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1136
16.4k
                ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1137
16.4k
                ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1138
16.4k
                ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1139
1140
16.4k
                ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1141
16.4k
                ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1142
16.4k
                ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1143
16.4k
                ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1144
1145
16.4k
                if(chroma_yuv420sp_vu)
1146
4.70k
                {
1147
4.70k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1148
4.70k
                                                                                src_strd,
1149
4.70k
                                                                                pu1_src_left_chroma,
1150
4.70k
                                                                                pu1_src_top_chroma,
1151
4.70k
                                                                                pu1_sao_src_chroma_top_left_ctb,
1152
4.70k
                                                                                ps_sao->b5_cr_band_pos,
1153
4.70k
                                                                                ps_sao->b5_cb_band_pos,
1154
4.70k
                                                                                ai1_offset_cr,
1155
4.70k
                                                                                ai1_offset_cb,
1156
4.70k
                                                                                sao_wd_chroma,
1157
4.70k
                                                                                sao_ht_chroma
1158
4.70k
                                                                               );
1159
4.70k
                }
1160
11.7k
                else
1161
11.7k
                {
1162
11.7k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1163
11.7k
                                                                                src_strd,
1164
11.7k
                                                                                pu1_src_left_chroma,
1165
11.7k
                                                                                pu1_src_top_chroma,
1166
11.7k
                                                                                pu1_sao_src_chroma_top_left_ctb,
1167
11.7k
                                                                                ps_sao->b5_cb_band_pos,
1168
11.7k
                                                                                ps_sao->b5_cr_band_pos,
1169
11.7k
                                                                                ai1_offset_cb,
1170
11.7k
                                                                                ai1_offset_cr,
1171
11.7k
                                                                                sao_wd_chroma,
1172
11.7k
                                                                                sao_ht_chroma
1173
11.7k
                                                                               );
1174
11.7k
                }
1175
16.4k
            }
1176
1177
18.8k
            else // if(2 <= ps_sao->b3_cb_type_idx)
1178
18.8k
            {
1179
18.8k
                ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1180
18.8k
                ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1181
18.8k
                ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1182
18.8k
                ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1183
1184
18.8k
                ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1185
18.8k
                ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1186
18.8k
                ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1187
18.8k
                ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1188
171k
                for(i = 0; i < 8; i++)
1189
152k
                {
1190
152k
                    au1_avail_chroma[i] = 255;
1191
152k
                    au1_tile_slice_boundary[i] = 0;
1192
152k
                    au4_idx_tl[i] = 0;
1193
152k
                    au4_ilf_across_tile_slice_enable[i] = 1;
1194
152k
                }
1195
                /* In case of multiple slices or tiles */
1196
18.8k
                {
1197
18.8k
                    if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1198
12.2k
                    {
1199
12.2k
                        if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
1200
1.59k
                        {
1201
1.59k
                            ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
1202
1.59k
                            ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
1203
1.59k
                        }
1204
10.6k
                        else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
1205
9.96k
                        {
1206
9.96k
                            ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
1207
9.96k
                            ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
1208
9.96k
                        }
1209
12.2k
                        ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
1210
12.2k
                        ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
1211
1212
12.2k
                        ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
1213
12.2k
                        ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
1214
1215
12.2k
                        ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
1216
12.2k
                        ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
1217
1218
12.2k
                        ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
1219
12.2k
                        ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
1220
1221
12.2k
                        if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1222
4.41k
                        {
1223
1224
4.41k
                            idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1225
4.41k
                            au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1226
4.41k
                            au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1227
4.41k
                            au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1228
4.41k
                            au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1229
4.41k
                            au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1230
1231
4.41k
                            if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
1232
2.14k
                            {
1233
2.14k
                                if(ps_sao_ctxt->i4_ctb_x == 1)
1234
549
                                {
1235
549
                                    au4_idx_tl[6] = -1;
1236
549
                                    au4_idx_tl[4] = -1;
1237
549
                                }
1238
1.60k
                                else
1239
1.60k
                                {
1240
1.60k
                                    au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1241
1.60k
                                }
1242
2.14k
                                if(ps_sao_ctxt->i4_ctb_y == 1)
1243
8
                                {
1244
8
                                    au4_idx_tl[5] = -1;
1245
8
                                    au4_idx_tl[4] = -1;
1246
8
                                }
1247
2.14k
                                else
1248
2.14k
                                {
1249
2.14k
                                    au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1250
2.14k
                                    au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1251
2.14k
                                }
1252
2.14k
                                au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1253
2.14k
                            }
1254
1255
                            /* Verify that the neighbor ctbs don't cross pic boundary
1256
                             * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
1257
4.41k
                            if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
1258
549
                            {
1259
549
                                au4_ilf_across_tile_slice_enable[4] = 0;
1260
549
                                au4_ilf_across_tile_slice_enable[6] = 0;
1261
549
                            }
1262
3.86k
                            else
1263
3.86k
                            {
1264
3.86k
                                au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1265
3.86k
                            }
1266
4.41k
                            if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
1267
8
                            {
1268
8
                                au4_ilf_across_tile_slice_enable[5] = 0;
1269
8
                                au4_ilf_across_tile_slice_enable[4] = 0;
1270
8
                            }
1271
4.40k
                            else
1272
4.40k
                            {
1273
4.40k
                                au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1274
4.40k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1275
4.40k
                            }
1276
4.41k
                            au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1277
4.41k
                            au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1278
4.41k
                            au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1279
4.41k
                            au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1280
4.41k
                            au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1281
                            /*
1282
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1283
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags
1284
                             */
1285
39.6k
                            for(i = 0; i < 8; i++)
1286
35.2k
                            {
1287
                                /*Sets the edges that lie on the slice/tile boundary*/
1288
35.2k
                                if(au4_idx_tl[i] != idx_tl)
1289
6.98k
                                {
1290
6.98k
                                    au1_tile_slice_boundary[i] = 1;
1291
6.98k
                                }
1292
28.2k
                                else
1293
28.2k
                                {
1294
28.2k
                                    au4_ilf_across_tile_slice_enable[i] = 1;
1295
28.2k
                                }
1296
35.2k
                            }
1297
1298
                            /*Reset indices*/
1299
39.6k
                            for(i = 0; i < 8; i++)
1300
35.2k
                            {
1301
35.2k
                                au4_idx_tl[i] = 0;
1302
35.2k
                            }
1303
4.41k
                        }
1304
12.2k
                        if(ps_pps->i1_tiles_enabled_flag)
1305
7.89k
                        {
1306
                            /* Calculate availability flags at tile boundary */
1307
7.89k
                            if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1308
2.97k
                            {
1309
                                /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1310
2.97k
                                if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1311
2.68k
                                {
1312
                                    /*Set the boundary arrays*/
1313
                                    /*Calculate tile indices for neighbor pixels*/
1314
2.68k
                                    idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1315
2.68k
                                    au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1316
2.68k
                                    au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1317
2.68k
                                    au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1318
2.68k
                                    au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1319
2.68k
                                    au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1320
1321
2.68k
                                    if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1322
0
                                    {
1323
0
                                        if(ps_sao_ctxt->i4_ctb_x == 1)
1324
0
                                        {
1325
0
                                            au4_idx_tl[6] = -1;
1326
0
                                            au4_idx_tl[4] = -1;
1327
0
                                        }
1328
0
                                        else
1329
0
                                        {
1330
0
                                            au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1331
0
                                        }
1332
0
                                        if(ps_sao_ctxt->i4_ctb_y == 1)
1333
0
                                        {
1334
0
                                            au4_idx_tl[5] = -1;
1335
0
                                            au4_idx_tl[4] = -1;
1336
0
                                        }
1337
0
                                        else
1338
0
                                        {
1339
0
                                            au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1340
0
                                            au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1341
0
                                        }
1342
0
                                        au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1343
0
                                    }
1344
24.1k
                                    for(i = 0; i < 8; i++)
1345
21.4k
                                    {
1346
                                        /*Sets the edges that lie on the tile boundary*/
1347
21.4k
                                        if(au4_idx_tl[i] != idx_tl)
1348
8.30k
                                        {
1349
8.30k
                                            au1_tile_slice_boundary[i] |= 1;
1350
8.30k
                                            au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1351
8.30k
                                        }
1352
21.4k
                                    }
1353
2.68k
                                }
1354
2.97k
                            }
1355
7.89k
                        }
1356
1357
109k
                        for(i = 0; i < 8; i++)
1358
97.6k
                        {
1359
                            /*Sets the edges that lie on the slice/tile boundary*/
1360
97.6k
                            if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1361
8.97k
                            {
1362
8.97k
                                au1_avail_chroma[i] = 0;
1363
8.97k
                            }
1364
97.6k
                        }
1365
12.2k
                    }
1366
18.8k
                }
1367
1368
18.8k
                if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
1369
601
                {
1370
601
                    au1_avail_chroma[0] = 0;
1371
601
                    au1_avail_chroma[4] = 0;
1372
601
                    au1_avail_chroma[6] = 0;
1373
601
                }
1374
18.8k
                if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1375
0
                {
1376
0
                    au1_avail_chroma[1] = 0;
1377
0
                    au1_avail_chroma[5] = 0;
1378
0
                    au1_avail_chroma[7] = 0;
1379
0
                }
1380
1381
18.8k
                if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1382
194
                {
1383
194
                    au1_avail_chroma[2] = 0;
1384
194
                    au1_avail_chroma[4] = 0;
1385
194
                    au1_avail_chroma[5] = 0;
1386
194
                }
1387
18.8k
                if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1388
0
                {
1389
0
                    au1_avail_chroma[3] = 0;
1390
0
                    au1_avail_chroma[6] = 0;
1391
0
                    au1_avail_chroma[7] = 0;
1392
0
                }
1393
1394
18.8k
                {
1395
18.8k
                    au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
1396
18.8k
                    au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
1397
18.8k
                    au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
1398
18.8k
                    au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
1399
18.8k
                    if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
1400
2.16k
                    {
1401
2.16k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1402
2.16k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1403
2.16k
                    }
1404
1405
18.8k
                    if(chroma_yuv420sp_vu)
1406
4.81k
                    {
1407
4.81k
                        ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1408
4.81k
                                                                             src_strd,
1409
4.81k
                                                                             pu1_src_left_chroma,
1410
4.81k
                                                                             pu1_src_top_chroma,
1411
4.81k
                                                                             pu1_sao_src_chroma_top_left_ctb,
1412
4.81k
                                                                             au1_src_top_right,
1413
4.81k
                                                                             au1_sao_src_top_left_chroma_bot_left,
1414
4.81k
                                                                             au1_avail_chroma,
1415
4.81k
                                                                             ai1_offset_cr,
1416
4.81k
                                                                             ai1_offset_cb,
1417
4.81k
                                                                             sao_wd_chroma,
1418
4.81k
                                                                             sao_ht_chroma);
1419
4.81k
                    }
1420
14.0k
                    else
1421
14.0k
                    {
1422
14.0k
                        ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1423
14.0k
                                                                             src_strd,
1424
14.0k
                                                                             pu1_src_left_chroma,
1425
14.0k
                                                                             pu1_src_top_chroma,
1426
14.0k
                                                                             pu1_sao_src_chroma_top_left_ctb,
1427
14.0k
                                                                             au1_src_top_right,
1428
14.0k
                                                                             au1_sao_src_top_left_chroma_bot_left,
1429
14.0k
                                                                             au1_avail_chroma,
1430
14.0k
                                                                             ai1_offset_cb,
1431
14.0k
                                                                             ai1_offset_cr,
1432
14.0k
                                                                             sao_wd_chroma,
1433
14.0k
                                                                             sao_ht_chroma);
1434
14.0k
                    }
1435
18.8k
                }
1436
18.8k
            }
1437
1.50M
        }
1438
3.38M
        else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1439
1.42M
        {
1440
12.8M
            for(row = 0; row < sao_ht_chroma; row++)
1441
11.4M
            {
1442
11.4M
                pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1443
11.4M
                pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1444
11.4M
            }
1445
1.42M
            pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1446
1.42M
            pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1447
1448
1.42M
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1449
1.42M
        }
1450
1451
4.88M
        pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
1452
4.88M
        pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
1453
4.88M
        ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
1454
4.88M
    }
1455
1456
1457
    /* Top CTB */
1458
5.34M
    if((ps_sao_ctxt->i4_ctb_y > 0))
1459
5.01M
    {
1460
5.01M
        WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
1461
5.01M
        WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
1462
5.01M
        WORD32 sao_ht_luma = SAO_SHIFT_CTB;
1463
5.01M
        WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
1464
1465
5.01M
        WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
1466
5.01M
        WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
1467
5.01M
        WORD32 au4_idx_t[8], idx_t;
1468
1469
5.01M
        WORD32 remaining_cols;
1470
1471
5.01M
        slice_header_t *ps_slice_hdr_top;
1472
5.01M
        {
1473
5.01M
            WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
1474
5.01M
                                        (ps_sao_ctxt->i4_ctb_x);
1475
5.01M
            ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx];
1476
5.01M
        }
1477
1478
5.01M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
1479
5.01M
        if(remaining_cols <= SAO_SHIFT_CTB)
1480
132k
        {
1481
132k
            sao_wd_luma += remaining_cols;
1482
132k
        }
1483
5.01M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
1484
5.01M
        if(remaining_cols <= 2 * SAO_SHIFT_CTB)
1485
132k
        {
1486
132k
            sao_wd_chroma += remaining_cols;
1487
132k
        }
1488
1489
5.01M
        pu1_src_luma -= (sao_ht_luma * src_strd);
1490
5.01M
        pu1_src_chroma -= (sao_ht_chroma * src_strd);
1491
5.01M
        ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
1492
5.01M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1493
5.01M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1494
5.01M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
1495
5.01M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
1496
1497
5.01M
        if(0 != sao_wd_luma)
1498
5.00M
        {
1499
5.00M
            if(ps_slice_hdr_top->i1_slice_sao_luma_flag)
1500
1.51M
            {
1501
1.51M
                if(0 == ps_sao->b3_y_type_idx)
1502
1.45M
                {
1503
                    /* Update left, top and top-left */
1504
13.0M
                    for(row = 0; row < sao_ht_luma; row++)
1505
11.6M
                    {
1506
11.6M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1507
11.6M
                    }
1508
1.45M
                    pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1509
1510
1.45M
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1511
1512
1.45M
                }
1513
1514
59.6k
                else if(1 == ps_sao->b3_y_type_idx)
1515
30.2k
                {
1516
30.2k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1517
30.2k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1518
30.2k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1519
30.2k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1520
1521
30.2k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1522
30.2k
                                                                              src_strd,
1523
30.2k
                                                                              pu1_src_left_luma,
1524
30.2k
                                                                              pu1_src_top_luma,
1525
30.2k
                                                                              pu1_sao_src_luma_top_left_ctb,
1526
30.2k
                                                                              ps_sao->b5_y_band_pos,
1527
30.2k
                                                                              ai1_offset_y,
1528
30.2k
                                                                              sao_wd_luma,
1529
30.2k
                                                                              sao_ht_luma
1530
30.2k
                                                                             );
1531
30.2k
                }
1532
1533
29.3k
                else // if(2 <= ps_sao->b3_y_type_idx)
1534
29.3k
                {
1535
29.3k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1536
29.3k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1537
29.3k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1538
29.3k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1539
1540
29.3k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
1541
29.3k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
1542
29.3k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
1543
1544
265k
                    for(i = 0; i < 8; i++)
1545
235k
                    {
1546
1547
235k
                        au4_ilf_across_tile_slice_enable[i] = 1;
1548
235k
                    }
1549
                    /******************************************************************
1550
                     * Derive the  Top-left CTB's neighbor pixel's slice indices.
1551
                     *
1552
                     *               T_T
1553
                     *          ____________
1554
                     *         |    |       |
1555
                     *         | T_L|  T    |T_R
1556
                     *         |    | ______|____
1557
                     *         |    |  T_D  |    |
1558
                     *         |    |       |    |
1559
                     *         |____|_______|    |
1560
                     *              |            |
1561
                     *              |            |
1562
                     *              |____________|
1563
                     *
1564
                     *****************************************************************/
1565
1566
                    /*In case of slices*/
1567
29.3k
                    {
1568
29.3k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1569
18.3k
                        {
1570
1571
18.3k
                            ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1572
18.3k
                            ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1573
1574
18.3k
                            ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1575
18.3k
                            ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1576
1577
18.3k
                            ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1578
18.3k
                            ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1579
1580
18.3k
                            ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1581
18.3k
                            ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1582
1583
18.3k
                            ctbx_t = ps_sao_ctxt->i4_ctb_x;
1584
18.3k
                            ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1585
1586
18.3k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1587
7.56k
                            {
1588
                                /*Calculate neighbor ctb slice indices*/
1589
7.56k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1590
894
                                {
1591
894
                                    au4_idx_t[0] = -1;
1592
894
                                    au4_idx_t[6] = -1;
1593
894
                                    au4_idx_t[4] = -1;
1594
894
                                }
1595
6.66k
                                else
1596
6.66k
                                {
1597
6.66k
                                    au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1598
6.66k
                                    au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1599
6.66k
                                }
1600
7.56k
                                idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1601
7.56k
                                au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1602
7.56k
                                au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1603
7.56k
                                au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1604
1605
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1606
7.56k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1607
894
                                {
1608
894
                                    au4_ilf_across_tile_slice_enable[4] = 0;
1609
894
                                    au4_ilf_across_tile_slice_enable[6] = 0;
1610
894
                                    au4_ilf_across_tile_slice_enable[0] = 0;
1611
894
                                }
1612
6.66k
                                else
1613
6.66k
                                {
1614
6.66k
                                    au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1615
6.66k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1616
6.66k
                                }
1617
1618
1619
1620
7.56k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1621
7.56k
                                au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1622
7.56k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1623
7.56k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1624
7.56k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1625
1626
7.56k
                                if(au4_idx_t[6] < idx_t)
1627
894
                                {
1628
894
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1629
894
                                }
1630
1631
                                /*
1632
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1633
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1634
                                 */
1635
1636
68.0k
                                for(i = 0; i < 8; i++)
1637
60.4k
                                {
1638
                                    /*Sets the edges that lie on the slice/tile boundary*/
1639
60.4k
                                    if(au4_idx_t[i] != idx_t)
1640
17.7k
                                    {
1641
17.7k
                                        au1_tile_slice_boundary[i] = 1;
1642
                                        /*Check for slice flag at such boundaries*/
1643
17.7k
                                    }
1644
42.6k
                                    else
1645
42.6k
                                    {
1646
42.6k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
1647
42.6k
                                    }
1648
60.4k
                                }
1649
                                /*Reset indices*/
1650
68.0k
                                for(i = 0; i < 8; i++)
1651
60.4k
                                {
1652
60.4k
                                    au4_idx_t[i] = 0;
1653
60.4k
                                }
1654
7.56k
                            }
1655
1656
18.3k
                            if(ps_pps->i1_tiles_enabled_flag)
1657
10.8k
                            {
1658
                                /* Calculate availability flags at tile boundary */
1659
10.8k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1660
5.86k
                                {
1661
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1662
5.86k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1663
5.47k
                                    {
1664
                                        /*Calculate neighbor ctb tile indices*/
1665
5.47k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
1666
1.04k
                                        {
1667
1.04k
                                            au4_idx_t[0] = -1;
1668
1.04k
                                            au4_idx_t[6] = -1;
1669
1.04k
                                            au4_idx_t[4] = -1;
1670
1.04k
                                        }
1671
4.42k
                                        else
1672
4.42k
                                        {
1673
4.42k
                                            au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1674
4.42k
                                            au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1675
4.42k
                                        }
1676
5.47k
                                        idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1677
5.47k
                                        au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1678
5.47k
                                        au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1679
5.47k
                                        au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1680
1681
49.2k
                                        for(i = 0; i < 8; i++)
1682
43.8k
                                        {
1683
                                            /*Sets the edges that lie on the tile boundary*/
1684
43.8k
                                            if(au4_idx_t[i] != idx_t)
1685
18.9k
                                            {
1686
18.9k
                                                au1_tile_slice_boundary[i] |= 1;
1687
18.9k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1688
18.9k
                                            }
1689
43.8k
                                        }
1690
5.47k
                                    }
1691
5.86k
                                }
1692
10.8k
                            }
1693
1694
165k
                            for(i = 0; i < 8; i++)
1695
146k
                            {
1696
                                /*Sets the edges that lie on the slice/tile boundary*/
1697
146k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1698
20.7k
                                {
1699
20.7k
                                    au1_avail_luma[i] = 0;
1700
20.7k
                                }
1701
146k
                            }
1702
18.3k
                        }
1703
29.3k
                    }
1704
1705
1706
29.3k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
1707
3.92k
                    {
1708
3.92k
                        au1_avail_luma[0] = 0;
1709
3.92k
                        au1_avail_luma[4] = 0;
1710
3.92k
                        au1_avail_luma[6] = 0;
1711
3.92k
                    }
1712
1713
29.3k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
1714
3.17k
                    {
1715
3.17k
                        au1_avail_luma[1] = 0;
1716
3.17k
                        au1_avail_luma[5] = 0;
1717
3.17k
                        au1_avail_luma[7] = 0;
1718
3.17k
                    }
1719
1720
29.3k
                    if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
1721
0
                    {
1722
0
                        au1_avail_luma[2] = 0;
1723
0
                        au1_avail_luma[4] = 0;
1724
0
                        au1_avail_luma[5] = 0;
1725
0
                    }
1726
1727
29.3k
                    if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1728
0
                    {
1729
0
                        au1_avail_luma[3] = 0;
1730
0
                        au1_avail_luma[6] = 0;
1731
0
                        au1_avail_luma[7] = 0;
1732
0
                    }
1733
1734
29.3k
                    {
1735
29.3k
                        au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
1736
29.3k
                        u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
1737
29.3k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1738
29.3k
                                                                          src_strd,
1739
29.3k
                                                                          pu1_src_left_luma,
1740
29.3k
                                                                          pu1_src_top_luma,
1741
29.3k
                                                                          pu1_sao_src_luma_top_left_ctb,
1742
29.3k
                                                                          au1_src_top_right,
1743
29.3k
                                                                          &u1_sao_src_top_left_luma_bot_left,
1744
29.3k
                                                                          au1_avail_luma,
1745
29.3k
                                                                          ai1_offset_y,
1746
29.3k
                                                                          sao_wd_luma,
1747
29.3k
                                                                          sao_ht_luma);
1748
29.3k
                    }
1749
29.3k
                }
1750
1.51M
            }
1751
3.49M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1752
1.45M
            {
1753
                /* Update left, top and top-left */
1754
13.0M
                for(row = 0; row < sao_ht_luma; row++)
1755
11.6M
                {
1756
11.6M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1757
11.6M
                }
1758
1.45M
                pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1759
1760
1.45M
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1761
1.45M
            }
1762
5.00M
        }
1763
1764
5.01M
        if(0 != sao_wd_chroma)
1765
2.76M
        {
1766
2.76M
            if(ps_slice_hdr_top->i1_slice_sao_chroma_flag)
1767
242k
            {
1768
242k
                if(0 == ps_sao->b3_cb_type_idx)
1769
207k
                {
1770
1771
1.86M
                    for(row = 0; row < sao_ht_chroma; row++)
1772
1.65M
                    {
1773
1.65M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1774
1.65M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1775
1.65M
                    }
1776
207k
                    pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1777
207k
                    pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1778
1779
207k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1780
1781
207k
                }
1782
1783
35.6k
                else if(1 == ps_sao->b3_cb_type_idx)
1784
16.4k
                {
1785
16.4k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1786
16.4k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1787
16.4k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1788
16.4k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1789
1790
16.4k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1791
16.4k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1792
16.4k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1793
16.4k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1794
1795
16.4k
                    if(chroma_yuv420sp_vu)
1796
4.68k
                    {
1797
4.68k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1798
4.68k
                                                                                    src_strd,
1799
4.68k
                                                                                    pu1_src_left_chroma,
1800
4.68k
                                                                                    pu1_src_top_chroma,
1801
4.68k
                                                                                    pu1_sao_src_chroma_top_left_ctb,
1802
4.68k
                                                                                    ps_sao->b5_cr_band_pos,
1803
4.68k
                                                                                    ps_sao->b5_cb_band_pos,
1804
4.68k
                                                                                    ai1_offset_cr,
1805
4.68k
                                                                                    ai1_offset_cb,
1806
4.68k
                                                                                    sao_wd_chroma,
1807
4.68k
                                                                                    sao_ht_chroma
1808
4.68k
                                                                                   );
1809
4.68k
                    }
1810
11.7k
                    else
1811
11.7k
                    {
1812
11.7k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1813
11.7k
                                                                                    src_strd,
1814
11.7k
                                                                                    pu1_src_left_chroma,
1815
11.7k
                                                                                    pu1_src_top_chroma,
1816
11.7k
                                                                                    pu1_sao_src_chroma_top_left_ctb,
1817
11.7k
                                                                                    ps_sao->b5_cb_band_pos,
1818
11.7k
                                                                                    ps_sao->b5_cr_band_pos,
1819
11.7k
                                                                                    ai1_offset_cb,
1820
11.7k
                                                                                    ai1_offset_cr,
1821
11.7k
                                                                                    sao_wd_chroma,
1822
11.7k
                                                                                    sao_ht_chroma
1823
11.7k
                                                                                   );
1824
11.7k
                    }
1825
16.4k
                }
1826
19.1k
                else // if(2 <= ps_sao->b3_cb_type_idx)
1827
19.1k
                {
1828
19.1k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1829
19.1k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1830
19.1k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1831
19.1k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1832
1833
19.1k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1834
19.1k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1835
19.1k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1836
19.1k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1837
1838
173k
                    for(i = 0; i < 8; i++)
1839
154k
                    {
1840
154k
                        au1_avail_chroma[i] = 255;
1841
154k
                        au1_tile_slice_boundary[i] = 0;
1842
154k
                        au4_idx_t[i] = 0;
1843
154k
                        au4_ilf_across_tile_slice_enable[i] = 1;
1844
154k
                    }
1845
1846
19.1k
                    {
1847
19.1k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1848
11.6k
                        {
1849
11.6k
                            ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1850
11.6k
                            ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1851
1852
11.6k
                            ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1853
11.6k
                            ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1854
1855
11.6k
                            ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1856
11.6k
                            ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1857
1858
11.6k
                            ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1859
11.6k
                            ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1860
1861
11.6k
                            ctbx_t = ps_sao_ctxt->i4_ctb_x;
1862
11.6k
                            ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1863
1864
11.6k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1865
2.76k
                            {
1866
2.76k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1867
272
                                {
1868
272
                                    au4_idx_t[0] = -1;
1869
272
                                    au4_idx_t[6] = -1;
1870
272
                                    au4_idx_t[4] = -1;
1871
272
                                }
1872
2.49k
                                else
1873
2.49k
                                {
1874
2.49k
                                    au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1875
2.49k
                                    au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1876
2.49k
                                }
1877
2.76k
                                idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1878
2.76k
                                au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1879
2.76k
                                au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1880
2.76k
                                au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1881
1882
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1883
1884
2.76k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1885
272
                                {
1886
272
                                    au4_ilf_across_tile_slice_enable[4] = 0;
1887
272
                                    au4_ilf_across_tile_slice_enable[6] = 0;
1888
272
                                    au4_ilf_across_tile_slice_enable[0] = 0;
1889
272
                                }
1890
2.49k
                                else
1891
2.49k
                                {
1892
2.49k
                                    au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1893
2.49k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1894
2.49k
                                }
1895
1896
2.76k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1897
2.76k
                                au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1898
2.76k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1899
2.76k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1900
2.76k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1901
1902
2.76k
                                if(idx_t > au4_idx_t[6])
1903
272
                                {
1904
272
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1905
272
                                }
1906
1907
                                /*
1908
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1909
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1910
                                 */
1911
24.8k
                                for(i = 0; i < 8; i++)
1912
22.0k
                                {
1913
                                    /*Sets the edges that lie on the slice/tile boundary*/
1914
22.0k
                                    if(au4_idx_t[i] != idx_t)
1915
6.18k
                                    {
1916
6.18k
                                        au1_tile_slice_boundary[i] = 1;
1917
6.18k
                                    }
1918
15.9k
                                    else
1919
15.9k
                                    {
1920
                                        /*Indicates that the neighbour belongs to same/dependent slice*/
1921
15.9k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
1922
15.9k
                                    }
1923
22.0k
                                }
1924
                                /*Reset indices*/
1925
24.8k
                                for(i = 0; i < 8; i++)
1926
22.0k
                                {
1927
22.0k
                                    au4_idx_t[i] = 0;
1928
22.0k
                                }
1929
2.76k
                            }
1930
11.6k
                            if(ps_pps->i1_tiles_enabled_flag)
1931
8.90k
                            {
1932
                                /* Calculate availability flags at tile boundary */
1933
8.90k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1934
3.35k
                                {
1935
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1936
3.35k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1937
3.16k
                                    {
1938
                                        /*Calculate neighbor ctb tile indices*/
1939
3.16k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
1940
899
                                        {
1941
899
                                            au4_idx_t[0] = -1;
1942
899
                                            au4_idx_t[6] = -1;
1943
899
                                            au4_idx_t[4] = -1;
1944
899
                                        }
1945
2.26k
                                        else
1946
2.26k
                                        {
1947
2.26k
                                            au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1948
2.26k
                                            au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1949
2.26k
                                        }
1950
3.16k
                                        idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1951
3.16k
                                        au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1952
3.16k
                                        au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1953
3.16k
                                        au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1954
1955
28.4k
                                        for(i = 0; i < 8; i++)
1956
25.3k
                                        {
1957
                                            /*Sets the edges that lie on the tile boundary*/
1958
25.3k
                                            if(au4_idx_t[i] != idx_t)
1959
10.5k
                                            {
1960
10.5k
                                                au1_tile_slice_boundary[i] |= 1;
1961
10.5k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1962
10.5k
                                            }
1963
25.3k
                                        }
1964
3.16k
                                    }
1965
3.35k
                                }
1966
8.90k
                            }
1967
104k
                            for(i = 0; i < 8; i++)
1968
93.0k
                            {
1969
                                /*Sets the edges that lie on the slice/tile boundary*/
1970
93.0k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1971
11.1k
                                {
1972
11.1k
                                    au1_avail_chroma[i] = 0;
1973
11.1k
                                }
1974
93.0k
                            }
1975
1976
11.6k
                        }
1977
19.1k
                    }
1978
19.1k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
1979
2.85k
                    {
1980
2.85k
                        au1_avail_chroma[0] = 0;
1981
2.85k
                        au1_avail_chroma[4] = 0;
1982
2.85k
                        au1_avail_chroma[6] = 0;
1983
2.85k
                    }
1984
1985
19.1k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
1986
2.69k
                    {
1987
2.69k
                        au1_avail_chroma[1] = 0;
1988
2.69k
                        au1_avail_chroma[5] = 0;
1989
2.69k
                        au1_avail_chroma[7] = 0;
1990
2.69k
                    }
1991
1992
19.1k
                    if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1993
18
                    {
1994
18
                        au1_avail_chroma[2] = 0;
1995
18
                        au1_avail_chroma[4] = 0;
1996
18
                        au1_avail_chroma[5] = 0;
1997
18
                    }
1998
1999
19.1k
                    if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
2000
0
                    {
2001
0
                        au1_avail_chroma[3] = 0;
2002
0
                        au1_avail_chroma[6] = 0;
2003
0
                        au1_avail_chroma[7] = 0;
2004
0
                    }
2005
2006
19.1k
                    {
2007
19.1k
                        au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
2008
19.1k
                        au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
2009
19.1k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2010
19.1k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2011
2012
19.1k
                        if(chroma_yuv420sp_vu)
2013
5.28k
                        {
2014
5.28k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2015
5.28k
                                                                                 src_strd,
2016
5.28k
                                                                                 pu1_src_left_chroma,
2017
5.28k
                                                                                 pu1_src_top_chroma,
2018
5.28k
                                                                                 pu1_sao_src_chroma_top_left_ctb,
2019
5.28k
                                                                                 au1_src_top_right,
2020
5.28k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
2021
5.28k
                                                                                 au1_avail_chroma,
2022
5.28k
                                                                                 ai1_offset_cr,
2023
5.28k
                                                                                 ai1_offset_cb,
2024
5.28k
                                                                                 sao_wd_chroma,
2025
5.28k
                                                                                 sao_ht_chroma);
2026
5.28k
                        }
2027
13.9k
                        else
2028
13.9k
                        {
2029
13.9k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2030
13.9k
                                                                                 src_strd,
2031
13.9k
                                                                                 pu1_src_left_chroma,
2032
13.9k
                                                                                 pu1_src_top_chroma,
2033
13.9k
                                                                                 pu1_sao_src_chroma_top_left_ctb,
2034
13.9k
                                                                                 au1_src_top_right,
2035
13.9k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
2036
13.9k
                                                                                 au1_avail_chroma,
2037
13.9k
                                                                                 ai1_offset_cb,
2038
13.9k
                                                                                 ai1_offset_cr,
2039
13.9k
                                                                                 sao_wd_chroma,
2040
13.9k
                                                                                 sao_ht_chroma);
2041
13.9k
                        }
2042
19.1k
                    }
2043
2044
19.1k
                }
2045
242k
            }
2046
2.51M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2047
507k
            {
2048
4.56M
                for(row = 0; row < sao_ht_chroma; row++)
2049
4.05M
                {
2050
4.05M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2051
4.05M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2052
4.05M
                }
2053
507k
                pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2054
507k
                pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2055
2056
507k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2057
507k
            }
2058
2.76M
        }
2059
2060
5.01M
        pu1_src_luma += sao_ht_luma * src_strd;
2061
5.01M
        pu1_src_chroma += sao_ht_chroma * src_strd;
2062
5.01M
        ps_sao += (ps_sps->i2_pic_wd_in_ctb);
2063
5.01M
    }
2064
2065
    /* Left CTB */
2066
5.34M
    if(ps_sao_ctxt->i4_ctb_x > 0)
2067
5.17M
    {
2068
5.17M
        WORD32 sao_wd_luma = SAO_SHIFT_CTB;
2069
5.17M
        WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
2070
5.17M
        WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2071
5.17M
        WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2072
2073
5.17M
        WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
2074
5.17M
        WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
2075
5.17M
        WORD32 au4_idx_l[8], idx_l;
2076
2077
5.17M
        WORD32 remaining_rows;
2078
5.17M
        slice_header_t *ps_slice_hdr_left;
2079
5.17M
        {
2080
5.17M
            WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb +
2081
5.17M
                                        (ps_sao_ctxt->i4_ctb_x - 1);
2082
5.17M
            ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx];
2083
5.17M
        }
2084
2085
5.17M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2086
5.17M
        if(remaining_rows <= SAO_SHIFT_CTB)
2087
306k
        {
2088
306k
            sao_ht_luma += remaining_rows;
2089
306k
        }
2090
5.17M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2091
5.17M
        if(remaining_rows <= SAO_SHIFT_CTB)
2092
306k
        {
2093
306k
            sao_ht_chroma += remaining_rows;
2094
306k
        }
2095
2096
5.17M
        pu1_src_luma -= sao_wd_luma;
2097
5.17M
        pu1_src_chroma -= sao_wd_chroma;
2098
5.17M
        ps_sao -= 1;
2099
5.17M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
2100
5.17M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
2101
5.17M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2102
5.17M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2103
2104
2105
5.17M
        if(0 != sao_ht_luma)
2106
5.17M
        {
2107
5.17M
            if(ps_slice_hdr_left->i1_slice_sao_luma_flag)
2108
1.51M
            {
2109
1.51M
                if(0 == ps_sao->b3_y_type_idx)
2110
1.45M
                {
2111
                    /* Update left, top and top-left */
2112
19.1M
                    for(row = 0; row < sao_ht_luma; row++)
2113
17.6M
                    {
2114
17.6M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2115
17.6M
                    }
2116
                    /*Update in next location*/
2117
1.45M
                    pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2118
2119
1.45M
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2120
2121
1.45M
                }
2122
2123
59.8k
                else if(1 == ps_sao->b3_y_type_idx)
2124
30.8k
                {
2125
30.8k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2126
30.8k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2127
30.8k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2128
30.8k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2129
2130
30.8k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2131
30.8k
                                                                              src_strd,
2132
30.8k
                                                                              pu1_src_left_luma,
2133
30.8k
                                                                              pu1_src_top_luma,
2134
30.8k
                                                                              pu1_sao_src_top_left_luma_curr_ctb,
2135
30.8k
                                                                              ps_sao->b5_y_band_pos,
2136
30.8k
                                                                              ai1_offset_y,
2137
30.8k
                                                                              sao_wd_luma,
2138
30.8k
                                                                              sao_ht_luma
2139
30.8k
                                                                             );
2140
30.8k
                }
2141
2142
28.9k
                else // if(2 <= ps_sao->b3_y_type_idx)
2143
28.9k
                {
2144
28.9k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2145
28.9k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2146
28.9k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2147
28.9k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2148
2149
260k
                    for(i = 0; i < 8; i++)
2150
231k
                    {
2151
231k
                        au1_avail_luma[i] = 255;
2152
231k
                        au1_tile_slice_boundary[i] = 0;
2153
231k
                        au4_idx_l[i] = 0;
2154
231k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2155
231k
                    }
2156
                    /******************************************************************
2157
                     * Derive the Left CTB's neighbour pixel's slice indices.
2158
                     *
2159
                     *
2160
                     *          ____________
2161
                     *         |    |       |
2162
                     *         | L_T|       |
2163
                     *         |____|_______|____
2164
                     *         |    |       |    |
2165
                     *     L_L |  L |  L_R  |    |
2166
                     *         |____|_______|    |
2167
                     *              |            |
2168
                     *          L_D |            |
2169
                     *              |____________|
2170
                     *
2171
                     *****************************************************************/
2172
2173
                    /*In case of slices or tiles*/
2174
28.9k
                    {
2175
28.9k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2176
17.0k
                        {
2177
17.0k
                            ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2178
17.0k
                            ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2179
2180
17.0k
                            ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2181
17.0k
                            ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2182
2183
17.0k
                            ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2184
17.0k
                            ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2185
2186
17.0k
                            ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2187
17.0k
                            ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2188
2189
17.0k
                            ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2190
17.0k
                            ctby_l = ps_sao_ctxt->i4_ctb_y;
2191
2192
17.0k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2193
6.56k
                            {
2194
6.56k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2195
347
                                {
2196
347
                                    au4_idx_l[2] = -1;
2197
347
                                    au4_idx_l[4] = -1;
2198
347
                                    au4_idx_l[5] = -1;
2199
347
                                }
2200
6.22k
                                else
2201
6.22k
                                {
2202
6.22k
                                    au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2203
6.22k
                                    au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2204
6.22k
                                }
2205
6.56k
                                idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2206
6.56k
                                au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2207
6.56k
                                au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2208
6.56k
                                au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2209
2210
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
2211
6.56k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2212
347
                                {
2213
347
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2214
347
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2215
347
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2216
347
                                }
2217
6.22k
                                else
2218
6.22k
                                {
2219
6.22k
                                    au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2220
6.22k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2221
2222
6.22k
                                }
2223
                                //TODO: ILF flag checks for [0] and [6] are missing.
2224
6.56k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2225
6.56k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2226
6.56k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2227
2228
6.56k
                                if(idx_l < au4_idx_l[5])
2229
60
                                {
2230
60
                                    au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2231
60
                                }
2232
2233
                                /*
2234
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2235
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2236
                                 */
2237
59.1k
                                for(i = 0; i < 8; i++)
2238
52.5k
                                {
2239
                                    /*Sets the edges that lie on the slice/tile boundary*/
2240
52.5k
                                    if(au4_idx_l[i] != idx_l)
2241
15.9k
                                    {
2242
15.9k
                                        au1_tile_slice_boundary[i] = 1;
2243
15.9k
                                    }
2244
36.5k
                                    else
2245
36.5k
                                    {
2246
36.5k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2247
36.5k
                                    }
2248
52.5k
                                }
2249
                                /*Reset indices*/
2250
59.1k
                                for(i = 0; i < 8; i++)
2251
52.5k
                                {
2252
52.5k
                                    au4_idx_l[i] = 0;
2253
52.5k
                                }
2254
6.56k
                            }
2255
2256
17.0k
                            if(ps_pps->i1_tiles_enabled_flag)
2257
10.5k
                            {
2258
                                /* Calculate availability flags at tile boundary */
2259
10.5k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2260
4.45k
                                {
2261
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2262
4.45k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2263
3.98k
                                    {
2264
3.98k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2265
836
                                        {
2266
836
                                            au4_idx_l[2] = -1;
2267
836
                                            au4_idx_l[4] = -1;
2268
836
                                            au4_idx_l[5] = -1;
2269
836
                                        }
2270
3.14k
                                        else
2271
3.14k
                                        {
2272
3.14k
                                            au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2273
3.14k
                                            au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2274
3.14k
                                        }
2275
2276
3.98k
                                        idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2277
3.98k
                                        au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2278
3.98k
                                        au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2279
3.98k
                                        au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2280
2281
35.8k
                                        for(i = 0; i < 8; i++)
2282
31.8k
                                        {
2283
                                            /*Sets the edges that lie on the slice/tile boundary*/
2284
31.8k
                                            if(au4_idx_l[i] != idx_l)
2285
13.0k
                                            {
2286
13.0k
                                                au1_tile_slice_boundary[i] |= 1;
2287
13.0k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
2288
13.0k
                                            }
2289
31.8k
                                        }
2290
3.98k
                                    }
2291
4.45k
                                }
2292
10.5k
                            }
2293
2294
152k
                            for(i = 0; i < 8; i++)
2295
135k
                            {
2296
                                /*Sets the edges that lie on the slice/tile boundary*/
2297
135k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2298
14.1k
                                {
2299
14.1k
                                    au1_avail_luma[i] = 0;
2300
14.1k
                                }
2301
135k
                            }
2302
17.0k
                        }
2303
28.9k
                    }
2304
28.9k
                    if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
2305
0
                    {
2306
0
                        au1_avail_luma[0] = 0;
2307
0
                        au1_avail_luma[4] = 0;
2308
0
                        au1_avail_luma[6] = 0;
2309
0
                    }
2310
28.9k
                    if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2311
0
                    {
2312
0
                        au1_avail_luma[1] = 0;
2313
0
                        au1_avail_luma[5] = 0;
2314
0
                        au1_avail_luma[7] = 0;
2315
0
                    }
2316
2317
28.9k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2318
6.83k
                    {
2319
6.83k
                        au1_avail_luma[2] = 0;
2320
6.83k
                        au1_avail_luma[4] = 0;
2321
6.83k
                        au1_avail_luma[5] = 0;
2322
6.83k
                    }
2323
2324
28.9k
                    if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2325
2.65k
                    {
2326
2.65k
                        au1_avail_luma[3] = 0;
2327
2.65k
                        au1_avail_luma[6] = 0;
2328
2.65k
                        au1_avail_luma[7] = 0;
2329
2.65k
                    }
2330
2331
28.9k
                    {
2332
28.9k
                        au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
2333
28.9k
                        u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
2334
28.9k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2335
28.9k
                                                                          src_strd,
2336
28.9k
                                                                          pu1_src_left_luma,
2337
28.9k
                                                                          pu1_src_top_luma,
2338
28.9k
                                                                          pu1_sao_src_top_left_luma_curr_ctb,
2339
28.9k
                                                                          au1_src_top_right,
2340
28.9k
                                                                          &u1_sao_src_top_left_luma_bot_left,
2341
28.9k
                                                                          au1_avail_luma,
2342
28.9k
                                                                          ai1_offset_y,
2343
28.9k
                                                                          sao_wd_luma,
2344
28.9k
                                                                          sao_ht_luma);
2345
28.9k
                    }
2346
2347
28.9k
                }
2348
1.51M
            }
2349
3.66M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2350
1.46M
            {
2351
                /* Update left, top and top-left */
2352
24.1M
                for(row = 0; row < sao_ht_luma; row++)
2353
22.6M
                {
2354
22.6M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2355
22.6M
                }
2356
                /*Update in next location*/
2357
1.46M
                pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2358
2359
1.46M
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2360
1.46M
            }
2361
5.17M
        }
2362
2363
5.17M
        if(0 != sao_ht_chroma)
2364
2.93M
        {
2365
2.93M
            if(ps_slice_hdr_left->i1_slice_sao_chroma_flag)
2366
251k
            {
2367
251k
                if(0 == ps_sao->b3_cb_type_idx)
2368
215k
                {
2369
4.15M
                    for(row = 0; row < sao_ht_chroma; row++)
2370
3.93M
                    {
2371
3.93M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2372
3.93M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2373
3.93M
                    }
2374
215k
                    pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2375
215k
                    pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2376
2377
215k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2378
215k
                }
2379
2380
35.3k
                else if(1 == ps_sao->b3_cb_type_idx)
2381
16.5k
                {
2382
16.5k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2383
16.5k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2384
16.5k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2385
16.5k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2386
2387
16.5k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2388
16.5k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2389
16.5k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2390
16.5k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2391
2392
16.5k
                    if(chroma_yuv420sp_vu)
2393
4.99k
                    {
2394
4.99k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2395
4.99k
                                                                                    src_strd,
2396
4.99k
                                                                                    pu1_src_left_chroma,
2397
4.99k
                                                                                    pu1_src_top_chroma,
2398
4.99k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2399
4.99k
                                                                                    ps_sao->b5_cr_band_pos,
2400
4.99k
                                                                                    ps_sao->b5_cb_band_pos,
2401
4.99k
                                                                                    ai1_offset_cr,
2402
4.99k
                                                                                    ai1_offset_cb,
2403
4.99k
                                                                                    sao_wd_chroma,
2404
4.99k
                                                                                    sao_ht_chroma
2405
4.99k
                                                                                   );
2406
4.99k
                    }
2407
11.5k
                    else
2408
11.5k
                    {
2409
11.5k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2410
11.5k
                                                                                    src_strd,
2411
11.5k
                                                                                    pu1_src_left_chroma,
2412
11.5k
                                                                                    pu1_src_top_chroma,
2413
11.5k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2414
11.5k
                                                                                    ps_sao->b5_cb_band_pos,
2415
11.5k
                                                                                    ps_sao->b5_cr_band_pos,
2416
11.5k
                                                                                    ai1_offset_cb,
2417
11.5k
                                                                                    ai1_offset_cr,
2418
11.5k
                                                                                    sao_wd_chroma,
2419
11.5k
                                                                                    sao_ht_chroma
2420
11.5k
                                                                                   );
2421
11.5k
                    }
2422
16.5k
                }
2423
2424
18.8k
                else // if(2 <= ps_sao->b3_cb_type_idx)
2425
18.8k
                {
2426
18.8k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2427
18.8k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2428
18.8k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2429
18.8k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2430
2431
18.8k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2432
18.8k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2433
18.8k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2434
18.8k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2435
2436
169k
                    for(i = 0; i < 8; i++)
2437
150k
                    {
2438
150k
                        au1_avail_chroma[i] = 255;
2439
150k
                        au1_tile_slice_boundary[i] = 0;
2440
150k
                        au4_idx_l[i] = 0;
2441
150k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2442
150k
                    }
2443
                    /*In case of slices or tiles*/
2444
18.8k
                    {
2445
18.8k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2446
10.8k
                        {
2447
10.8k
                            ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2448
10.8k
                            ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2449
2450
10.8k
                            ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2451
10.8k
                            ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2452
2453
10.8k
                            ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2454
10.8k
                            ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2455
2456
10.8k
                            ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2457
10.8k
                            ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2458
2459
10.8k
                            ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2460
10.8k
                            ctby_l = ps_sao_ctxt->i4_ctb_y;
2461
2462
10.8k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2463
2.34k
                            {
2464
2.34k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2465
77
                                {
2466
77
                                    au4_idx_l[2] = -1;
2467
77
                                    au4_idx_l[4] = -1;
2468
77
                                    au4_idx_l[5] = -1;
2469
77
                                }
2470
2.27k
                                else
2471
2.27k
                                {
2472
2.27k
                                    au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2473
2.27k
                                    au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2474
2.27k
                                }
2475
2.34k
                                idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2476
2.34k
                                au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2477
2.34k
                                au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2478
2.34k
                                au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2479
2480
                                /* Verify that the neighbouring CTBs do not cross the picture boundary. */
2481
2.34k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2482
77
                                {
2483
77
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2484
77
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2485
77
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2486
77
                                }
2487
2.27k
                                else
2488
2.27k
                                {
2489
2.27k
                                    au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2490
2.27k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2491
2.27k
                                }
2492
2493
2.34k
                                if(au4_idx_l[5] > idx_l)
2494
60
                                {
2495
60
                                    au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2496
60
                                }
2497
2498
                                //  au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2499
2.34k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2500
2.34k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2501
2.34k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2502
                                /*
2503
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2504
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2505
                                 */
2506
21.1k
                                for(i = 0; i < 8; i++)
2507
18.7k
                                {
2508
                                    /*Sets the edges that lie on the slice/tile boundary*/
2509
18.7k
                                    if(au4_idx_l[i] != idx_l)
2510
5.76k
                                    {
2511
5.76k
                                        au1_tile_slice_boundary[i] = 1;
2512
5.76k
                                    }
2513
13.0k
                                    else
2514
13.0k
                                    {
2515
13.0k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2516
13.0k
                                    }
2517
18.7k
                                }
2518
                                /*Reset indices*/
2519
21.1k
                                for(i = 0; i < 8; i++)
2520
18.7k
                                {
2521
18.7k
                                    au4_idx_l[i] = 0;
2522
18.7k
                                }
2523
2.34k
                            }
2524
10.8k
                            if(ps_pps->i1_tiles_enabled_flag)
2525
8.56k
                            {
2526
                                /* Calculate availability flags at tile boundary */
2527
8.56k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2528
3.75k
                                {
2529
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2530
3.75k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2531
3.40k
                                    {
2532
3.40k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2533
1.10k
                                        {
2534
1.10k
                                            au4_idx_l[2] = -1;
2535
1.10k
                                            au4_idx_l[4] = -1;
2536
1.10k
                                            au4_idx_l[5] = -1;
2537
1.10k
                                        }
2538
2.30k
                                        else
2539
2.30k
                                        {
2540
2.30k
                                            au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2541
2.30k
                                            au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2542
2.30k
                                        }
2543
2544
3.40k
                                        idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2545
3.40k
                                        au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2546
3.40k
                                        au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2547
3.40k
                                        au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2548
2549
30.6k
                                        for(i = 0; i < 8; i++)
2550
27.2k
                                        {
2551
                                            /*Sets the edges that lie on the slice/tile boundary*/
2552
27.2k
                                            if(au4_idx_l[i] != idx_l)
2553
10.6k
                                            {
2554
10.6k
                                                au1_tile_slice_boundary[i] |= 1;
2555
10.6k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2556
10.6k
                                            }
2557
27.2k
                                        }
2558
3.40k
                                    }
2559
3.75k
                                }
2560
8.56k
                            }
2561
97.2k
                            for(i = 0; i < 8; i++)
2562
86.4k
                            {
2563
                                /*Sets the edges that lie on the slice/tile boundary*/
2564
86.4k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2565
10.9k
                                {
2566
10.9k
                                    au1_avail_chroma[i] = 0;
2567
10.9k
                                }
2568
86.4k
                            }
2569
10.8k
                        }
2570
18.8k
                    }
2571
18.8k
                    if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
2572
53
                    {
2573
53
                        au1_avail_chroma[0] = 0;
2574
53
                        au1_avail_chroma[4] = 0;
2575
53
                        au1_avail_chroma[6] = 0;
2576
53
                    }
2577
2578
18.8k
                    if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2579
0
                    {
2580
0
                        au1_avail_chroma[1] = 0;
2581
0
                        au1_avail_chroma[5] = 0;
2582
0
                        au1_avail_chroma[7] = 0;
2583
0
                    }
2584
2585
18.8k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2586
3.95k
                    {
2587
3.95k
                        au1_avail_chroma[2] = 0;
2588
3.95k
                        au1_avail_chroma[4] = 0;
2589
3.95k
                        au1_avail_chroma[5] = 0;
2590
3.95k
                    }
2591
2592
18.8k
                    if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
2593
2.26k
                    {
2594
2.26k
                        au1_avail_chroma[3] = 0;
2595
2.26k
                        au1_avail_chroma[6] = 0;
2596
2.26k
                        au1_avail_chroma[7] = 0;
2597
2.26k
                    }
2598
2599
18.8k
                    {
2600
18.8k
                        au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
2601
18.8k
                        au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
2602
18.8k
                        au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
2603
18.8k
                        au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
2604
                        //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2605
                        //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2606
18.8k
                        if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
2607
164
                        {
2608
164
                            au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
2609
164
                            au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
2610
164
                        }
2611
2612
2613
18.8k
                        if(chroma_yuv420sp_vu)
2614
5.14k
                        {
2615
5.14k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2616
5.14k
                                                                                 src_strd,
2617
5.14k
                                                                                 pu1_src_left_chroma,
2618
5.14k
                                                                                 pu1_src_top_chroma,
2619
5.14k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2620
5.14k
                                                                                 au1_src_top_right,
2621
5.14k
                                                                                 au1_src_bot_left,
2622
5.14k
                                                                                 au1_avail_chroma,
2623
5.14k
                                                                                 ai1_offset_cr,
2624
5.14k
                                                                                 ai1_offset_cb,
2625
5.14k
                                                                                 sao_wd_chroma,
2626
5.14k
                                                                                 sao_ht_chroma);
2627
5.14k
                        }
2628
13.6k
                        else
2629
13.6k
                        {
2630
13.6k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2631
13.6k
                                                                                 src_strd,
2632
13.6k
                                                                                 pu1_src_left_chroma,
2633
13.6k
                                                                                 pu1_src_top_chroma,
2634
13.6k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2635
13.6k
                                                                                 au1_src_top_right,
2636
13.6k
                                                                                 au1_src_bot_left,
2637
13.6k
                                                                                 au1_avail_chroma,
2638
13.6k
                                                                                 ai1_offset_cb,
2639
13.6k
                                                                                 ai1_offset_cr,
2640
13.6k
                                                                                 sao_wd_chroma,
2641
13.6k
                                                                                 sao_ht_chroma);
2642
13.6k
                        }
2643
18.8k
                    }
2644
2645
18.8k
                }
2646
251k
            }
2647
2.68M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2648
514k
            {
2649
6.16M
                for(row = 0; row < sao_ht_chroma; row++)
2650
5.65M
                {
2651
5.65M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2652
5.65M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2653
5.65M
                }
2654
514k
                pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2655
514k
                pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2656
2657
514k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2658
514k
            }
2659
2660
2.93M
        }
2661
5.17M
        pu1_src_luma += sao_wd_luma;
2662
5.17M
        pu1_src_chroma += sao_wd_chroma;
2663
5.17M
        ps_sao += 1;
2664
5.17M
    }
2665
2666
2667
    /* Current CTB */
2668
5.34M
    {
2669
5.34M
        WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
2670
5.34M
        WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
2671
5.34M
        WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2672
5.34M
        WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2673
5.34M
        WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
2674
5.34M
        WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
2675
5.34M
        WORD32 au4_idx_c[8], idx_c;
2676
2677
5.34M
        WORD32 remaining_rows;
2678
5.34M
        WORD32 remaining_cols;
2679
2680
5.34M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
2681
5.34M
        if(remaining_cols <= SAO_SHIFT_CTB)
2682
143k
        {
2683
143k
            sao_wd_luma += remaining_cols;
2684
143k
        }
2685
5.34M
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
2686
5.34M
        if(remaining_cols <= 2 * SAO_SHIFT_CTB)
2687
143k
        {
2688
143k
            sao_wd_chroma += remaining_cols;
2689
143k
        }
2690
2691
5.34M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2692
5.34M
        if(remaining_rows <= SAO_SHIFT_CTB)
2693
317k
        {
2694
317k
            sao_ht_luma += remaining_rows;
2695
317k
        }
2696
5.34M
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2697
5.34M
        if(remaining_rows <= SAO_SHIFT_CTB)
2698
317k
        {
2699
317k
            sao_ht_chroma += remaining_rows;
2700
317k
        }
2701
2702
5.34M
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2703
5.34M
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2704
5.34M
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2705
5.34M
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2706
2707
5.34M
        if((0 != sao_wd_luma) && (0 != sao_ht_luma))
2708
5.31M
        {
2709
5.31M
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2710
1.55M
            {
2711
1.55M
                if(0 == ps_sao->b3_y_type_idx)
2712
1.48M
                {
2713
                    /* Update left, top and top-left */
2714
19.8M
                    for(row = 0; row < sao_ht_luma; row++)
2715
18.3M
                    {
2716
18.3M
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2717
18.3M
                    }
2718
1.48M
                    pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2719
2720
1.48M
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2721
2722
1.48M
                    pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2723
2724
1.48M
                }
2725
2726
66.7k
                else if(1 == ps_sao->b3_y_type_idx)
2727
34.3k
                {
2728
34.3k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2729
34.3k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2730
34.3k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2731
34.3k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2732
2733
34.3k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2734
34.3k
                                                                              src_strd,
2735
34.3k
                                                                              pu1_src_left_luma,
2736
34.3k
                                                                              pu1_src_top_luma,
2737
34.3k
                                                                              pu1_sao_src_top_left_luma_curr_ctb,
2738
34.3k
                                                                              ps_sao->b5_y_band_pos,
2739
34.3k
                                                                              ai1_offset_y,
2740
34.3k
                                                                              sao_wd_luma,
2741
34.3k
                                                                              sao_ht_luma
2742
34.3k
                                                                             );
2743
34.3k
                }
2744
2745
32.3k
                else // if(2 <= ps_sao->b3_y_type_idx)
2746
32.3k
                {
2747
32.3k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2748
32.3k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2749
32.3k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2750
32.3k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2751
2752
291k
                    for(i = 0; i < 8; i++)
2753
259k
                    {
2754
259k
                        au1_avail_luma[i] = 255;
2755
259k
                        au1_tile_slice_boundary[i] = 0;
2756
259k
                        au4_idx_c[i] = 0;
2757
259k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2758
259k
                    }
2759
                    /******************************************************************
2760
                     * Derive the current CTB's neighbour pixels' slice indices.
2761
                     *
2762
                     *
2763
                     *          ____________
2764
                     *         |    |       |
2765
                     *         |    | C_T   |
2766
                     *         |____|_______|____
2767
                     *         |    |       |    |
2768
                     *         | C_L|   C   | C_R|
2769
                     *         |____|_______|    |
2770
                     *              |  C_D       |
2771
                     *              |            |
2772
                     *              |____________|
2773
                     *
2774
                     *****************************************************************/
2775
2776
                    /*In case of slices*/
2777
32.3k
                    {
2778
32.3k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2779
18.7k
                        {
2780
18.7k
                            ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2781
18.7k
                            ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2782
2783
18.7k
                            ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2784
18.7k
                            ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2785
2786
18.7k
                            ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2787
18.7k
                            ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2788
2789
18.7k
                            ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
2790
18.7k
                            ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
2791
2792
18.7k
                            ctbx_c = ps_sao_ctxt->i4_ctb_x;
2793
18.7k
                            ctby_c = ps_sao_ctxt->i4_ctb_y;
2794
2795
18.7k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2796
7.03k
                            {
2797
7.03k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
2798
745
                                {
2799
745
                                    au4_idx_c[6] = -1;
2800
745
                                    au4_idx_c[0] = -1;
2801
745
                                    au4_idx_c[4] = -1;
2802
745
                                }
2803
6.28k
                                else
2804
6.28k
                                {
2805
6.28k
                                    au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2806
6.28k
                                }
2807
2808
7.03k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2809
309
                                {
2810
309
                                    au4_idx_c[2] = -1;
2811
309
                                    au4_idx_c[5] = -1;
2812
309
                                    au4_idx_c[4] = -1;
2813
309
                                }
2814
6.72k
                                else
2815
6.72k
                                {
2816
6.72k
                                    au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2817
6.72k
                                    au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2818
6.72k
                                }
2819
7.03k
                                idx_c   = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2820
7.03k
                                au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2821
7.03k
                                au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2822
2823
7.03k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
2824
745
                                {
2825
745
                                    au4_ilf_across_tile_slice_enable[6] = 0;
2826
745
                                    au4_ilf_across_tile_slice_enable[0] = 0;
2827
745
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2828
745
                                }
2829
6.28k
                                else
2830
6.28k
                                {
2831
6.28k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2832
6.28k
                                    au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
2833
6.28k
                                }
2834
7.03k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2835
309
                                {
2836
309
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2837
309
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2838
309
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2839
309
                                }
2840
6.72k
                                else
2841
6.72k
                                {
2842
6.72k
                                    au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2843
6.72k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2844
6.72k
                                }
2845
7.03k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2846
7.03k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2847
7.03k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2848
2849
7.03k
                                if(au4_idx_c[6] < idx_c)
2850
1.49k
                                {
2851
1.49k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2852
1.49k
                                }
2853
2854
                                /*
2855
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2856
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2857
                                 */
2858
63.2k
                                for(i = 0; i < 8; i++)
2859
56.2k
                                {
2860
                                    /*Sets the edges that lie on the slice/tile boundary*/
2861
56.2k
                                    if(au4_idx_c[i] != idx_c)
2862
17.8k
                                    {
2863
17.8k
                                        au1_tile_slice_boundary[i] = 1;
2864
17.8k
                                    }
2865
38.4k
                                    else
2866
38.4k
                                    {
2867
38.4k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2868
38.4k
                                    }
2869
56.2k
                                }
2870
                                /*Reset indices*/
2871
63.2k
                                for(i = 0; i < 8; i++)
2872
56.2k
                                {
2873
56.2k
                                    au4_idx_c[i] = 0;
2874
56.2k
                                }
2875
7.03k
                            }
2876
2877
18.7k
                            if(ps_pps->i1_tiles_enabled_flag)
2878
11.7k
                            {
2879
                                /* Calculate availability flags at tile boundary */
2880
11.7k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2881
5.18k
                                {
2882
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2883
5.18k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2884
4.74k
                                    {
2885
4.74k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
2886
729
                                        {
2887
729
                                            au4_idx_c[6] = -1;
2888
729
                                            au4_idx_c[0] = -1;
2889
729
                                            au4_idx_c[4] = -1;
2890
729
                                        }
2891
4.02k
                                        else
2892
4.02k
                                        {
2893
4.02k
                                            au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2894
4.02k
                                        }
2895
2896
4.74k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2897
835
                                        {
2898
835
                                            au4_idx_c[2] = -1;
2899
835
                                            au4_idx_c[5] = -1;
2900
835
                                            au4_idx_c[4] = -1;
2901
835
                                        }
2902
3.91k
                                        else
2903
3.91k
                                        {
2904
3.91k
                                            au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2905
3.91k
                                            au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2906
3.91k
                                        }
2907
4.74k
                                        idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2908
4.74k
                                        au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2909
4.74k
                                        au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2910
2911
42.7k
                                        for(i = 0; i < 8; i++)
2912
37.9k
                                        {
2913
                                            /*Sets the edges that lie on the slice/tile boundary*/
2914
37.9k
                                            if(au4_idx_c[i] != idx_c)
2915
16.0k
                                            {
2916
16.0k
                                                au1_tile_slice_boundary[i] |= 1;
2917
16.0k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2918
16.0k
                                            }
2919
37.9k
                                        }
2920
4.74k
                                    }
2921
5.18k
                                }
2922
11.7k
                            }
2923
2924
168k
                            for(i = 0; i < 8; i++)
2925
150k
                            {
2926
                                /*Sets the edges that lie on the slice/tile boundary*/
2927
150k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2928
17.7k
                                {
2929
17.7k
                                    au1_avail_luma[i] = 0;
2930
17.7k
                                }
2931
150k
                            }
2932
2933
18.7k
                        }
2934
32.3k
                    }
2935
32.3k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
2936
4.28k
                    {
2937
4.28k
                        au1_avail_luma[0] = 0;
2938
4.28k
                        au1_avail_luma[4] = 0;
2939
4.28k
                        au1_avail_luma[6] = 0;
2940
4.28k
                    }
2941
2942
32.3k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
2943
3.47k
                    {
2944
3.47k
                        au1_avail_luma[1] = 0;
2945
3.47k
                        au1_avail_luma[5] = 0;
2946
3.47k
                        au1_avail_luma[7] = 0;
2947
3.47k
                    }
2948
2949
32.3k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2950
7.59k
                    {
2951
7.59k
                        au1_avail_luma[2] = 0;
2952
7.59k
                        au1_avail_luma[4] = 0;
2953
7.59k
                        au1_avail_luma[5] = 0;
2954
7.59k
                    }
2955
2956
32.3k
                    if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2957
2.95k
                    {
2958
2.95k
                        au1_avail_luma[3] = 0;
2959
2.95k
                        au1_avail_luma[6] = 0;
2960
2.95k
                        au1_avail_luma[7] = 0;
2961
2.95k
                    }
2962
2963
32.3k
                    {
2964
32.3k
                        au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
2965
32.3k
                        u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
2966
2967
32.3k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2968
32.3k
                                                                          src_strd,
2969
32.3k
                                                                          pu1_src_left_luma,
2970
32.3k
                                                                          pu1_src_top_luma,
2971
32.3k
                                                                          pu1_sao_src_top_left_luma_curr_ctb,
2972
32.3k
                                                                          au1_src_top_right,
2973
32.3k
                                                                          &u1_sao_src_top_left_luma_bot_left,
2974
32.3k
                                                                          au1_avail_luma,
2975
32.3k
                                                                          ai1_offset_y,
2976
32.3k
                                                                          sao_wd_luma,
2977
32.3k
                                                                          sao_ht_luma);
2978
32.3k
                    }
2979
32.3k
                    pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2980
32.3k
                    pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
2981
32.3k
                }
2982
1.55M
            }
2983
3.76M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2984
1.49M
            {
2985
                /* Update left, top and top-left */
2986
24.8M
                for(row = 0; row < sao_ht_luma; row++)
2987
23.3M
                {
2988
23.3M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2989
23.3M
                }
2990
1.49M
                pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2991
2992
1.49M
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2993
2994
1.49M
                pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2995
1.49M
            }
2996
5.31M
        }
2997
2998
5.34M
        if((0 != sao_wd_chroma) && (0 != sao_ht_chroma))
2999
3.03M
        {
3000
3.03M
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
3001
261k
            {
3002
261k
                if(0 == ps_sao->b3_cb_type_idx)
3003
222k
                {
3004
4.36M
                    for(row = 0; row < sao_ht_chroma; row++)
3005
4.14M
                    {
3006
4.14M
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3007
4.14M
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3008
4.14M
                    }
3009
222k
                    pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3010
222k
                    pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3011
3012
222k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3013
3014
222k
                    pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3015
222k
                    pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3016
222k
                }
3017
3018
39.4k
                else if(1 == ps_sao->b3_cb_type_idx)
3019
18.0k
                {
3020
18.0k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3021
18.0k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3022
18.0k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3023
18.0k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3024
3025
18.0k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3026
18.0k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3027
18.0k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3028
18.0k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3029
3030
18.0k
                    if(chroma_yuv420sp_vu)
3031
5.37k
                    {
3032
5.37k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3033
5.37k
                                                                                    src_strd,
3034
5.37k
                                                                                    pu1_src_left_chroma,
3035
5.37k
                                                                                    pu1_src_top_chroma,
3036
5.37k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
3037
5.37k
                                                                                    ps_sao->b5_cr_band_pos,
3038
5.37k
                                                                                    ps_sao->b5_cb_band_pos,
3039
5.37k
                                                                                    ai1_offset_cr,
3040
5.37k
                                                                                    ai1_offset_cb,
3041
5.37k
                                                                                    sao_wd_chroma,
3042
5.37k
                                                                                    sao_ht_chroma
3043
5.37k
                                                                                   );
3044
5.37k
                    }
3045
12.6k
                    else
3046
12.6k
                    {
3047
12.6k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3048
12.6k
                                                                                    src_strd,
3049
12.6k
                                                                                    pu1_src_left_chroma,
3050
12.6k
                                                                                    pu1_src_top_chroma,
3051
12.6k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
3052
12.6k
                                                                                    ps_sao->b5_cb_band_pos,
3053
12.6k
                                                                                    ps_sao->b5_cr_band_pos,
3054
12.6k
                                                                                    ai1_offset_cb,
3055
12.6k
                                                                                    ai1_offset_cr,
3056
12.6k
                                                                                    sao_wd_chroma,
3057
12.6k
                                                                                    sao_ht_chroma
3058
12.6k
                                                                                   );
3059
12.6k
                    }
3060
18.0k
                }
3061
3062
21.4k
                else // if(2 <= ps_sao->b3_cb_type_idx)
3063
21.4k
                {
3064
21.4k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3065
21.4k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3066
21.4k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3067
21.4k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3068
3069
21.4k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3070
21.4k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3071
21.4k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3072
21.4k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3073
3074
193k
                    for(i = 0; i < 8; i++)
3075
171k
                    {
3076
171k
                        au1_avail_chroma[i] = 255;
3077
171k
                        au1_tile_slice_boundary[i] = 0;
3078
171k
                        au4_idx_c[i] = 0;
3079
171k
                        au4_ilf_across_tile_slice_enable[i] = 1;
3080
171k
                    }
3081
21.4k
                    {
3082
21.4k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3083
12.0k
                        {
3084
12.0k
                            ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
3085
12.0k
                            ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
3086
3087
12.0k
                            ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
3088
12.0k
                            ctby_c_l = ps_sao_ctxt->i4_ctb_y;
3089
3090
12.0k
                            ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
3091
12.0k
                            ctby_c_r = ps_sao_ctxt->i4_ctb_y;
3092
3093
12.0k
                            ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
3094
12.0k
                            ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
3095
3096
12.0k
                            ctbx_c = ps_sao_ctxt->i4_ctb_x;
3097
12.0k
                            ctby_c = ps_sao_ctxt->i4_ctb_y;
3098
3099
12.0k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
3100
2.31k
                            {
3101
2.31k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
3102
228
                                {
3103
228
                                    au4_idx_c[0] = -1;
3104
228
                                    au4_idx_c[4] = -1;
3105
228
                                    au4_idx_c[6] = -1;
3106
228
                                }
3107
2.08k
                                else
3108
2.08k
                                {
3109
2.08k
                                    au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3110
2.08k
                                }
3111
3112
2.31k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
3113
80
                                {
3114
80
                                    au4_idx_c[2] = -1;
3115
80
                                    au4_idx_c[4] = -1;
3116
80
                                    au4_idx_c[5] = -1;
3117
80
                                }
3118
2.23k
                                else
3119
2.23k
                                {
3120
2.23k
                                    au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3121
2.23k
                                    au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3122
2.23k
                                }
3123
2.31k
                                idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3124
2.31k
                                au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3125
2.31k
                                au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3126
3127
2.31k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
3128
228
                                {
3129
228
                                    au4_ilf_across_tile_slice_enable[0] = 0;
3130
228
                                    au4_ilf_across_tile_slice_enable[4] = 0;
3131
228
                                    au4_ilf_across_tile_slice_enable[6] = 0;
3132
228
                                }
3133
2.08k
                                else
3134
2.08k
                                {
3135
2.08k
                                    au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
3136
2.08k
                                    au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3137
2.08k
                                }
3138
3139
2.31k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
3140
80
                                {
3141
80
                                    au4_ilf_across_tile_slice_enable[2] = 0;
3142
80
                                    au4_ilf_across_tile_slice_enable[4] = 0;
3143
80
                                    au4_ilf_across_tile_slice_enable[5] = 0;
3144
80
                                }
3145
2.23k
                                else
3146
2.23k
                                {
3147
2.23k
                                    au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3148
2.23k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
3149
2.23k
                                }
3150
3151
2.31k
                                au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
3152
2.31k
                                au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
3153
2.31k
                                au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
3154
3155
2.31k
                                if(idx_c > au4_idx_c[6])
3156
502
                                {
3157
502
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3158
502
                                }
3159
3160
                                /*
3161
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
3162
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
3163
                                 */
3164
20.8k
                                for(i = 0; i < 8; i++)
3165
18.5k
                                {
3166
                                    /*Sets the edges that lie on the slice/tile boundary*/
3167
18.5k
                                    if(au4_idx_c[i] != idx_c)
3168
6.50k
                                    {
3169
6.50k
                                        au1_tile_slice_boundary[i] = 1;
3170
6.50k
                                    }
3171
12.0k
                                    else
3172
12.0k
                                    {
3173
12.0k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
3174
12.0k
                                    }
3175
18.5k
                                }
3176
                                /*Reset indices*/
3177
20.8k
                                for(i = 0; i < 8; i++)
3178
18.5k
                                {
3179
18.5k
                                    au4_idx_c[i] = 0;
3180
18.5k
                                }
3181
2.31k
                            }
3182
3183
12.0k
                            if(ps_pps->i1_tiles_enabled_flag)
3184
9.74k
                            {
3185
                                /* Calculate availability flags at tile boundary */
3186
9.74k
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
3187
4.12k
                                {
3188
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
3189
4.12k
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
3190
3.89k
                                    {
3191
3.89k
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
3192
843
                                        {
3193
843
                                            au4_idx_c[6] = -1;
3194
843
                                            au4_idx_c[0] = -1;
3195
843
                                            au4_idx_c[4] = -1;
3196
843
                                        }
3197
3.04k
                                        else
3198
3.04k
                                        {
3199
3.04k
                                            au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3200
3.04k
                                        }
3201
3202
3.89k
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
3203
1.00k
                                        {
3204
1.00k
                                            au4_idx_c[2] = -1;
3205
1.00k
                                            au4_idx_c[5] = -1;
3206
1.00k
                                            au4_idx_c[4] = -1;
3207
1.00k
                                        }
3208
2.88k
                                        else
3209
2.88k
                                        {
3210
2.88k
                                            au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3211
2.88k
                                            au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3212
2.88k
                                        }
3213
3.89k
                                        idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3214
3.89k
                                        au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3215
3.89k
                                        au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3216
3217
35.0k
                                        for(i = 0; i < 8; i++)
3218
31.1k
                                        {
3219
                                            /*Sets the edges that lie on the slice/tile boundary*/
3220
31.1k
                                            if(au4_idx_c[i] != idx_c)
3221
12.5k
                                            {
3222
12.5k
                                                au1_tile_slice_boundary[i] |= 1;
3223
12.5k
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
3224
12.5k
                                            }
3225
31.1k
                                        }
3226
3.89k
                                    }
3227
4.12k
                                }
3228
9.74k
                            }
3229
3230
108k
                            for(i = 0; i < 8; i++)
3231
96.2k
                            {
3232
                                /*Sets the edges that lie on the slice/tile boundary*/
3233
96.2k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
3234
13.1k
                                {
3235
13.1k
                                    au1_avail_chroma[i] = 0;
3236
13.1k
                                }
3237
96.2k
                            }
3238
12.0k
                        }
3239
21.4k
                    }
3240
3241
21.4k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
3242
3.35k
                    {
3243
3.35k
                        au1_avail_chroma[0] = 0;
3244
3.35k
                        au1_avail_chroma[4] = 0;
3245
3.35k
                        au1_avail_chroma[6] = 0;
3246
3.35k
                    }
3247
3248
21.4k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
3249
2.81k
                    {
3250
2.81k
                        au1_avail_chroma[1] = 0;
3251
2.81k
                        au1_avail_chroma[5] = 0;
3252
2.81k
                        au1_avail_chroma[7] = 0;
3253
2.81k
                    }
3254
3255
21.4k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
3256
4.63k
                    {
3257
4.63k
                        au1_avail_chroma[2] = 0;
3258
4.63k
                        au1_avail_chroma[4] = 0;
3259
4.63k
                        au1_avail_chroma[5] = 0;
3260
4.63k
                    }
3261
3262
21.4k
                    if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
3263
2.66k
                    {
3264
2.66k
                        au1_avail_chroma[3] = 0;
3265
2.66k
                        au1_avail_chroma[6] = 0;
3266
2.66k
                        au1_avail_chroma[7] = 0;
3267
2.66k
                    }
3268
3269
21.4k
                    {
3270
21.4k
                        au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
3271
21.4k
                        au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
3272
3273
21.4k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
3274
21.4k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
3275
3276
21.4k
                        if(chroma_yuv420sp_vu)
3277
5.87k
                        {
3278
5.87k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3279
5.87k
                                                                                 src_strd,
3280
5.87k
                                                                                 pu1_src_left_chroma,
3281
5.87k
                                                                                 pu1_src_top_chroma,
3282
5.87k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
3283
5.87k
                                                                                 au1_src_top_right,
3284
5.87k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
3285
5.87k
                                                                                 au1_avail_chroma,
3286
5.87k
                                                                                 ai1_offset_cr,
3287
5.87k
                                                                                 ai1_offset_cb,
3288
5.87k
                                                                                 sao_wd_chroma,
3289
5.87k
                                                                                 sao_ht_chroma);
3290
5.87k
                        }
3291
15.5k
                        else
3292
15.5k
                        {
3293
15.5k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3294
15.5k
                                                                                 src_strd,
3295
15.5k
                                                                                 pu1_src_left_chroma,
3296
15.5k
                                                                                 pu1_src_top_chroma,
3297
15.5k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
3298
15.5k
                                                                                 au1_src_top_right,
3299
15.5k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
3300
15.5k
                                                                                 au1_avail_chroma,
3301
15.5k
                                                                                 ai1_offset_cb,
3302
15.5k
                                                                                 ai1_offset_cr,
3303
15.5k
                                                                                 sao_wd_chroma,
3304
15.5k
                                                                                 sao_ht_chroma);
3305
15.5k
                        }
3306
21.4k
                    }
3307
3308
21.4k
                }
3309
261k
                pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3310
261k
                pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3311
3312
261k
                pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
3313
261k
                pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
3314
261k
            }
3315
2.77M
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3316
528k
            {
3317
6.41M
                for(row = 0; row < sao_ht_chroma; row++)
3318
5.89M
                {
3319
5.89M
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3320
5.89M
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3321
5.89M
                }
3322
528k
                pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3323
528k
                pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3324
3325
528k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3326
3327
528k
                pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3328
528k
                pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3329
528k
            }
3330
3331
3.03M
        }
3332
5.34M
    }
3333
3334
3335
3336
3337
/* If no loop filter is enabled copy the backed up values */
3338
5.34M
    {
3339
        /* Luma */
3340
5.34M
        if(no_loop_filter_enabled_luma)
3341
115k
        {
3342
115k
            UWORD32 u4_no_loop_filter_flag;
3343
115k
            WORD32 loop_filter_bit_pos;
3344
115k
            WORD32 log2_min_cu = 3;
3345
115k
            WORD32 min_cu = (1 << log2_min_cu);
3346
115k
            UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
3347
115k
            WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
3348
115k
            WORD32 sao_blk_wd = ctb_size;
3349
115k
            WORD32 remaining_rows;
3350
115k
            WORD32 remaining_cols;
3351
3352
115k
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3353
115k
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3354
115k
            if(remaining_rows <= SAO_SHIFT_CTB)
3355
10.9k
                sao_blk_ht += remaining_rows;
3356
115k
            if(remaining_cols <= SAO_SHIFT_CTB)
3357
2.91k
                sao_blk_wd += remaining_cols;
3358
3359
115k
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
3360
115k
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3361
3362
115k
            pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
3363
3364
115k
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3365
115k
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3366
115k
            if(ps_sao_ctxt->i4_ctb_x > 0)
3367
110k
                loop_filter_bit_pos -= 1;
3368
3369
115k
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3370
115k
                            (loop_filter_bit_pos >> 3);
3371
3372
115k
            for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
3373
966k
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3374
850k
            {
3375
850k
                WORD32 tmp_wd = sao_blk_wd;
3376
3377
850k
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3378
850k
                                (loop_filter_bit_pos & 7);
3379
850k
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3380
3381
850k
                if(u4_no_loop_filter_flag)
3382
783k
                {
3383
1.66M
                    while(tmp_wd > 0)
3384
885k
                    {
3385
885k
                        if(CTZ(u4_no_loop_filter_flag))
3386
92.1k
                        {
3387
92.1k
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3388
92.1k
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3389
92.1k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3390
92.1k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3391
92.1k
                        }
3392
793k
                        else
3393
793k
                        {
3394
6.68M
                            for(row = 0; row < min_cu; row++)
3395
5.89M
                            {
3396
327M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3397
321M
                                {
3398
321M
                                    pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
3399
321M
                                }
3400
5.89M
                            }
3401
793k
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3402
793k
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3403
793k
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3404
793k
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3405
793k
                        }
3406
885k
                    }
3407
3408
783k
                    pu1_src_tmp_luma -= sao_blk_wd;
3409
783k
                    pu1_src_backup_luma -= sao_blk_wd;
3410
783k
                }
3411
3412
850k
                pu1_src_tmp_luma += (src_strd << log2_min_cu);
3413
850k
                pu1_src_backup_luma += (backup_strd << log2_min_cu);
3414
850k
            }
3415
115k
        }
3416
3417
        /* Chroma */
3418
5.34M
        if(no_loop_filter_enabled_chroma)
3419
114k
        {
3420
114k
            UWORD32 u4_no_loop_filter_flag;
3421
114k
            WORD32 loop_filter_bit_pos;
3422
114k
            WORD32 log2_min_cu = 3;
3423
114k
            WORD32 min_cu = (1 << log2_min_cu);
3424
114k
            UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
3425
114k
            WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
3426
114k
            WORD32 sao_blk_wd = ctb_size;
3427
114k
            WORD32 remaining_rows;
3428
114k
            WORD32 remaining_cols;
3429
3430
114k
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3431
114k
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3432
114k
            if(remaining_rows <= 2 * SAO_SHIFT_CTB)
3433
10.9k
                sao_blk_ht += remaining_rows;
3434
114k
            if(remaining_cols <= 2 * SAO_SHIFT_CTB)
3435
2.88k
                sao_blk_wd += remaining_cols;
3436
3437
114k
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
3438
114k
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3439
3440
114k
            pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
3441
3442
114k
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3443
114k
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3444
114k
            if(ps_sao_ctxt->i4_ctb_x > 0)
3445
109k
                loop_filter_bit_pos -= 2;
3446
3447
114k
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3448
114k
                            (loop_filter_bit_pos >> 3);
3449
3450
114k
            for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
3451
958k
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3452
843k
            {
3453
843k
                WORD32 tmp_wd = sao_blk_wd;
3454
3455
843k
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3456
843k
                                (loop_filter_bit_pos & 7);
3457
843k
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3458
3459
843k
                if(u4_no_loop_filter_flag)
3460
783k
                {
3461
1.66M
                    while(tmp_wd > 0)
3462
881k
                    {
3463
881k
                        if(CTZ(u4_no_loop_filter_flag))
3464
89.2k
                        {
3465
89.2k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3466
89.2k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3467
89.2k
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3468
89.2k
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3469
89.2k
                        }
3470
791k
                        else
3471
791k
                        {
3472
3.85M
                            for(row = 0; row < min_cu / 2; row++)
3473
3.06M
                            {
3474
177M
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3475
174M
                                {
3476
174M
                                    pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
3477
174M
                                }
3478
3.06M
                            }
3479
3480
791k
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3481
791k
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3482
791k
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3483
791k
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3484
791k
                        }
3485
881k
                    }
3486
3487
783k
                    pu1_src_tmp_chroma -= sao_blk_wd;
3488
783k
                    pu1_src_backup_chroma -= sao_blk_wd;
3489
783k
                }
3490
3491
843k
                pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
3492
843k
                pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
3493
843k
            }
3494
114k
        }
3495
5.34M
    }
3496
3497
5.34M
}
3498