Coverage Report

Created: 2025-08-28 06:34

/src/libhevc/decoder/ihevcd_sao.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevcd_sao.c
22
 *
23
 * @brief
24
 *  Contains function definitions for sample adaptive offset process
25
 *
26
 * @author
27
 *  Srinivas T
28
 *
29
 * @par List of Functions:
30
 *
31
 * @remarks
32
 *  None
33
 *
34
 *******************************************************************************
35
 */
36
37
#include <stdio.h>
38
#include <stddef.h>
39
#include <stdlib.h>
40
#include <string.h>
41
#include <assert.h>
42
43
#include "ihevc_typedefs.h"
44
#include "iv.h"
45
#include "ivd.h"
46
#include "ihevcd_cxa.h"
47
#include "ithread.h"
48
49
#include "ihevc_defs.h"
50
#include "ihevc_debug.h"
51
#include "ihevc_defs.h"
52
#include "ihevc_structs.h"
53
#include "ihevc_macros.h"
54
#include "ihevc_platform_macros.h"
55
#include "ihevc_cabac_tables.h"
56
#include "ihevc_sao.h"
57
#include "ihevc_mem_fns.h"
58
59
#include "ihevc_error.h"
60
#include "ihevc_common_tables.h"
61
62
#include "ihevcd_trace.h"
63
#include "ihevcd_defs.h"
64
#include "ihevcd_function_selector.h"
65
#include "ihevcd_structs.h"
66
#include "ihevcd_error.h"
67
#include "ihevcd_nal.h"
68
#include "ihevcd_bitstream.h"
69
#include "ihevcd_job_queue.h"
70
#include "ihevcd_utils.h"
71
72
#include "ihevc_deblk.h"
73
#include "ihevc_deblk_tables.h"
74
#include "ihevcd_profile.h"
75
#include "ihevcd_sao.h"
76
#include "ihevcd_debug.h"
77
78
4.41M
/* NOTE(review): presumably the 8-pixel x/y shift of the SAO processing window
 * mentioned in the comment that follows - confirm against the users of this
 * macro, none of which appear in ihevcd_sao_ctb(). */
#define SAO_SHIFT_CTB    8
79
80
/**
81
 * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions)
82
 */
83
void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
84
0
{
85
0
    codec_t *ps_codec = ps_sao_ctxt->ps_codec;
86
0
    UWORD8 *pu1_src_luma;
87
0
    UWORD8 *pu1_src_chroma;
88
0
    WORD32 src_strd;
89
0
    WORD32 ctb_size;
90
0
    WORD32 log2_ctb_size;
91
0
    sps_t *ps_sps;
92
0
    sao_t *ps_sao;
93
0
    WORD32 row, col;
94
0
    UWORD8 au1_avail_luma[8];
95
0
    UWORD8 au1_avail_chroma[8];
96
0
    WORD32 i;
97
0
    UWORD8 *pu1_src_top_luma;
98
0
    UWORD8 *pu1_src_top_chroma;
99
0
    UWORD8 *pu1_src_left_luma;
100
0
    UWORD8 *pu1_src_left_chroma;
101
0
    UWORD8 au1_src_top_right[2];
102
0
    UWORD8 au1_src_bot_left[2];
103
0
    UWORD8 *pu1_no_loop_filter_flag;
104
0
    WORD32 loop_filter_strd;
105
106
    /* Only first 5 values are used, but arrays are large
107
     enough so that SIMD functions can read 64 bits at a time */
108
0
    WORD8 ai1_offset_y[8] = {0};
109
0
    WORD8 ai1_offset_cb[8] = {0};
110
0
    WORD8 ai1_offset_cr[8] = {0};
111
112
0
    PROFILE_DISABLE_SAO();
113
114
0
    ps_sps = ps_sao_ctxt->ps_sps;
115
0
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
116
0
    ctb_size = (1 << log2_ctb_size);
117
0
    src_strd = ps_sao_ctxt->ps_codec->i4_strd;
118
0
    pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
119
0
    pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
120
121
0
    ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
122
0
    loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
123
124
    /* Current CTB */
125
0
    {
126
0
        WORD32 sao_wd_luma;
127
0
        WORD32 sao_wd_chroma;
128
0
        WORD32 sao_ht_luma;
129
0
        WORD32 sao_ht_chroma;
130
131
0
        WORD32 remaining_rows;
132
0
        WORD32 remaining_cols;
133
134
0
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
135
0
        sao_wd_luma = MIN(ctb_size, remaining_cols);
136
0
        sao_wd_chroma = MIN(ctb_size, remaining_cols);
137
138
0
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
139
0
        sao_ht_luma = MIN(ctb_size, remaining_rows);
140
0
        sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
141
142
0
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
143
0
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
144
0
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
145
0
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
146
147
0
        pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
148
0
                        ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
149
0
                        ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
150
151
0
        ai1_offset_y[1] = ps_sao->b4_y_offset_1;
152
0
        ai1_offset_y[2] = ps_sao->b4_y_offset_2;
153
0
        ai1_offset_y[3] = ps_sao->b4_y_offset_3;
154
0
        ai1_offset_y[4] = ps_sao->b4_y_offset_4;
155
156
0
        ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
157
0
        ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
158
0
        ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
159
0
        ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
160
161
0
        ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
162
0
        ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
163
0
        ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
164
0
        ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
165
166
0
        for(i = 0; i < 8; i++)
167
0
        {
168
0
            au1_avail_luma[i] = 255;
169
0
            au1_avail_chroma[i] = 255;
170
0
        }
171
172
173
0
        if(0 == ps_sao_ctxt->i4_ctb_x)
174
0
        {
175
0
            au1_avail_luma[0] = 0;
176
0
            au1_avail_luma[4] = 0;
177
0
            au1_avail_luma[6] = 0;
178
179
0
            au1_avail_chroma[0] = 0;
180
0
            au1_avail_chroma[4] = 0;
181
0
            au1_avail_chroma[6] = 0;
182
0
        }
183
184
0
        if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
185
0
        {
186
0
            au1_avail_luma[1] = 0;
187
0
            au1_avail_luma[5] = 0;
188
0
            au1_avail_luma[7] = 0;
189
190
0
            au1_avail_chroma[1] = 0;
191
0
            au1_avail_chroma[5] = 0;
192
0
            au1_avail_chroma[7] = 0;
193
0
        }
194
195
0
        if(0 == ps_sao_ctxt->i4_ctb_y)
196
0
        {
197
0
            au1_avail_luma[2] = 0;
198
0
            au1_avail_luma[4] = 0;
199
0
            au1_avail_luma[5] = 0;
200
201
0
            au1_avail_chroma[2] = 0;
202
0
            au1_avail_chroma[4] = 0;
203
0
            au1_avail_chroma[5] = 0;
204
0
        }
205
206
0
        if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
207
0
        {
208
0
            au1_avail_luma[3] = 0;
209
0
            au1_avail_luma[6] = 0;
210
0
            au1_avail_luma[7] = 0;
211
212
0
            au1_avail_chroma[3] = 0;
213
0
            au1_avail_chroma[6] = 0;
214
0
            au1_avail_chroma[7] = 0;
215
0
        }
216
217
218
0
        if(0 == ps_sao->b3_y_type_idx)
219
0
        {
220
            /* Update left, top and top-left */
221
0
            for(row = 0; row < sao_ht_luma; row++)
222
0
            {
223
0
                pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
224
0
            }
225
0
            ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
226
227
0
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
228
229
0
        }
230
0
        else
231
0
        {
232
0
            UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
233
0
            UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
234
0
            WORD32 tmp_strd = MAX_CTB_SIZE + 2;
235
0
            WORD32 no_loop_filter_enabled = 0;
236
237
            /* Check the loop filter flags and copy the original values for back up */
238
0
            {
239
0
                UWORD32 u4_no_loop_filter_flag;
240
0
                WORD32 min_cu = 8;
241
0
                UWORD8 *pu1_src_tmp = pu1_src_luma;
242
243
0
                for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
244
0
                {
245
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
246
0
                                    ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
247
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
248
249
0
                    if(u4_no_loop_filter_flag)
250
0
                    {
251
0
                        WORD32 tmp_wd = sao_wd_luma;
252
0
                        no_loop_filter_enabled = 1;
253
0
                        while(tmp_wd > 0)
254
0
                        {
255
0
                            if(CTZ(u4_no_loop_filter_flag))
256
0
                            {
257
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
258
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
259
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
260
0
                                tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
261
0
                            }
262
0
                            else
263
0
                            {
264
0
                                for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
265
0
                                {
266
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
267
0
                                    {
268
0
                                        pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
269
0
                                    }
270
0
                                }
271
272
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
273
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
274
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
275
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
276
0
                            }
277
0
                        }
278
279
0
                        pu1_src_tmp -= sao_wd_luma;
280
0
                    }
281
282
0
                    pu1_src_tmp += min_cu * src_strd;
283
0
                    pu1_src_copy += min_cu * tmp_strd;
284
0
                }
285
0
            }
286
287
0
            if(1 == ps_sao->b3_y_type_idx)
288
0
            {
289
0
                ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
290
0
                                                                          src_strd,
291
0
                                                                          pu1_src_left_luma,
292
0
                                                                          pu1_src_top_luma,
293
0
                                                                          ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
294
0
                                                                          ps_sao->b5_y_band_pos,
295
0
                                                                          ai1_offset_y,
296
0
                                                                          sao_wd_luma,
297
0
                                                                          sao_ht_luma);
298
0
            }
299
0
            else // if(2 <= ps_sao->b3_y_type_idx)
300
0
            {
301
0
                au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
302
0
                au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
303
0
                ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
304
0
                                                                  src_strd,
305
0
                                                                  pu1_src_left_luma,
306
0
                                                                  pu1_src_top_luma,
307
0
                                                                  ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
308
0
                                                                  au1_src_top_right,
309
0
                                                                  au1_src_bot_left,
310
0
                                                                  au1_avail_luma,
311
0
                                                                  ai1_offset_y,
312
0
                                                                  sao_wd_luma,
313
0
                                                                  sao_ht_luma);
314
0
            }
315
316
            /* Check the loop filter flags and copy the original values back if they are set */
317
0
            if(no_loop_filter_enabled)
318
0
            {
319
0
                UWORD32 u4_no_loop_filter_flag;
320
0
                WORD32 min_cu = 8;
321
0
                UWORD8 *pu1_src_tmp = pu1_src_luma;
322
323
0
                for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
324
0
                {
325
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
326
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
327
328
0
                    if(u4_no_loop_filter_flag)
329
0
                    {
330
0
                        WORD32 tmp_wd = sao_wd_luma;
331
0
                        while(tmp_wd > 0)
332
0
                        {
333
0
                            if(CTZ(u4_no_loop_filter_flag))
334
0
                            {
335
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
336
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
337
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
338
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
339
0
                            }
340
0
                            else
341
0
                            {
342
0
                                for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
343
0
                                {
344
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
345
0
                                    {
346
0
                                        pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
347
0
                                    }
348
0
                                }
349
350
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
351
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
352
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
353
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
354
0
                            }
355
0
                        }
356
357
0
                        pu1_src_tmp -= sao_wd_luma;
358
0
                    }
359
360
0
                    pu1_src_tmp += min_cu * src_strd;
361
0
                    pu1_src_copy += min_cu * tmp_strd;
362
0
                }
363
0
            }
364
365
0
        }
366
367
0
        if(0 == ps_sao->b3_cb_type_idx)
368
0
        {
369
0
            for(row = 0; row < sao_ht_chroma; row++)
370
0
            {
371
0
                pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
372
0
                pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
373
0
            }
374
0
            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
375
0
            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
376
377
0
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
378
0
        }
379
0
        else
380
0
        {
381
0
            UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
382
0
            UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
383
0
            WORD32 tmp_strd = MAX_CTB_SIZE + 4;
384
0
            WORD32 no_loop_filter_enabled = 0;
385
386
            /* Check the loop filter flags and copy the original values for back up */
387
0
            {
388
0
                UWORD32 u4_no_loop_filter_flag;
389
0
                WORD32 min_cu = 4;
390
0
                UWORD8 *pu1_src_tmp = pu1_src_chroma;
391
392
0
                for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
393
0
                {
394
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
395
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
396
397
0
                    if(u4_no_loop_filter_flag)
398
0
                    {
399
0
                        WORD32 tmp_wd = sao_wd_chroma;
400
0
                        no_loop_filter_enabled = 1;
401
0
                        while(tmp_wd > 0)
402
0
                        {
403
0
                            if(CTZ(u4_no_loop_filter_flag))
404
0
                            {
405
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
406
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
407
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
408
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
409
0
                            }
410
0
                            else
411
0
                            {
412
0
                                for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
413
0
                                {
414
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
415
0
                                    {
416
0
                                        pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
417
0
                                    }
418
0
                                }
419
420
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
421
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
422
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
423
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
424
0
                            }
425
0
                        }
426
427
0
                        pu1_src_tmp -= sao_wd_chroma;
428
0
                    }
429
430
0
                    pu1_src_tmp += min_cu * src_strd;
431
0
                    pu1_src_copy += min_cu * tmp_strd;
432
0
                }
433
0
            }
434
435
0
            if(1 == ps_sao->b3_cb_type_idx)
436
0
            {
437
0
                ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
438
0
                                                                            src_strd,
439
0
                                                                            pu1_src_left_chroma,
440
0
                                                                            pu1_src_top_chroma,
441
0
                                                                            ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
442
0
                                                                            ps_sao->b5_cb_band_pos,
443
0
                                                                            ps_sao->b5_cr_band_pos,
444
0
                                                                            ai1_offset_cb,
445
0
                                                                            ai1_offset_cr,
446
0
                                                                            sao_wd_chroma,
447
0
                                                                            sao_ht_chroma
448
0
                                                                           );
449
0
            }
450
0
            else // if(2 <= ps_sao->b3_cb_type_idx)
451
0
            {
452
0
                au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
453
0
                au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
454
0
                au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
455
0
                au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
456
0
                ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
457
0
                                                                     src_strd,
458
0
                                                                     pu1_src_left_chroma,
459
0
                                                                     pu1_src_top_chroma,
460
0
                                                                     ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
461
0
                                                                     au1_src_top_right,
462
0
                                                                     au1_src_bot_left,
463
0
                                                                     au1_avail_chroma,
464
0
                                                                     ai1_offset_cb,
465
0
                                                                     ai1_offset_cr,
466
0
                                                                     sao_wd_chroma,
467
0
                                                                     sao_ht_chroma);
468
0
            }
469
470
            /* Check the loop filter flags and copy the original values back if they are set */
471
0
            if(no_loop_filter_enabled)
472
0
            {
473
0
                UWORD32 u4_no_loop_filter_flag;
474
0
                WORD32 min_cu = 4;
475
0
                UWORD8 *pu1_src_tmp = pu1_src_chroma;
476
477
0
                for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
478
0
                {
479
0
                    u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
480
0
                    u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
481
482
0
                    if(u4_no_loop_filter_flag)
483
0
                    {
484
0
                        WORD32 tmp_wd = sao_wd_chroma;
485
0
                        while(tmp_wd > 0)
486
0
                        {
487
0
                            if(CTZ(u4_no_loop_filter_flag))
488
0
                            {
489
0
                                u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
490
0
                                pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
491
0
                                pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
492
0
                                tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
493
0
                            }
494
0
                            else
495
0
                            {
496
0
                                for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
497
0
                                {
498
0
                                    for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
499
0
                                    {
500
0
                                        pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
501
0
                                    }
502
0
                                }
503
504
0
                                u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
505
0
                                pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
506
0
                                pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
507
0
                                tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
508
0
                            }
509
0
                        }
510
511
0
                        pu1_src_tmp -= sao_wd_chroma;
512
0
                    }
513
514
0
                    pu1_src_tmp += min_cu * src_strd;
515
0
                    pu1_src_copy += min_cu * tmp_strd;
516
0
                }
517
0
            }
518
519
0
        }
520
521
0
    }
522
0
}
523
524
void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
525
116k
{
526
116k
    codec_t *ps_codec = ps_sao_ctxt->ps_codec;
527
116k
    UWORD8 *pu1_src_luma;
528
116k
    UWORD8 *pu1_src_chroma;
529
116k
    WORD32 src_strd;
530
116k
    WORD32 ctb_size;
531
116k
    WORD32 log2_ctb_size;
532
116k
    sps_t *ps_sps;
533
116k
    sao_t *ps_sao;
534
116k
    pps_t *ps_pps;
535
116k
    slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
536
116k
    tile_t *ps_tile;
537
116k
    UWORD16 *pu1_slice_idx;
538
116k
    UWORD16 *pu1_tile_idx;
539
116k
    WORD32 row, col;
540
116k
    UWORD8 au1_avail_luma[8];
541
116k
    UWORD8 au1_avail_chroma[8];
542
116k
    UWORD8 au1_tile_slice_boundary[8];
543
116k
    UWORD8 au4_ilf_across_tile_slice_enable[8];
544
116k
    WORD32 i;
545
116k
    UWORD8 *pu1_src_top_luma;
546
116k
    UWORD8 *pu1_src_top_chroma;
547
116k
    UWORD8 *pu1_src_left_luma;
548
116k
    UWORD8 *pu1_src_left_chroma;
549
116k
    UWORD8 au1_src_top_right[2];
550
116k
    UWORD8 au1_src_bot_left[2];
551
116k
    UWORD8 *pu1_no_loop_filter_flag;
552
116k
    UWORD8 *pu1_src_backup_luma;
553
116k
    UWORD8 *pu1_src_backup_chroma;
554
116k
    WORD32 backup_strd;
555
116k
    WORD32 loop_filter_strd;
556
557
116k
    WORD32 no_loop_filter_enabled_luma = 0;
558
116k
    WORD32 no_loop_filter_enabled_chroma = 0;
559
116k
    UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
560
116k
    UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
561
116k
    UWORD8 *pu1_sao_src_luma_top_left_ctb;
562
116k
    UWORD8 *pu1_sao_src_chroma_top_left_ctb;
563
116k
    UWORD8 *pu1_sao_src_top_left_luma_top_right;
564
116k
    UWORD8 *pu1_sao_src_top_left_chroma_top_right;
565
116k
    UWORD8  u1_sao_src_top_left_luma_bot_left;
566
116k
    UWORD8  *pu1_sao_src_top_left_luma_bot_left;
567
116k
    UWORD8 *au1_sao_src_top_left_chroma_bot_left;
568
116k
    UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
569
    /* Only first 5 values are used, but arrays are large
570
     enough so that SIMD functions can read 64 bits at a time */
571
116k
    WORD8 ai1_offset_y[8] = {0};
572
116k
    WORD8 ai1_offset_cb[8] = {0};
573
116k
    WORD8 ai1_offset_cr[8] = {0};
574
116k
    WORD32  chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
575
576
116k
    PROFILE_DISABLE_SAO();
577
578
116k
    ps_sps = ps_sao_ctxt->ps_sps;
579
116k
    ps_pps = ps_sao_ctxt->ps_pps;
580
116k
    ps_tile = ps_sao_ctxt->ps_tile;
581
582
116k
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
583
116k
    ctb_size = (1 << log2_ctb_size);
584
116k
    src_strd = ps_sao_ctxt->ps_codec->i4_strd;
585
116k
    ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
586
116k
    ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
587
588
116k
    pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
589
116k
    pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
590
116k
    pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
591
116k
    pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
592
593
    /* Stores the left value for each row of CTBs - needed for column tiles */
594
116k
    pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
595
116k
    pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
596
116k
    pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
597
116k
    pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
598
116k
    u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
599
116k
    pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
600
116k
    au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
601
116k
    pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
602
116k
    pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
603
116k
    pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
604
605
116k
    ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
606
116k
    loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
607
116k
    backup_strd = 2 * MAX_CTB_SIZE;
608
609
116k
    DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
610
611
116k
    {
612
        /* Check the loop filter flags and copy the original values for back up */
613
        /* Luma */
614
615
        /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs
616
         * can belong to different slice with their own sao_enable flag */
617
116k
        {
618
116k
            UWORD32 u4_no_loop_filter_flag;
619
116k
            WORD32 loop_filter_bit_pos;
620
116k
            WORD32 log2_min_cu = 3;
621
116k
            WORD32 min_cu = (1 << log2_min_cu);
622
116k
            UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
623
116k
            WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
624
116k
            WORD32 sao_blk_wd = ctb_size;
625
116k
            WORD32 remaining_rows;
626
116k
            WORD32 remaining_cols;
627
628
116k
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
629
116k
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
630
116k
            if(remaining_rows <= SAO_SHIFT_CTB)
631
10.9k
                sao_blk_ht += remaining_rows;
632
116k
            if(remaining_cols <= SAO_SHIFT_CTB)
633
7.04k
                sao_blk_wd += remaining_cols;
634
635
116k
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
636
116k
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
637
638
116k
            pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
639
640
116k
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
641
116k
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
642
116k
            if(ps_sao_ctxt->i4_ctb_x > 0)
643
109k
                loop_filter_bit_pos -= 1;
644
645
116k
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
646
116k
                            (loop_filter_bit_pos >> 3);
647
648
116k
            for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
649
976k
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
650
860k
            {
651
860k
                WORD32 tmp_wd = sao_blk_wd;
652
653
860k
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
654
860k
                                (loop_filter_bit_pos & 7);
655
860k
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
656
657
860k
                if(u4_no_loop_filter_flag)
658
352
                {
659
352
                    no_loop_filter_enabled_luma = 1;
660
998
                    while(tmp_wd > 0)
661
646
                    {
662
646
                        if(CTZ(u4_no_loop_filter_flag))
663
272
                        {
664
272
                            pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
665
272
                            pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
666
272
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
667
272
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
668
272
                        }
669
374
                        else
670
374
                        {
671
3.35k
                            for(row = 0; row < min_cu; row++)
672
2.97k
                            {
673
75.6k
                                for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
674
72.6k
                                {
675
72.6k
                                    pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
676
72.6k
                                }
677
2.97k
                            }
678
374
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
679
374
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
680
374
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
681
374
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
682
374
                        }
683
646
                    }
684
685
352
                    pu1_src_tmp_luma -= sao_blk_wd;
686
352
                    pu1_src_backup_luma -= sao_blk_wd;
687
352
                }
688
689
860k
                pu1_src_tmp_luma += (src_strd << log2_min_cu);
690
860k
                pu1_src_backup_luma += (backup_strd << log2_min_cu);
691
860k
            }
692
116k
        }
693
694
        /* Chroma */
695
696
116k
        {
697
116k
            UWORD32 u4_no_loop_filter_flag;
698
116k
            WORD32 loop_filter_bit_pos;
699
116k
            WORD32 log2_min_cu = 3;
700
116k
            WORD32 min_cu = (1 << log2_min_cu);
701
116k
            UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
702
116k
            WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
703
116k
            WORD32 sao_blk_wd = ctb_size;
704
116k
            WORD32 remaining_rows;
705
116k
            WORD32 remaining_cols;
706
707
116k
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
708
116k
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
709
116k
            if(remaining_rows <= 2 * SAO_SHIFT_CTB)
710
10.9k
                sao_blk_ht += remaining_rows;
711
116k
            if(remaining_cols <= 2 * SAO_SHIFT_CTB)
712
7.04k
                sao_blk_wd += remaining_cols;
713
714
116k
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
715
116k
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
716
717
116k
            pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
718
719
116k
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
720
116k
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
721
116k
            if(ps_sao_ctxt->i4_ctb_x > 0)
722
109k
                loop_filter_bit_pos -= 2;
723
724
116k
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
725
116k
                            (loop_filter_bit_pos >> 3);
726
727
116k
            for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
728
976k
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
729
860k
            {
730
860k
                WORD32 tmp_wd = sao_blk_wd;
731
732
860k
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
733
860k
                                (loop_filter_bit_pos & 7);
734
860k
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
735
736
860k
                if(u4_no_loop_filter_flag)
737
354
                {
738
354
                    no_loop_filter_enabled_chroma = 1;
739
998
                    while(tmp_wd > 0)
740
644
                    {
741
644
                        if(CTZ(u4_no_loop_filter_flag))
742
280
                        {
743
280
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
744
280
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
745
280
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
746
280
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
747
280
                        }
748
364
                        else
749
364
                        {
750
1.81k
                            for(row = 0; row < min_cu / 2; row++)
751
1.45k
                            {
752
39.8k
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
753
38.4k
                                {
754
38.4k
                                    pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
755
38.4k
                                }
756
1.45k
                            }
757
758
364
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
759
364
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
760
364
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
761
364
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
762
364
                        }
763
644
                    }
764
765
354
                    pu1_src_tmp_chroma -= sao_blk_wd;
766
354
                    pu1_src_backup_chroma -= sao_blk_wd;
767
354
                }
768
769
860k
                pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
770
860k
                pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
771
860k
            }
772
116k
        }
773
116k
    }
774
775
116k
    DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
776
777
    /* Top-left CTB */
778
116k
    if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
779
99.0k
    {
780
99.0k
        WORD32 sao_wd_luma = SAO_SHIFT_CTB;
781
99.0k
        WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
782
99.0k
        WORD32 sao_ht_luma = SAO_SHIFT_CTB;
783
99.0k
        WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
784
785
99.0k
        WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
786
99.0k
        WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
787
99.0k
        WORD32 au4_idx_tl[8], idx_tl;
788
789
99.0k
        slice_header_t *ps_slice_hdr_top_left;
790
99.0k
        {
791
99.0k
            WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
792
99.0k
                                        (ps_sao_ctxt->i4_ctb_x - 1);
793
99.0k
            ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx];
794
99.0k
        }
795
796
797
99.0k
        pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
798
99.0k
        pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
799
99.0k
        ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
800
99.0k
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
801
99.0k
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
802
99.0k
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
803
99.0k
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
804
805
99.0k
        if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag)
806
20.2k
        {
807
20.2k
            if(0 == ps_sao->b3_y_type_idx)
808
13.5k
            {
809
                /* Update left, top and top-left */
810
121k
                for(row = 0; row < sao_ht_luma; row++)
811
108k
                {
812
108k
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
813
108k
                }
814
13.5k
                pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
815
816
13.5k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
817
818
819
13.5k
            }
820
821
6.76k
            else if(1 == ps_sao->b3_y_type_idx)
822
1.98k
            {
823
1.98k
                ai1_offset_y[1] = ps_sao->b4_y_offset_1;
824
1.98k
                ai1_offset_y[2] = ps_sao->b4_y_offset_2;
825
1.98k
                ai1_offset_y[3] = ps_sao->b4_y_offset_3;
826
1.98k
                ai1_offset_y[4] = ps_sao->b4_y_offset_4;
827
828
1.98k
                ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
829
1.98k
                                                                          src_strd,
830
1.98k
                                                                          pu1_src_left_luma,
831
1.98k
                                                                          pu1_src_top_luma,
832
1.98k
                                                                          pu1_sao_src_luma_top_left_ctb,
833
1.98k
                                                                          ps_sao->b5_y_band_pos,
834
1.98k
                                                                          ai1_offset_y,
835
1.98k
                                                                          sao_wd_luma,
836
1.98k
                                                                          sao_ht_luma
837
1.98k
                                                                         );
838
1.98k
            }
839
840
4.78k
            else // if(2 <= ps_sao->b3_y_type_idx)
841
4.78k
            {
842
4.78k
                ai1_offset_y[1] = ps_sao->b4_y_offset_1;
843
4.78k
                ai1_offset_y[2] = ps_sao->b4_y_offset_2;
844
4.78k
                ai1_offset_y[3] = ps_sao->b4_y_offset_3;
845
4.78k
                ai1_offset_y[4] = ps_sao->b4_y_offset_4;
846
847
43.0k
                for(i = 0; i < 8; i++)
848
38.2k
                {
849
38.2k
                    au1_avail_luma[i] = 255;
850
38.2k
                    au1_tile_slice_boundary[i] = 0;
851
38.2k
                    au4_idx_tl[i] = 0;
852
38.2k
                    au4_ilf_across_tile_slice_enable[i] = 1;
853
38.2k
                }
854
855
                /******************************************************************
856
                 * Derive the  Top-left CTB's neighbor pixel's slice indices.
857
                 *
858
                 *          TL_T
859
                 *       4  _2__5________
860
                 *     0   |    |       |
861
                 *    TL_L | TL | 1 TL_R|
862
                 *         |____|_______|____
863
                 *        6|TL_D|7      |    |
864
                 *         | 3  |       |    |
865
                 *         |____|_______|    |
866
                 *              |            |
867
                 *              |            |
868
                 *              |____________|
869
                 *
870
                 *****************************************************************/
871
872
                /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/
873
4.78k
                {
874
4.78k
                    if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
875
4.35k
                    {
876
4.35k
                        {
877
                            /*Assuming that sao shift is uniform along x and y directions*/
878
4.35k
                            if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
879
0
                            {
880
0
                                ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
881
0
                                ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
882
0
                            }
883
4.35k
                            else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
884
4.36k
                            {
885
4.36k
                                ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
886
4.36k
                                ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
887
4.36k
                            }
888
4.35k
                            ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
889
4.35k
                            ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
890
891
4.35k
                            ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
892
4.35k
                            ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
893
894
4.35k
                            ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
895
4.35k
                            ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
896
897
4.35k
                            ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
898
4.35k
                            ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
899
4.35k
                        }
900
901
4.35k
                        if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
902
4.20k
                        {
903
                            /*Calculate slice indices for neighbor pixels*/
904
4.20k
                            idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
905
4.20k
                            au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
906
4.20k
                            au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
907
4.20k
                            au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
908
4.20k
                            au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
909
4.20k
                            au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
910
911
4.20k
                            if((0 == (1 << log2_ctb_size) - sao_wd_luma))
912
0
                            {
913
0
                                if(ps_sao_ctxt->i4_ctb_x == 1)
914
0
                                {
915
0
                                    au4_idx_tl[6] = -1;
916
0
                                    au4_idx_tl[4] = -1;
917
0
                                }
918
0
                                else
919
0
                                {
920
0
                                    au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
921
0
                                }
922
0
                                if(ps_sao_ctxt->i4_ctb_y == 1)
923
0
                                {
924
0
                                    au4_idx_tl[5] = -1;
925
0
                                    au4_idx_tl[4] = -1;
926
0
                                }
927
0
                                else
928
0
                                {
929
0
                                    au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
930
0
                                    au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
931
0
                                }
932
0
                                au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
933
0
                            }
934
935
                             /* Verify that the neighbor CTBs don't cross the picture boundary.
936
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
937
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags.
938
                             * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
939
                             * the respective pixel's flags are checked
940
                             */
941
942
4.20k
                            if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
943
0
                            {
944
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
945
0
                                au4_ilf_across_tile_slice_enable[6] = 0;
946
0
                            }
947
4.20k
                            else
948
4.20k
                            {
949
4.20k
                                au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
950
4.20k
                            }
951
4.20k
                            if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
952
0
                            {
953
0
                                au4_ilf_across_tile_slice_enable[5] = 0;
954
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
955
0
                            }
956
4.20k
                            else
957
4.20k
                            {
958
4.20k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
959
4.20k
                                au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
960
4.20k
                            }
961
4.20k
                            au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
962
4.20k
                            au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
963
4.20k
                            au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
964
4.20k
                            au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
965
4.20k
                            au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
966
967
4.20k
                            if(au4_idx_tl[5] > idx_tl)
968
363
                            {
969
363
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
970
363
                            }
971
972
                            /*
973
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
974
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags.
975
                             * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
976
                             * the respective pixel's flags are checked
977
                             */
978
37.7k
                            for(i = 0; i < 8; i++)
979
33.5k
                            {
980
                                /*Sets the edges that lie on the slice/tile boundary*/
981
33.5k
                                if(au4_idx_tl[i] != idx_tl)
982
11.6k
                                {
983
11.6k
                                    au1_tile_slice_boundary[i] = 1;
984
11.6k
                                }
985
21.9k
                                else
986
21.9k
                                {
987
21.9k
                                    au4_ilf_across_tile_slice_enable[i] = 1;
988
21.9k
                                }
989
33.5k
                            }
990
991
4.20k
                            ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
992
4.20k
                        }
993
994
4.35k
                        if(ps_pps->i1_tiles_enabled_flag)
995
159
                        {
996
                            /* Calculate availability flags at tile boundary */
997
159
                            if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
998
159
                            {
999
                                /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1000
159
                                if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1001
15
                                {
1002
                                    /*Set the boundary arrays*/
1003
                                    /*Calculate tile indices for neighbor pixels*/
1004
15
                                    idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1005
15
                                    au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1006
15
                                    au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1007
15
                                    au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1008
15
                                    au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1009
15
                                    au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1010
1011
15
                                    if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1012
0
                                    {
1013
0
                                        if(ps_sao_ctxt->i4_ctb_x == 1)
1014
0
                                        {
1015
0
                                            au4_idx_tl[6] = -1;
1016
0
                                            au4_idx_tl[4] = -1;
1017
0
                                        }
1018
0
                                        else
1019
0
                                        {
1020
0
                                            au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1021
0
                                        }
1022
0
                                        if(ps_sao_ctxt->i4_ctb_y == 1)
1023
0
                                        {
1024
0
                                            au4_idx_tl[5] = -1;
1025
0
                                            au4_idx_tl[4] = -1;
1026
0
                                        }
1027
0
                                        else
1028
0
                                        {
1029
0
                                            au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1030
0
                                            au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1031
0
                                        }
1032
0
                                        au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1033
0
                                    }
1034
135
                                    for(i = 0; i < 8; i++)
1035
120
                                    {
1036
                                        /*Sets the edges that lie on the tile boundary*/
1037
120
                                        if(au4_idx_tl[i] != idx_tl)
1038
45
                                        {
1039
45
                                            au1_tile_slice_boundary[i] |= 1;
1040
45
                                            au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1041
45
                                        }
1042
120
                                    }
1043
15
                                }
1044
159
                            }
1045
159
                        }
1046
1047
1048
                        /*Set availability flags based on tile and slice boundaries*/
1049
39.2k
                        for(i = 0; i < 8; i++)
1050
34.8k
                        {
1051
                            /*Sets the edges that lie on the slice/tile boundary*/
1052
34.8k
                            if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1053
45
                            {
1054
45
                                au1_avail_luma[i] = 0;
1055
45
                            }
1056
34.8k
                        }
1057
4.35k
                    }
1058
4.78k
                }
1059
1060
4.78k
                if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
1061
0
                {
1062
0
                    au1_avail_luma[0] = 0;
1063
0
                    au1_avail_luma[4] = 0;
1064
0
                    au1_avail_luma[6] = 0;
1065
0
                }
1066
1067
4.78k
                if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1068
0
                {
1069
0
                    au1_avail_luma[1] = 0;
1070
0
                    au1_avail_luma[5] = 0;
1071
0
                    au1_avail_luma[7] = 0;
1072
0
                }
1073
                //y==1 case
1074
4.78k
                if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
1075
0
                {
1076
0
                    au1_avail_luma[2] = 0;
1077
0
                    au1_avail_luma[4] = 0;
1078
0
                    au1_avail_luma[5] = 0;
1079
0
                }
1080
4.78k
                if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1081
0
                {
1082
0
                    au1_avail_luma[3] = 0;
1083
0
                    au1_avail_luma[6] = 0;
1084
0
                    au1_avail_luma[7] = 0;
1085
0
                }
1086
1087
4.78k
                {
1088
4.78k
                    au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
1089
4.78k
                    u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
1090
4.78k
                    ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1091
4.78k
                                                                      src_strd,
1092
4.78k
                                                                      pu1_src_left_luma,
1093
4.78k
                                                                      pu1_src_top_luma,
1094
4.78k
                                                                      pu1_sao_src_luma_top_left_ctb,
1095
4.78k
                                                                      au1_src_top_right,
1096
4.78k
                                                                      &u1_sao_src_top_left_luma_bot_left,
1097
4.78k
                                                                      au1_avail_luma,
1098
4.78k
                                                                      ai1_offset_y,
1099
4.78k
                                                                      sao_wd_luma,
1100
4.78k
                                                                      sao_ht_luma);
1101
4.78k
                }
1102
4.78k
            }
1103
1104
20.2k
        }
1105
78.8k
        else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1106
31.3k
        {
1107
            /* Update left, top and top-left */
1108
281k
            for(row = 0; row < sao_ht_luma; row++)
1109
250k
            {
1110
250k
                pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1111
250k
            }
1112
31.3k
            pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1113
1114
31.3k
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1115
31.3k
        }
1116
1117
99.0k
        if(ps_slice_hdr_top_left->i1_slice_sao_chroma_flag)
1118
39.6k
        {
1119
39.6k
            if(0 == ps_sao->b3_cb_type_idx)
1120
35.9k
            {
1121
322k
                for(row = 0; row < sao_ht_chroma; row++)
1122
286k
                {
1123
286k
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1124
286k
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1125
286k
                }
1126
35.9k
                pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1127
35.9k
                pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1128
1129
35.9k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1130
1131
35.9k
            }
1132
1133
3.74k
            else if(1 == ps_sao->b3_cb_type_idx)
1134
1.67k
            {
1135
1.67k
                ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1136
1.67k
                ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1137
1.67k
                ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1138
1.67k
                ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1139
1140
1.67k
                ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1141
1.67k
                ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1142
1.67k
                ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1143
1.67k
                ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1144
1145
1.67k
                if(chroma_yuv420sp_vu)
1146
205
                {
1147
205
                    ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1148
205
                                                                                src_strd,
1149
205
                                                                                pu1_src_left_chroma,
1150
205
                                                                                pu1_src_top_chroma,
1151
205
                                                                                pu1_sao_src_chroma_top_left_ctb,
1152
205
                                                                                ps_sao->b5_cr_band_pos,
1153
205
                                                                                ps_sao->b5_cb_band_pos,
1154
205
                                                                                ai1_offset_cr,
1155
205
                                                                                ai1_offset_cb,
1156
205
                                                                                sao_wd_chroma,
1157
205
                                                                                sao_ht_chroma
1158
205
                                                                               );
1159
205
                }
1160
1.46k
                else
1161
1.46k
                {
1162
1.46k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1163
1.46k
                                                                                src_strd,
1164
1.46k
                                                                                pu1_src_left_chroma,
1165
1.46k
                                                                                pu1_src_top_chroma,
1166
1.46k
                                                                                pu1_sao_src_chroma_top_left_ctb,
1167
1.46k
                                                                                ps_sao->b5_cb_band_pos,
1168
1.46k
                                                                                ps_sao->b5_cr_band_pos,
1169
1.46k
                                                                                ai1_offset_cb,
1170
1.46k
                                                                                ai1_offset_cr,
1171
1.46k
                                                                                sao_wd_chroma,
1172
1.46k
                                                                                sao_ht_chroma
1173
1.46k
                                                                               );
1174
1.46k
                }
1175
1.67k
            }
1176
1177
2.07k
            else // if(2 <= ps_sao->b3_cb_type_idx)
1178
2.07k
            {
1179
2.07k
                ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1180
2.07k
                ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1181
2.07k
                ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1182
2.07k
                ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1183
1184
2.07k
                ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1185
2.07k
                ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1186
2.07k
                ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1187
2.07k
                ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1188
18.6k
                for(i = 0; i < 8; i++)
1189
16.6k
                {
1190
16.6k
                    au1_avail_chroma[i] = 255;
1191
16.6k
                    au1_tile_slice_boundary[i] = 0;
1192
16.6k
                    au4_idx_tl[i] = 0;
1193
16.6k
                    au4_ilf_across_tile_slice_enable[i] = 1;
1194
16.6k
                }
1195
                /*In case of slices*/
1196
2.07k
                {
1197
2.07k
                    if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1198
1.75k
                    {
1199
1.75k
                        if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
1200
0
                        {
1201
0
                            ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
1202
0
                            ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
1203
0
                        }
1204
1.75k
                        else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
1205
1.75k
                        {
1206
1.75k
                            ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
1207
1.75k
                            ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
1208
1.75k
                        }
1209
1.75k
                        ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
1210
1.75k
                        ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
1211
1212
1.75k
                        ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
1213
1.75k
                        ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
1214
1215
1.75k
                        ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
1216
1.75k
                        ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
1217
1218
1.75k
                        ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
1219
1.75k
                        ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
1220
1221
1.75k
                        if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1222
1.63k
                        {
1223
1224
1.63k
                            idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1225
1.63k
                            au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1226
1.63k
                            au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1227
1.63k
                            au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1228
1.63k
                            au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1229
1.63k
                            au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1230
1231
1.63k
                            if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
1232
0
                            {
1233
0
                                if(ps_sao_ctxt->i4_ctb_x == 1)
1234
0
                                {
1235
0
                                    au4_idx_tl[6] = -1;
1236
0
                                    au4_idx_tl[4] = -1;
1237
0
                                }
1238
0
                                else
1239
0
                                {
1240
0
                                    au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1241
0
                                }
1242
0
                                if(ps_sao_ctxt->i4_ctb_y == 1)
1243
0
                                {
1244
0
                                    au4_idx_tl[5] = -1;
1245
0
                                    au4_idx_tl[4] = -1;
1246
0
                                }
1247
0
                                else
1248
0
                                {
1249
0
                                    au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1250
0
                                    au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1251
0
                                }
1252
0
                                au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1253
0
                            }
1254
1255
                            /* Verify that the neighbor ctbs don't cross pic boundary
1256
                             * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
1257
1.63k
                            if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
1258
0
                            {
1259
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
1260
0
                                au4_ilf_across_tile_slice_enable[6] = 0;
1261
0
                            }
1262
1.63k
                            else
1263
1.63k
                            {
1264
1.63k
                                au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1265
1.63k
                            }
1266
1.63k
                            if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
1267
0
                            {
1268
0
                                au4_ilf_across_tile_slice_enable[5] = 0;
1269
0
                                au4_ilf_across_tile_slice_enable[4] = 0;
1270
0
                            }
1271
1.63k
                            else
1272
1.63k
                            {
1273
1.63k
                                au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1274
1.63k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1275
1.63k
                            }
1276
1.63k
                            au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1277
1.63k
                            au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
1278
1.63k
                            au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1279
1.63k
                            au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1280
1.63k
                            au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1281
                            /*
1282
                             * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1283
                             * of the pixel having a greater address is checked. Accordingly, set the availability flags
1284
                             */
1285
14.7k
                            for(i = 0; i < 8; i++)
1286
13.0k
                            {
1287
                                /*Sets the edges that lie on the slice/tile boundary*/
1288
13.0k
                                if(au4_idx_tl[i] != idx_tl)
1289
3.91k
                                {
1290
3.91k
                                    au1_tile_slice_boundary[i] = 1;
1291
3.91k
                                }
1292
9.16k
                                else
1293
9.16k
                                {
1294
9.16k
                                    au4_ilf_across_tile_slice_enable[i] = 1;
1295
9.16k
                                }
1296
13.0k
                            }
1297
1298
                            /*Reset indices*/
1299
14.7k
                            for(i = 0; i < 8; i++)
1300
13.0k
                            {
1301
13.0k
                                au4_idx_tl[i] = 0;
1302
13.0k
                            }
1303
1.63k
                        }
1304
1.75k
                        if(ps_pps->i1_tiles_enabled_flag)
1305
119
                        {
1306
                            /* Calculate availability flags at tile boundary */
1307
119
                            if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1308
107
                            {
1309
                                /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1310
107
                                if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1311
17
                                {
1312
                                    /*Set the boundary arrays*/
1313
                                    /*Calculate tile indices for neighbor pixels*/
1314
17
                                    idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
1315
17
                                    au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
1316
17
                                    au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1317
17
                                    au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1318
17
                                    au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1319
17
                                    au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1320
1321
17
                                    if((0 == (1 << log2_ctb_size) - sao_wd_luma))
1322
0
                                    {
1323
0
                                        if(ps_sao_ctxt->i4_ctb_x == 1)
1324
0
                                        {
1325
0
                                            au4_idx_tl[6] = -1;
1326
0
                                            au4_idx_tl[4] = -1;
1327
0
                                        }
1328
0
                                        else
1329
0
                                        {
1330
0
                                            au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
1331
0
                                        }
1332
0
                                        if(ps_sao_ctxt->i4_ctb_y == 1)
1333
0
                                        {
1334
0
                                            au4_idx_tl[5] = -1;
1335
0
                                            au4_idx_tl[4] = -1;
1336
0
                                        }
1337
0
                                        else
1338
0
                                        {
1339
0
                                            au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
1340
0
                                            au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
1341
0
                                        }
1342
0
                                        au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
1343
0
                                    }
1344
153
                                    for(i = 0; i < 8; i++)
1345
136
                                    {
1346
                                        /*Sets the edges that lie on the tile boundary*/
1347
136
                                        if(au4_idx_tl[i] != idx_tl)
1348
53
                                        {
1349
53
                                            au1_tile_slice_boundary[i] |= 1;
1350
53
                                            au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
1351
53
                                        }
1352
136
                                    }
1353
17
                                }
1354
107
                            }
1355
119
                        }
1356
1357
15.7k
                        for(i = 0; i < 8; i++)
1358
14.0k
                        {
1359
                            /*Sets the edges that lie on the slice/tile boundary*/
1360
14.0k
                            if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1361
53
                            {
1362
53
                                au1_avail_chroma[i] = 0;
1363
53
                            }
1364
14.0k
                        }
1365
1.75k
                    }
1366
2.07k
                }
1367
1368
2.07k
                if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
1369
0
                {
1370
0
                    au1_avail_chroma[0] = 0;
1371
0
                    au1_avail_chroma[4] = 0;
1372
0
                    au1_avail_chroma[6] = 0;
1373
0
                }
1374
2.07k
                if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
1375
0
                {
1376
0
                    au1_avail_chroma[1] = 0;
1377
0
                    au1_avail_chroma[5] = 0;
1378
0
                    au1_avail_chroma[7] = 0;
1379
0
                }
1380
1381
2.07k
                if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1382
0
                {
1383
0
                    au1_avail_chroma[2] = 0;
1384
0
                    au1_avail_chroma[4] = 0;
1385
0
                    au1_avail_chroma[5] = 0;
1386
0
                }
1387
2.07k
                if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1388
0
                {
1389
0
                    au1_avail_chroma[3] = 0;
1390
0
                    au1_avail_chroma[6] = 0;
1391
0
                    au1_avail_chroma[7] = 0;
1392
0
                }
1393
1394
2.07k
                {
1395
2.07k
                    au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
1396
2.07k
                    au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
1397
2.07k
                    au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
1398
2.07k
                    au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
1399
2.07k
                    if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
1400
0
                    {
1401
0
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
1402
0
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
1403
0
                    }
1404
1405
2.07k
                    if(chroma_yuv420sp_vu)
1406
338
                    {
1407
338
                        ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1408
338
                                                                             src_strd,
1409
338
                                                                             pu1_src_left_chroma,
1410
338
                                                                             pu1_src_top_chroma,
1411
338
                                                                             pu1_sao_src_chroma_top_left_ctb,
1412
338
                                                                             au1_src_top_right,
1413
338
                                                                             au1_sao_src_top_left_chroma_bot_left,
1414
338
                                                                             au1_avail_chroma,
1415
338
                                                                             ai1_offset_cr,
1416
338
                                                                             ai1_offset_cb,
1417
338
                                                                             sao_wd_chroma,
1418
338
                                                                             sao_ht_chroma);
1419
338
                    }
1420
1.73k
                    else
1421
1.73k
                    {
1422
1.73k
                        ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
1423
1.73k
                                                                             src_strd,
1424
1.73k
                                                                             pu1_src_left_chroma,
1425
1.73k
                                                                             pu1_src_top_chroma,
1426
1.73k
                                                                             pu1_sao_src_chroma_top_left_ctb,
1427
1.73k
                                                                             au1_src_top_right,
1428
1.73k
                                                                             au1_sao_src_top_left_chroma_bot_left,
1429
1.73k
                                                                             au1_avail_chroma,
1430
1.73k
                                                                             ai1_offset_cb,
1431
1.73k
                                                                             ai1_offset_cr,
1432
1.73k
                                                                             sao_wd_chroma,
1433
1.73k
                                                                             sao_ht_chroma);
1434
1.73k
                    }
1435
2.07k
                }
1436
2.07k
            }
1437
39.6k
        }
1438
59.4k
        else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1439
36.2k
        {
1440
326k
            for(row = 0; row < sao_ht_chroma; row++)
1441
289k
            {
1442
289k
                pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1443
289k
                pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1444
289k
            }
1445
36.2k
            pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1446
36.2k
            pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1447
1448
36.2k
            ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1449
36.2k
        }
1450
1451
99.0k
        pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
1452
99.0k
        pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
1453
99.0k
        ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
1454
99.0k
    }
1455
1456
1457
    /* Top CTB */
1458
116k
    if((ps_sao_ctxt->i4_ctb_y > 0))
1459
105k
    {
1460
105k
        WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
1461
105k
        WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
1462
105k
        WORD32 sao_ht_luma = SAO_SHIFT_CTB;
1463
105k
        WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
1464
1465
105k
        WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
1466
105k
        WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
1467
105k
        WORD32 au4_idx_t[8], idx_t;
1468
1469
105k
        WORD32 remaining_cols;
1470
1471
105k
        slice_header_t *ps_slice_hdr_top;
1472
105k
        {
1473
105k
            WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
1474
105k
                                        (ps_sao_ctxt->i4_ctb_x);
1475
105k
            ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx];
1476
105k
        }
1477
1478
105k
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
1479
105k
        if(remaining_cols <= SAO_SHIFT_CTB)
1480
6.35k
        {
1481
6.35k
            sao_wd_luma += remaining_cols;
1482
6.35k
        }
1483
105k
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
1484
105k
        if(remaining_cols <= 2 * SAO_SHIFT_CTB)
1485
6.35k
        {
1486
6.35k
            sao_wd_chroma += remaining_cols;
1487
6.35k
        }
1488
1489
105k
        pu1_src_luma -= (sao_ht_luma * src_strd);
1490
105k
        pu1_src_chroma -= (sao_ht_chroma * src_strd);
1491
105k
        ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
1492
105k
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1493
105k
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
1494
105k
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
1495
105k
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
1496
1497
105k
        if(0 != sao_wd_luma)
1498
105k
        {
1499
105k
            if(ps_slice_hdr_top->i1_slice_sao_luma_flag)
1500
22.1k
            {
1501
22.1k
                if(0 == ps_sao->b3_y_type_idx)
1502
14.9k
                {
1503
                    /* Update left, top and top-left */
1504
134k
                    for(row = 0; row < sao_ht_luma; row++)
1505
119k
                    {
1506
119k
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1507
119k
                    }
1508
14.9k
                    pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1509
1510
14.9k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1511
1512
14.9k
                }
1513
1514
7.28k
                else if(1 == ps_sao->b3_y_type_idx)
1515
2.23k
                {
1516
2.23k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1517
2.23k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1518
2.23k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1519
2.23k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1520
1521
2.23k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
1522
2.23k
                                                                              src_strd,
1523
2.23k
                                                                              pu1_src_left_luma,
1524
2.23k
                                                                              pu1_src_top_luma,
1525
2.23k
                                                                              pu1_sao_src_luma_top_left_ctb,
1526
2.23k
                                                                              ps_sao->b5_y_band_pos,
1527
2.23k
                                                                              ai1_offset_y,
1528
2.23k
                                                                              sao_wd_luma,
1529
2.23k
                                                                              sao_ht_luma
1530
2.23k
                                                                             );
1531
2.23k
                }
1532
1533
5.04k
                else // if(2 <= ps_sao->b3_y_type_idx)
1534
5.04k
                {
1535
5.04k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
1536
5.04k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
1537
5.04k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
1538
5.04k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
1539
1540
5.04k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
1541
5.04k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
1542
5.04k
                    ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
1543
1544
45.4k
                    for(i = 0; i < 8; i++)
1545
40.3k
                    {
1546
1547
40.3k
                        au4_ilf_across_tile_slice_enable[i] = 1;
1548
40.3k
                    }
1549
                    /******************************************************************
1550
                     * Derive the  Top-left CTB's neighbor pixel's slice indices.
1551
                     *
1552
                     *               T_T
1553
                     *          ____________
1554
                     *         |    |       |
1555
                     *         | T_L|  T    |T_R
1556
                     *         |    | ______|____
1557
                     *         |    |  T_D  |    |
1558
                     *         |    |       |    |
1559
                     *         |____|_______|    |
1560
                     *              |            |
1561
                     *              |            |
1562
                     *              |____________|
1563
                     *
1564
                     *****************************************************************/
1565
1566
                    /*In case of slices*/
1567
5.04k
                    {
1568
5.04k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1569
4.55k
                        {
1570
1571
4.55k
                            ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1572
4.55k
                            ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1573
1574
4.55k
                            ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1575
4.55k
                            ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1576
1577
4.55k
                            ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1578
4.55k
                            ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1579
1580
4.55k
                            ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1581
4.55k
                            ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1582
1583
4.55k
                            ctbx_t = ps_sao_ctxt->i4_ctb_x;
1584
4.55k
                            ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1585
1586
4.55k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1587
4.37k
                            {
1588
                                /*Calculate neighbor ctb slice indices*/
1589
4.37k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1590
592
                                {
1591
592
                                    au4_idx_t[0] = -1;
1592
592
                                    au4_idx_t[6] = -1;
1593
592
                                    au4_idx_t[4] = -1;
1594
592
                                }
1595
3.78k
                                else
1596
3.78k
                                {
1597
3.78k
                                    au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1598
3.78k
                                    au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1599
3.78k
                                }
1600
4.37k
                                idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1601
4.37k
                                au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1602
4.37k
                                au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1603
4.37k
                                au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1604
1605
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1606
4.37k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1607
592
                                {
1608
592
                                    au4_ilf_across_tile_slice_enable[4] = 0;
1609
592
                                    au4_ilf_across_tile_slice_enable[6] = 0;
1610
592
                                    au4_ilf_across_tile_slice_enable[0] = 0;
1611
592
                                }
1612
3.78k
                                else
1613
3.78k
                                {
1614
3.78k
                                    au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1615
3.78k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1616
3.78k
                                }
1617
1618
1619
1620
4.37k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1621
4.37k
                                au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1622
4.37k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1623
4.37k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1624
4.37k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1625
1626
4.37k
                                if(au4_idx_t[6] < idx_t)
1627
592
                                {
1628
592
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1629
592
                                }
1630
1631
                                /*
1632
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1633
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1634
                                 */
1635
1636
39.3k
                                for(i = 0; i < 8; i++)
1637
35.0k
                                {
1638
                                    /*Sets the edges that lie on the slice/tile boundary*/
1639
35.0k
                                    if(au4_idx_t[i] != idx_t)
1640
13.2k
                                    {
1641
13.2k
                                        au1_tile_slice_boundary[i] = 1;
1642
                                        /*Check for slice flag at such boundaries*/
1643
13.2k
                                    }
1644
21.7k
                                    else
1645
21.7k
                                    {
1646
21.7k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
1647
21.7k
                                    }
1648
35.0k
                                }
1649
                                /*Reset indices*/
1650
39.3k
                                for(i = 0; i < 8; i++)
1651
35.0k
                                {
1652
35.0k
                                    au4_idx_t[i] = 0;
1653
35.0k
                                }
1654
4.37k
                            }
1655
1656
4.55k
                            if(ps_pps->i1_tiles_enabled_flag)
1657
182
                            {
1658
                                /* Calculate availability flags at tile boundary */
1659
182
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1660
173
                                {
1661
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1662
173
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1663
15
                                    {
1664
                                        /*Calculate neighbor ctb tile indices*/
1665
15
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
1666
7
                                        {
1667
7
                                            au4_idx_t[0] = -1;
1668
7
                                            au4_idx_t[6] = -1;
1669
7
                                            au4_idx_t[4] = -1;
1670
7
                                        }
1671
8
                                        else
1672
8
                                        {
1673
8
                                            au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1674
8
                                            au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1675
8
                                        }
1676
15
                                        idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1677
15
                                        au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1678
15
                                        au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1679
15
                                        au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1680
1681
135
                                        for(i = 0; i < 8; i++)
1682
120
                                        {
1683
                                            /*Sets the edges that lie on the tile boundary*/
1684
120
                                            if(au4_idx_t[i] != idx_t)
1685
59
                                            {
1686
59
                                                au1_tile_slice_boundary[i] |= 1;
1687
59
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1688
59
                                            }
1689
120
                                        }
1690
15
                                    }
1691
173
                                }
1692
182
                            }
1693
1694
41.0k
                            for(i = 0; i < 8; i++)
1695
36.4k
                            {
1696
                                /*Mark a neighbor as unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
1697
36.4k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1698
1.24k
                                {
1699
1.24k
                                    au1_avail_luma[i] = 0;
1700
1.24k
                                }
1701
36.4k
                            }
1702
4.55k
                        }
1703
5.04k
                    }
1704
1705
1706
5.04k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
1707
717
                    {
1708
717
                        au1_avail_luma[0] = 0;
1709
717
                        au1_avail_luma[4] = 0;
1710
717
                        au1_avail_luma[6] = 0;
1711
717
                    }
1712
1713
5.04k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
1714
265
                    {
1715
265
                        au1_avail_luma[1] = 0;
1716
265
                        au1_avail_luma[5] = 0;
1717
265
                        au1_avail_luma[7] = 0;
1718
265
                    }
1719
1720
5.04k
                    if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
1721
0
                    {
1722
0
                        au1_avail_luma[2] = 0;
1723
0
                        au1_avail_luma[4] = 0;
1724
0
                        au1_avail_luma[5] = 0;
1725
0
                    }
1726
1727
5.04k
                    if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
1728
0
                    {
1729
0
                        au1_avail_luma[3] = 0;
1730
0
                        au1_avail_luma[6] = 0;
1731
0
                        au1_avail_luma[7] = 0;
1732
0
                    }
1733
1734
5.04k
                    {
1735
5.04k
                        au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
1736
5.04k
                        u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
1737
5.04k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
1738
5.04k
                                                                          src_strd,
1739
5.04k
                                                                          pu1_src_left_luma,
1740
5.04k
                                                                          pu1_src_top_luma,
1741
5.04k
                                                                          pu1_sao_src_luma_top_left_ctb,
1742
5.04k
                                                                          au1_src_top_right,
1743
5.04k
                                                                          &u1_sao_src_top_left_luma_bot_left,
1744
5.04k
                                                                          au1_avail_luma,
1745
5.04k
                                                                          ai1_offset_y,
1746
5.04k
                                                                          sao_wd_luma,
1747
5.04k
                                                                          sao_ht_luma);
1748
5.04k
                    }
1749
5.04k
                }
1750
22.1k
            }
1751
83.1k
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1752
33.8k
            {
1753
                /* Update left, top and top-left */
1754
304k
                for(row = 0; row < sao_ht_luma; row++)
1755
270k
                {
1756
270k
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
1757
270k
                }
1758
33.8k
                pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
1759
1760
33.8k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
1761
33.8k
            }
1762
105k
        }
1763
1764
105k
        if(0 != sao_wd_chroma)
1765
105k
        {
1766
105k
            if(ps_slice_hdr_top->i1_slice_sao_chroma_flag)
1767
41.4k
            {
1768
41.4k
                if(0 == ps_sao->b3_cb_type_idx)
1769
37.3k
                {
1770
1771
336k
                    for(row = 0; row < sao_ht_chroma; row++)
1772
298k
                    {
1773
298k
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
1774
298k
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
1775
298k
                    }
1776
37.3k
                    pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
1777
37.3k
                    pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
1778
1779
37.3k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
1780
1781
37.3k
                }
1782
1783
4.10k
                else if(1 == ps_sao->b3_cb_type_idx)
1784
1.83k
                {
1785
1.83k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1786
1.83k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1787
1.83k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1788
1.83k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1789
1790
1.83k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1791
1.83k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1792
1.83k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1793
1.83k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1794
1795
1.83k
                    if(chroma_yuv420sp_vu)
1796
227
                    {
1797
227
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1798
227
                                                                                    src_strd,
1799
227
                                                                                    pu1_src_left_chroma,
1800
227
                                                                                    pu1_src_top_chroma,
1801
227
                                                                                    pu1_sao_src_chroma_top_left_ctb,
1802
227
                                                                                    ps_sao->b5_cr_band_pos,
1803
227
                                                                                    ps_sao->b5_cb_band_pos,
1804
227
                                                                                    ai1_offset_cr,
1805
227
                                                                                    ai1_offset_cb,
1806
227
                                                                                    sao_wd_chroma,
1807
227
                                                                                    sao_ht_chroma
1808
227
                                                                                   );
1809
227
                    }
1810
1.61k
                    else
1811
1.61k
                    {
1812
1.61k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
1813
1.61k
                                                                                    src_strd,
1814
1.61k
                                                                                    pu1_src_left_chroma,
1815
1.61k
                                                                                    pu1_src_top_chroma,
1816
1.61k
                                                                                    pu1_sao_src_chroma_top_left_ctb,
1817
1.61k
                                                                                    ps_sao->b5_cb_band_pos,
1818
1.61k
                                                                                    ps_sao->b5_cr_band_pos,
1819
1.61k
                                                                                    ai1_offset_cb,
1820
1.61k
                                                                                    ai1_offset_cr,
1821
1.61k
                                                                                    sao_wd_chroma,
1822
1.61k
                                                                                    sao_ht_chroma
1823
1.61k
                                                                                   );
1824
1.61k
                    }
1825
1.83k
                }
1826
2.26k
                else // if(2 <= ps_sao->b3_cb_type_idx)
1827
2.26k
                {
1828
2.26k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
1829
2.26k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
1830
2.26k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
1831
2.26k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
1832
1833
2.26k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
1834
2.26k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
1835
2.26k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
1836
2.26k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
1837
1838
20.3k
                    for(i = 0; i < 8; i++)
1839
18.1k
                    {
1840
18.1k
                        au1_avail_chroma[i] = 255;
1841
18.1k
                        au1_tile_slice_boundary[i] = 0;
1842
18.1k
                        au4_idx_t[i] = 0;
1843
18.1k
                        au4_ilf_across_tile_slice_enable[i] = 1;
1844
18.1k
                    }
1845
1846
2.26k
                    {
1847
2.26k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
1848
1.90k
                        {
1849
1.90k
                            ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
1850
1.90k
                            ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
1851
1852
1.90k
                            ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
1853
1.90k
                            ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
1854
1855
1.90k
                            ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
1856
1.90k
                            ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
1857
1858
1.90k
                            ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
1859
1.90k
                            ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
1860
1861
1.90k
                            ctbx_t = ps_sao_ctxt->i4_ctb_x;
1862
1.90k
                            ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
1863
1864
1.90k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
1865
1.76k
                            {
1866
1.76k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1867
226
                                {
1868
226
                                    au4_idx_t[0] = -1;
1869
226
                                    au4_idx_t[6] = -1;
1870
226
                                    au4_idx_t[4] = -1;
1871
226
                                }
1872
1.53k
                                else
1873
1.53k
                                {
1874
1.53k
                                    au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1875
1.53k
                                    au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1876
1.53k
                                }
1877
1.76k
                                idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1878
1.76k
                                au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1879
1.76k
                                au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1880
1.76k
                                au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1881
1882
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
1883
1884
1.76k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
1885
226
                                {
1886
226
                                    au4_ilf_across_tile_slice_enable[4] = 0;
1887
226
                                    au4_ilf_across_tile_slice_enable[6] = 0;
1888
226
                                    au4_ilf_across_tile_slice_enable[0] = 0;
1889
226
                                }
1890
1.53k
                                else
1891
1.53k
                                {
1892
1.53k
                                    au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1893
1.53k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
1894
1.53k
                                }
1895
1896
1.76k
                                au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag;
1897
1.76k
                                au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1898
1.76k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
1899
1.76k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
1900
1.76k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
1901
1902
1.76k
                                if(idx_t > au4_idx_t[6])
1903
226
                                {
1904
226
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
1905
226
                                }
1906
1907
                                /*
1908
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
1909
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
1910
                                 */
1911
15.8k
                                for(i = 0; i < 8; i++)
1912
14.1k
                                {
1913
                                    /*Sets the edges that lie on the slice/tile boundary*/
1914
14.1k
                                    if(au4_idx_t[i] != idx_t)
1915
4.60k
                                    {
1916
4.60k
                                        au1_tile_slice_boundary[i] = 1;
1917
4.60k
                                    }
1918
9.52k
                                    else
1919
9.52k
                                    {
1920
                                        /*Indicates that the neighbour belongs to same/dependent slice*/
1921
9.52k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
1922
9.52k
                                    }
1923
14.1k
                                }
1924
                                /*Reset indices*/
1925
15.8k
                                for(i = 0; i < 8; i++)
1926
14.1k
                                {
1927
14.1k
                                    au4_idx_t[i] = 0;
1928
14.1k
                                }
1929
1.76k
                            }
1930
1.90k
                            if(ps_pps->i1_tiles_enabled_flag)
1931
143
                            {
1932
                                /* Calculate availability flags at tile boundary */
1933
143
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
1934
126
                                {
1935
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
1936
126
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
1937
24
                                    {
1938
                                        /*Calculate neighbor ctb tile indices*/
1939
24
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
1940
8
                                        {
1941
8
                                            au4_idx_t[0] = -1;
1942
8
                                            au4_idx_t[6] = -1;
1943
8
                                            au4_idx_t[4] = -1;
1944
8
                                        }
1945
16
                                        else
1946
16
                                        {
1947
16
                                            au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
1948
16
                                            au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1949
16
                                        }
1950
24
                                        idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
1951
24
                                        au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
1952
24
                                        au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
1953
24
                                        au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
1954
1955
216
                                        for(i = 0; i < 8; i++)
1956
192
                                        {
1957
                                            /*Sets the edges that lie on the tile boundary*/
1958
192
                                            if(au4_idx_t[i] != idx_t)
1959
90
                                            {
1960
90
                                                au1_tile_slice_boundary[i] |= 1;
1961
90
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
1962
90
                                            }
1963
192
                                        }
1964
24
                                    }
1965
126
                                }
1966
143
                            }
1967
17.1k
                            for(i = 0; i < 8; i++)
1968
15.2k
                            {
1969
                                /*Mark a neighbor as unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
1970
15.2k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
1971
542
                                {
1972
542
                                    au1_avail_chroma[i] = 0;
1973
542
                                }
1974
15.2k
                            }
1975
1976
1.90k
                        }
1977
2.26k
                    }
1978
2.26k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
1979
300
                    {
1980
300
                        au1_avail_chroma[0] = 0;
1981
300
                        au1_avail_chroma[4] = 0;
1982
300
                        au1_avail_chroma[6] = 0;
1983
300
                    }
1984
1985
2.26k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
1986
191
                    {
1987
191
                        au1_avail_chroma[1] = 0;
1988
191
                        au1_avail_chroma[5] = 0;
1989
191
                        au1_avail_chroma[7] = 0;
1990
191
                    }
1991
1992
2.26k
                    if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
1993
0
                    {
1994
0
                        au1_avail_chroma[2] = 0;
1995
0
                        au1_avail_chroma[4] = 0;
1996
0
                        au1_avail_chroma[5] = 0;
1997
0
                    }
1998
1999
2.26k
                    if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
2000
0
                    {
2001
0
                        au1_avail_chroma[3] = 0;
2002
0
                        au1_avail_chroma[6] = 0;
2003
0
                        au1_avail_chroma[7] = 0;
2004
0
                    }
2005
2006
2.26k
                    {
2007
2.26k
                        au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
2008
2.26k
                        au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
2009
2.26k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2010
2.26k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2011
2012
2.26k
                        if(chroma_yuv420sp_vu)
2013
359
                        {
2014
359
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2015
359
                                                                                 src_strd,
2016
359
                                                                                 pu1_src_left_chroma,
2017
359
                                                                                 pu1_src_top_chroma,
2018
359
                                                                                 pu1_sao_src_chroma_top_left_ctb,
2019
359
                                                                                 au1_src_top_right,
2020
359
                                                                                 au1_sao_src_top_left_chroma_bot_left,
2021
359
                                                                                 au1_avail_chroma,
2022
359
                                                                                 ai1_offset_cr,
2023
359
                                                                                 ai1_offset_cb,
2024
359
                                                                                 sao_wd_chroma,
2025
359
                                                                                 sao_ht_chroma);
2026
359
                        }
2027
1.90k
                        else
2028
1.90k
                        {
2029
1.90k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2030
1.90k
                                                                                 src_strd,
2031
1.90k
                                                                                 pu1_src_left_chroma,
2032
1.90k
                                                                                 pu1_src_top_chroma,
2033
1.90k
                                                                                 pu1_sao_src_chroma_top_left_ctb,
2034
1.90k
                                                                                 au1_src_top_right,
2035
1.90k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
2036
1.90k
                                                                                 au1_avail_chroma,
2037
1.90k
                                                                                 ai1_offset_cb,
2038
1.90k
                                                                                 ai1_offset_cr,
2039
1.90k
                                                                                 sao_wd_chroma,
2040
1.90k
                                                                                 sao_ht_chroma);
2041
1.90k
                        }
2042
2.26k
                    }
2043
2044
2.26k
                }
2045
41.4k
            }
2046
63.8k
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2047
39.2k
            {
2048
352k
                for(row = 0; row < sao_ht_chroma; row++)
2049
313k
                {
2050
313k
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2051
313k
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2052
313k
                }
2053
39.2k
                pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2054
39.2k
                pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2055
2056
39.2k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2057
39.2k
            }
2058
105k
        }
2059
2060
105k
        pu1_src_luma += sao_ht_luma * src_strd;
2061
105k
        pu1_src_chroma += sao_ht_chroma * src_strd;
2062
105k
        ps_sao += (ps_sps->i2_pic_wd_in_ctb);
2063
105k
    }
2064
2065
    /* Left CTB */
2066
116k
    if(ps_sao_ctxt->i4_ctb_x > 0)
2067
109k
    {
2068
109k
        WORD32 sao_wd_luma = SAO_SHIFT_CTB;
2069
109k
        WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
2070
109k
        WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2071
109k
        WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2072
2073
109k
        WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
2074
109k
        WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
2075
109k
        WORD32 au4_idx_l[8], idx_l;
2076
2077
109k
        WORD32 remaining_rows;
2078
109k
        slice_header_t *ps_slice_hdr_left;
2079
109k
        {
2080
109k
            WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb +
2081
109k
                                        (ps_sao_ctxt->i4_ctb_x - 1);
2082
109k
            ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx];
2083
109k
        }
2084
2085
109k
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2086
109k
        if(remaining_rows <= SAO_SHIFT_CTB)
2087
10.2k
        {
2088
10.2k
            sao_ht_luma += remaining_rows;
2089
10.2k
        }
2090
109k
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2091
109k
        if(remaining_rows <= SAO_SHIFT_CTB)
2092
10.2k
        {
2093
10.2k
            sao_ht_chroma += remaining_rows;
2094
10.2k
        }
2095
2096
109k
        pu1_src_luma -= sao_wd_luma;
2097
109k
        pu1_src_chroma -= sao_wd_chroma;
2098
109k
        ps_sao -= 1;
2099
109k
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
2100
109k
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
2101
109k
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2102
109k
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2103
2104
2105
109k
        if(0 != sao_ht_luma)
2106
109k
        {
2107
109k
            if(ps_slice_hdr_left->i1_slice_sao_luma_flag)
2108
23.1k
            {
2109
23.1k
                if(0 == ps_sao->b3_y_type_idx)
2110
15.8k
                {
2111
                    /* Update left, top and top-left */
2112
848k
                    for(row = 0; row < sao_ht_luma; row++)
2113
832k
                    {
2114
832k
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2115
832k
                    }
2116
                    /*Update in next location*/
2117
15.8k
                    pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2118
2119
15.8k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2120
2121
15.8k
                }
2122
2123
7.26k
                else if(1 == ps_sao->b3_y_type_idx)
2124
2.13k
                {
2125
2.13k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2126
2.13k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2127
2.13k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2128
2.13k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2129
2130
2.13k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2131
2.13k
                                                                              src_strd,
2132
2.13k
                                                                              pu1_src_left_luma,
2133
2.13k
                                                                              pu1_src_top_luma,
2134
2.13k
                                                                              pu1_sao_src_top_left_luma_curr_ctb,
2135
2.13k
                                                                              ps_sao->b5_y_band_pos,
2136
2.13k
                                                                              ai1_offset_y,
2137
2.13k
                                                                              sao_wd_luma,
2138
2.13k
                                                                              sao_ht_luma
2139
2.13k
                                                                             );
2140
2.13k
                }
2141
2142
5.12k
                else // if(2 <= ps_sao->b3_y_type_idx)
2143
5.12k
                {
2144
5.12k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2145
5.12k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2146
5.12k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2147
5.12k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2148
2149
46.1k
                    for(i = 0; i < 8; i++)
2150
41.0k
                    {
2151
41.0k
                        au1_avail_luma[i] = 255;
2152
41.0k
                        au1_tile_slice_boundary[i] = 0;
2153
41.0k
                        au4_idx_l[i] = 0;
2154
41.0k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2155
41.0k
                    }
2156
                    /******************************************************************
2157
                     * Derive the Left CTB's neighbour pixels' slice indices.
2158
                     *
2159
                     *
2160
                     *          ____________
2161
                     *         |    |       |
2162
                     *         | L_T|       |
2163
                     *         |____|_______|____
2164
                     *         |    |       |    |
2165
                     *     L_L |  L |  L_R  |    |
2166
                     *         |____|_______|    |
2167
                     *              |            |
2168
                     *          L_D |            |
2169
                     *              |____________|
2170
                     *
2171
                     *****************************************************************/
2172
2173
                    /*In case of slices or tiles*/
2174
5.12k
                    {
2175
5.12k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2176
4.08k
                        {
2177
4.08k
                            ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2178
4.08k
                            ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2179
2180
4.08k
                            ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2181
4.08k
                            ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2182
2183
4.08k
                            ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2184
4.08k
                            ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2185
2186
4.08k
                            ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2187
4.08k
                            ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2188
2189
4.08k
                            ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2190
4.08k
                            ctby_l = ps_sao_ctxt->i4_ctb_y;
2191
2192
4.08k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2193
3.90k
                            {
2194
3.90k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2195
107
                                {
2196
107
                                    au4_idx_l[2] = -1;
2197
107
                                    au4_idx_l[4] = -1;
2198
107
                                    au4_idx_l[5] = -1;
2199
107
                                }
2200
3.79k
                                else
2201
3.79k
                                {
2202
3.79k
                                    au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2203
3.79k
                                    au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2204
3.79k
                                }
2205
3.90k
                                idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2206
3.90k
                                au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2207
3.90k
                                au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2208
3.90k
                                au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2209
2210
                                /*Verify that the neighbor ctbs don't cross pic boundary.*/
2211
3.90k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2212
107
                                {
2213
107
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2214
107
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2215
107
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2216
107
                                }
2217
3.79k
                                else
2218
3.79k
                                {
2219
3.79k
                                    au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2220
3.79k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2221
2222
3.79k
                                }
2223
                                //TODO: ILF flag checks for [0] and [6] are missing.
2224
3.90k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2225
3.90k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2226
3.90k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2227
2228
3.90k
                                if(idx_l < au4_idx_l[5])
2229
0
                                {
2230
0
                                    au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2231
0
                                }
2232
2233
                                /*
2234
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2235
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2236
                                 */
2237
35.1k
                                for(i = 0; i < 8; i++)
2238
31.2k
                                {
2239
                                    /*Sets the edges that lie on the slice/tile boundary*/
2240
31.2k
                                    if(au4_idx_l[i] != idx_l)
2241
11.4k
                                    {
2242
11.4k
                                        au1_tile_slice_boundary[i] = 1;
2243
11.4k
                                    }
2244
19.7k
                                    else
2245
19.7k
                                    {
2246
19.7k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2247
19.7k
                                    }
2248
31.2k
                                }
2249
                                /*Reset indices*/
2250
35.1k
                                for(i = 0; i < 8; i++)
2251
31.2k
                                {
2252
31.2k
                                    au4_idx_l[i] = 0;
2253
31.2k
                                }
2254
3.90k
                            }
2255
2256
4.08k
                            if(ps_pps->i1_tiles_enabled_flag)
2257
179
                            {
2258
                                /* Calculate availability flags at tile boundary */
2259
179
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2260
128
                                {
2261
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2262
128
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2263
0
                                    {
2264
0
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2265
0
                                        {
2266
0
                                            au4_idx_l[2] = -1;
2267
0
                                            au4_idx_l[4] = -1;
2268
0
                                            au4_idx_l[5] = -1;
2269
0
                                        }
2270
0
                                        else
2271
0
                                        {
2272
0
                                            au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2273
0
                                            au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2274
0
                                        }
2275
2276
0
                                        idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2277
0
                                        au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2278
0
                                        au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2279
0
                                        au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2280
2281
0
                                        for(i = 0; i < 8; i++)
2282
0
                                        {
2283
                                            /*Sets the edges that lie on the slice/tile boundary*/
2284
0
                                            if(au4_idx_l[i] != idx_l)
2285
0
                                            {
2286
0
                                                au1_tile_slice_boundary[i] |= 1;
2287
0
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
2288
0
                                            }
2289
0
                                        }
2290
0
                                    }
2291
128
                                }
2292
179
                            }
2293
2294
36.7k
                            for(i = 0; i < 8; i++)
2295
32.6k
                            {
2296
                                /*Sets the edges that lie on the slice/tile boundary*/
2297
32.6k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2298
321
                                {
2299
321
                                    au1_avail_luma[i] = 0;
2300
321
                                }
2301
32.6k
                            }
2302
4.08k
                        }
2303
5.12k
                    }
2304
5.12k
                    if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
2305
0
                    {
2306
0
                        au1_avail_luma[0] = 0;
2307
0
                        au1_avail_luma[4] = 0;
2308
0
                        au1_avail_luma[6] = 0;
2309
0
                    }
2310
5.12k
                    if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2311
0
                    {
2312
0
                        au1_avail_luma[1] = 0;
2313
0
                        au1_avail_luma[5] = 0;
2314
0
                        au1_avail_luma[7] = 0;
2315
0
                    }
2316
2317
5.12k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2318
925
                    {
2319
925
                        au1_avail_luma[2] = 0;
2320
925
                        au1_avail_luma[4] = 0;
2321
925
                        au1_avail_luma[5] = 0;
2322
925
                    }
2323
2324
5.12k
                    if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2325
344
                    {
2326
344
                        au1_avail_luma[3] = 0;
2327
344
                        au1_avail_luma[6] = 0;
2328
344
                        au1_avail_luma[7] = 0;
2329
344
                    }
2330
2331
5.12k
                    {
2332
5.12k
                        au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
2333
5.12k
                        u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
2334
5.12k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2335
5.12k
                                                                          src_strd,
2336
5.12k
                                                                          pu1_src_left_luma,
2337
5.12k
                                                                          pu1_src_top_luma,
2338
5.12k
                                                                          pu1_sao_src_top_left_luma_curr_ctb,
2339
5.12k
                                                                          au1_src_top_right,
2340
5.12k
                                                                          &u1_sao_src_top_left_luma_bot_left,
2341
5.12k
                                                                          au1_avail_luma,
2342
5.12k
                                                                          ai1_offset_y,
2343
5.12k
                                                                          sao_wd_luma,
2344
5.12k
                                                                          sao_ht_luma);
2345
5.12k
                    }
2346
2347
5.12k
                }
2348
23.1k
            }
2349
86.1k
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2350
32.5k
            {
2351
                /* Update left, top and top-left */
2352
1.74M
                for(row = 0; row < sao_ht_luma; row++)
2353
1.71M
                {
2354
1.71M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2355
1.71M
                }
2356
                /*Update in next location*/
2357
32.5k
                pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2358
2359
32.5k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2360
32.5k
            }
2361
109k
        }
2362
2363
109k
        if(0 != sao_ht_chroma)
2364
109k
        {
2365
109k
            if(ps_slice_hdr_left->i1_slice_sao_chroma_flag)
2366
42.4k
            {
2367
42.4k
                if(0 == ps_sao->b3_cb_type_idx)
2368
38.5k
                {
2369
934k
                    for(row = 0; row < sao_ht_chroma; row++)
2370
895k
                    {
2371
895k
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2372
895k
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2373
895k
                    }
2374
38.5k
                    pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2375
38.5k
                    pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2376
2377
38.5k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2378
38.5k
                }
2379
2380
3.98k
                else if(1 == ps_sao->b3_cb_type_idx)
2381
1.77k
                {
2382
1.77k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2383
1.77k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2384
1.77k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2385
1.77k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2386
2387
1.77k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2388
1.77k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2389
1.77k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2390
1.77k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2391
2392
1.77k
                    if(chroma_yuv420sp_vu)
2393
237
                    {
2394
237
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2395
237
                                                                                    src_strd,
2396
237
                                                                                    pu1_src_left_chroma,
2397
237
                                                                                    pu1_src_top_chroma,
2398
237
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2399
237
                                                                                    ps_sao->b5_cr_band_pos,
2400
237
                                                                                    ps_sao->b5_cb_band_pos,
2401
237
                                                                                    ai1_offset_cr,
2402
237
                                                                                    ai1_offset_cb,
2403
237
                                                                                    sao_wd_chroma,
2404
237
                                                                                    sao_ht_chroma
2405
237
                                                                                   );
2406
237
                    }
2407
1.54k
                    else
2408
1.54k
                    {
2409
1.54k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
2410
1.54k
                                                                                    src_strd,
2411
1.54k
                                                                                    pu1_src_left_chroma,
2412
1.54k
                                                                                    pu1_src_top_chroma,
2413
1.54k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
2414
1.54k
                                                                                    ps_sao->b5_cb_band_pos,
2415
1.54k
                                                                                    ps_sao->b5_cr_band_pos,
2416
1.54k
                                                                                    ai1_offset_cb,
2417
1.54k
                                                                                    ai1_offset_cr,
2418
1.54k
                                                                                    sao_wd_chroma,
2419
1.54k
                                                                                    sao_ht_chroma
2420
1.54k
                                                                                   );
2421
1.54k
                    }
2422
1.77k
                }
2423
2424
2.20k
                else // if(2 <= ps_sao->b3_cb_type_idx)
2425
2.20k
                {
2426
2.20k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
2427
2.20k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
2428
2.20k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
2429
2.20k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
2430
2431
2.20k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
2432
2.20k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
2433
2.20k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
2434
2.20k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
2435
2436
19.8k
                    for(i = 0; i < 8; i++)
2437
17.6k
                    {
2438
17.6k
                        au1_avail_chroma[i] = 255;
2439
17.6k
                        au1_tile_slice_boundary[i] = 0;
2440
17.6k
                        au4_idx_l[i] = 0;
2441
17.6k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2442
17.6k
                    }
2443
                    /*In case of slices or tiles*/
2444
2.20k
                    {
2445
2.20k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2446
1.71k
                        {
2447
1.71k
                            ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
2448
1.71k
                            ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
2449
2450
1.71k
                            ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
2451
1.71k
                            ctby_l_l = ps_sao_ctxt->i4_ctb_y;
2452
2453
1.71k
                            ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
2454
1.71k
                            ctby_l_r = ps_sao_ctxt->i4_ctb_y;
2455
2456
1.71k
                            ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
2457
1.71k
                            ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
2458
2459
1.71k
                            ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
2460
1.71k
                            ctby_l = ps_sao_ctxt->i4_ctb_y;
2461
2462
1.71k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2463
1.56k
                            {
2464
1.56k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2465
6
                                {
2466
6
                                    au4_idx_l[2] = -1;
2467
6
                                    au4_idx_l[4] = -1;
2468
6
                                    au4_idx_l[5] = -1;
2469
6
                                }
2470
1.55k
                                else
2471
1.55k
                                {
2472
1.55k
                                    au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2473
1.55k
                                    au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2474
1.55k
                                }
2475
1.56k
                                idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2476
1.56k
                                au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2477
1.56k
                                au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2478
1.56k
                                au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2479
2480
                                /*Verify that the neighbour ctbs don't cross pic boundary.*/
2481
1.56k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2482
6
                                {
2483
6
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2484
6
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2485
6
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2486
6
                                }
2487
1.55k
                                else
2488
1.55k
                                {
2489
1.55k
                                    au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2490
1.55k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2491
1.55k
                                }
2492
2493
1.56k
                                if(au4_idx_l[5] > idx_l)
2494
0
                                {
2495
0
                                    au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
2496
0
                                }
2497
2498
                                //  au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
2499
1.56k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2500
1.56k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2501
1.56k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2502
                                /*
2503
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2504
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2505
                                 */
2506
14.0k
                                for(i = 0; i < 8; i++)
2507
12.5k
                                {
2508
                                    /*Sets the edges that lie on the slice/tile boundary*/
2509
12.5k
                                    if(au4_idx_l[i] != idx_l)
2510
4.04k
                                    {
2511
4.04k
                                        au1_tile_slice_boundary[i] = 1;
2512
4.04k
                                    }
2513
8.47k
                                    else
2514
8.47k
                                    {
2515
8.47k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2516
8.47k
                                    }
2517
12.5k
                                }
2518
                                /*Reset indices*/
2519
14.0k
                                for(i = 0; i < 8; i++)
2520
12.5k
                                {
2521
12.5k
                                    au4_idx_l[i] = 0;
2522
12.5k
                                }
2523
1.56k
                            }
2524
1.71k
                            if(ps_pps->i1_tiles_enabled_flag)
2525
152
                            {
2526
                                /* Calculate availability flags at tile boundary */
2527
152
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2528
99
                                {
2529
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2530
99
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2531
3
                                    {
2532
3
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2533
1
                                        {
2534
1
                                            au4_idx_l[2] = -1;
2535
1
                                            au4_idx_l[4] = -1;
2536
1
                                            au4_idx_l[5] = -1;
2537
1
                                        }
2538
2
                                        else
2539
2
                                        {
2540
2
                                            au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
2541
2
                                            au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
2542
2
                                        }
2543
2544
3
                                        idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
2545
3
                                        au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
2546
3
                                        au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
2547
3
                                        au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
2548
2549
27
                                        for(i = 0; i < 8; i++)
2550
24
                                        {
2551
                                            /*Sets the edges that lie on the slice/tile boundary*/
2552
24
                                            if(au4_idx_l[i] != idx_l)
2553
11
                                            {
2554
11
                                                au1_tile_slice_boundary[i] |= 1;
2555
11
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2556
11
                                            }
2557
24
                                        }
2558
3
                                    }
2559
99
                                }
2560
152
                            }
2561
15.4k
                            for(i = 0; i < 8; i++)
2562
13.7k
                            {
2563
                                /*Sets the edges that lie on the slice/tile boundary*/
2564
13.7k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2565
29
                                {
2566
29
                                    au1_avail_chroma[i] = 0;
2567
29
                                }
2568
13.7k
                            }
2569
1.71k
                        }
2570
2.20k
                    }
2571
2.20k
                    if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
2572
0
                    {
2573
0
                        au1_avail_chroma[0] = 0;
2574
0
                        au1_avail_chroma[4] = 0;
2575
0
                        au1_avail_chroma[6] = 0;
2576
0
                    }
2577
2578
2.20k
                    if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
2579
0
                    {
2580
0
                        au1_avail_chroma[1] = 0;
2581
0
                        au1_avail_chroma[5] = 0;
2582
0
                        au1_avail_chroma[7] = 0;
2583
0
                    }
2584
2585
2.20k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2586
285
                    {
2587
285
                        au1_avail_chroma[2] = 0;
2588
285
                        au1_avail_chroma[4] = 0;
2589
285
                        au1_avail_chroma[5] = 0;
2590
285
                    }
2591
2592
2.20k
                    if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
2593
128
                    {
2594
128
                        au1_avail_chroma[3] = 0;
2595
128
                        au1_avail_chroma[6] = 0;
2596
128
                        au1_avail_chroma[7] = 0;
2597
128
                    }
2598
2599
2.20k
                    {
2600
2.20k
                        au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
2601
2.20k
                        au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
2602
2.20k
                        au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
2603
2.20k
                        au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
2604
                        //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
2605
                        //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
2606
2.20k
                        if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
2607
0
                        {
2608
0
                            au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
2609
0
                            au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
2610
0
                        }
2611
2612
2613
2.20k
                        if(chroma_yuv420sp_vu)
2614
362
                        {
2615
362
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2616
362
                                                                                 src_strd,
2617
362
                                                                                 pu1_src_left_chroma,
2618
362
                                                                                 pu1_src_top_chroma,
2619
362
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2620
362
                                                                                 au1_src_top_right,
2621
362
                                                                                 au1_src_bot_left,
2622
362
                                                                                 au1_avail_chroma,
2623
362
                                                                                 ai1_offset_cr,
2624
362
                                                                                 ai1_offset_cb,
2625
362
                                                                                 sao_wd_chroma,
2626
362
                                                                                 sao_ht_chroma);
2627
362
                        }
2628
1.84k
                        else
2629
1.84k
                        {
2630
1.84k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
2631
1.84k
                                                                                 src_strd,
2632
1.84k
                                                                                 pu1_src_left_chroma,
2633
1.84k
                                                                                 pu1_src_top_chroma,
2634
1.84k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
2635
1.84k
                                                                                 au1_src_top_right,
2636
1.84k
                                                                                 au1_src_bot_left,
2637
1.84k
                                                                                 au1_avail_chroma,
2638
1.84k
                                                                                 ai1_offset_cb,
2639
1.84k
                                                                                 ai1_offset_cr,
2640
1.84k
                                                                                 sao_wd_chroma,
2641
1.84k
                                                                                 sao_ht_chroma);
2642
1.84k
                        }
2643
2.20k
                    }
2644
2645
2.20k
                }
2646
42.4k
            }
2647
66.7k
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2648
37.6k
            {
2649
900k
                for(row = 0; row < sao_ht_chroma; row++)
2650
863k
                {
2651
863k
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
2652
863k
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
2653
863k
                }
2654
37.6k
                pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
2655
37.6k
                pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
2656
2657
37.6k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
2658
37.6k
            }
2659
2660
109k
        }
2661
109k
        pu1_src_luma += sao_wd_luma;
2662
109k
        pu1_src_chroma += sao_wd_chroma;
2663
109k
        ps_sao += 1;
2664
109k
    }
2665
2666
2667
    /* Current CTB */
2668
116k
    {
2669
116k
        WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
2670
116k
        WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
2671
116k
        WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
2672
116k
        WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
2673
116k
        WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
2674
116k
        WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
2675
116k
        WORD32 au4_idx_c[8], idx_c;
2676
2677
116k
        WORD32 remaining_rows;
2678
116k
        WORD32 remaining_cols;
2679
2680
116k
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
2681
116k
        if(remaining_cols <= SAO_SHIFT_CTB)
2682
7.04k
        {
2683
7.04k
            sao_wd_luma += remaining_cols;
2684
7.04k
        }
2685
116k
        remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
2686
116k
        if(remaining_cols <= 2 * SAO_SHIFT_CTB)
2687
7.04k
        {
2688
7.04k
            sao_wd_chroma += remaining_cols;
2689
7.04k
        }
2690
2691
116k
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
2692
116k
        if(remaining_rows <= SAO_SHIFT_CTB)
2693
10.9k
        {
2694
10.9k
            sao_ht_luma += remaining_rows;
2695
10.9k
        }
2696
116k
        remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
2697
116k
        if(remaining_rows <= SAO_SHIFT_CTB)
2698
10.9k
        {
2699
10.9k
            sao_ht_chroma += remaining_rows;
2700
10.9k
        }
2701
2702
116k
        pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2703
116k
        pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
2704
116k
        pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2705
116k
        pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
2706
2707
116k
        if((0 != sao_wd_luma) && (0 != sao_ht_luma))
2708
116k
        {
2709
116k
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
2710
25.3k
            {
2711
25.3k
                if(0 == ps_sao->b3_y_type_idx)
2712
17.5k
                {
2713
                    /* Update left, top and top-left */
2714
934k
                    for(row = 0; row < sao_ht_luma; row++)
2715
917k
                    {
2716
917k
                        pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2717
917k
                    }
2718
17.5k
                    pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2719
2720
17.5k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2721
2722
17.5k
                    pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2723
2724
17.5k
                }
2725
2726
7.82k
                else if(1 == ps_sao->b3_y_type_idx)
2727
2.42k
                {
2728
2.42k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2729
2.42k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2730
2.42k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2731
2.42k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2732
2733
2.42k
                    ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
2734
2.42k
                                                                              src_strd,
2735
2.42k
                                                                              pu1_src_left_luma,
2736
2.42k
                                                                              pu1_src_top_luma,
2737
2.42k
                                                                              pu1_sao_src_top_left_luma_curr_ctb,
2738
2.42k
                                                                              ps_sao->b5_y_band_pos,
2739
2.42k
                                                                              ai1_offset_y,
2740
2.42k
                                                                              sao_wd_luma,
2741
2.42k
                                                                              sao_ht_luma
2742
2.42k
                                                                             );
2743
2.42k
                }
2744
2745
5.40k
                else // if(2 <= ps_sao->b3_y_type_idx)
2746
5.40k
                {
2747
5.40k
                    ai1_offset_y[1] = ps_sao->b4_y_offset_1;
2748
5.40k
                    ai1_offset_y[2] = ps_sao->b4_y_offset_2;
2749
5.40k
                    ai1_offset_y[3] = ps_sao->b4_y_offset_3;
2750
5.40k
                    ai1_offset_y[4] = ps_sao->b4_y_offset_4;
2751
2752
48.6k
                    for(i = 0; i < 8; i++)
2753
43.2k
                    {
2754
43.2k
                        au1_avail_luma[i] = 255;
2755
43.2k
                        au1_tile_slice_boundary[i] = 0;
2756
43.2k
                        au4_idx_c[i] = 0;
2757
43.2k
                        au4_ilf_across_tile_slice_enable[i] = 1;
2758
43.2k
                    }
2759
                    /******************************************************************
2760
                     * Derive the current CTB's neighbour pixels' slice indices.
2761
                     *
2762
                     *
2763
                     *          ____________
2764
                     *         |    |       |
2765
                     *         |    | C_T   |
2766
                     *         |____|_______|____
2767
                     *         |    |       |    |
2768
                     *         | C_L|   C   | C_R|
2769
                     *         |____|_______|    |
2770
                     *              |  C_D       |
2771
                     *              |            |
2772
                     *              |____________|
2773
                     *
2774
                     *****************************************************************/
2775
2776
                    /*In case of slices*/
2777
5.40k
                    {
2778
5.40k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2779
4.24k
                        {
2780
4.24k
                            ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
2781
4.24k
                            ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
2782
2783
4.24k
                            ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
2784
4.24k
                            ctby_c_l = ps_sao_ctxt->i4_ctb_y;
2785
2786
4.24k
                            ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
2787
4.24k
                            ctby_c_r = ps_sao_ctxt->i4_ctb_y;
2788
2789
4.24k
                            ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
2790
4.24k
                            ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
2791
2792
4.24k
                            ctbx_c = ps_sao_ctxt->i4_ctb_x;
2793
4.24k
                            ctby_c = ps_sao_ctxt->i4_ctb_y;
2794
2795
4.24k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
2796
4.04k
                            {
2797
4.04k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
2798
499
                                {
2799
499
                                    au4_idx_c[6] = -1;
2800
499
                                    au4_idx_c[0] = -1;
2801
499
                                    au4_idx_c[4] = -1;
2802
499
                                }
2803
3.54k
                                else
2804
3.54k
                                {
2805
3.54k
                                    au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2806
3.54k
                                }
2807
2808
4.04k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2809
71
                                {
2810
71
                                    au4_idx_c[2] = -1;
2811
71
                                    au4_idx_c[5] = -1;
2812
71
                                    au4_idx_c[4] = -1;
2813
71
                                }
2814
3.97k
                                else
2815
3.97k
                                {
2816
3.97k
                                    au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2817
3.97k
                                    au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2818
3.97k
                                }
2819
4.04k
                                idx_c   = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2820
4.04k
                                au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2821
4.04k
                                au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2822
2823
4.04k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
2824
499
                                {
2825
499
                                    au4_ilf_across_tile_slice_enable[6] = 0;
2826
499
                                    au4_ilf_across_tile_slice_enable[0] = 0;
2827
499
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2828
499
                                }
2829
3.54k
                                else
2830
3.54k
                                {
2831
3.54k
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
2832
3.54k
                                    au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
2833
3.54k
                                }
2834
4.04k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
2835
71
                                {
2836
71
                                    au4_ilf_across_tile_slice_enable[2] = 0;
2837
71
                                    au4_ilf_across_tile_slice_enable[4] = 0;
2838
71
                                    au4_ilf_across_tile_slice_enable[5] = 0;
2839
71
                                }
2840
3.97k
                                else
2841
3.97k
                                {
2842
3.97k
                                    au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2843
3.97k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
2844
3.97k
                                }
2845
4.04k
                                au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
2846
4.04k
                                au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
2847
4.04k
                                au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
2848
2849
4.04k
                                if(au4_idx_c[6] < idx_c)
2850
859
                                {
2851
859
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
2852
859
                                }
2853
2854
                                /*
2855
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
2856
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
2857
                                 */
2858
36.3k
                                for(i = 0; i < 8; i++)
2859
32.3k
                                {
2860
                                    /*Sets the edges that lie on the slice/tile boundary*/
2861
32.3k
                                    if(au4_idx_c[i] != idx_c)
2862
12.7k
                                    {
2863
12.7k
                                        au1_tile_slice_boundary[i] = 1;
2864
12.7k
                                    }
2865
19.5k
                                    else
2866
19.5k
                                    {
2867
19.5k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
2868
19.5k
                                    }
2869
32.3k
                                }
2870
                                /*Reset indices*/
2871
36.3k
                                for(i = 0; i < 8; i++)
2872
32.3k
                                {
2873
32.3k
                                    au4_idx_c[i] = 0;
2874
32.3k
                                }
2875
4.04k
                            }
2876
2877
4.24k
                            if(ps_pps->i1_tiles_enabled_flag)
2878
207
                            {
2879
                                /* Calculate availability flags at tile boundary */
2880
207
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
2881
138
                                {
2882
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
2883
138
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
2884
0
                                    {
2885
0
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
2886
0
                                        {
2887
0
                                            au4_idx_c[6] = -1;
2888
0
                                            au4_idx_c[0] = -1;
2889
0
                                            au4_idx_c[4] = -1;
2890
0
                                        }
2891
0
                                        else
2892
0
                                        {
2893
0
                                            au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
2894
0
                                        }
2895
2896
0
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
2897
0
                                        {
2898
0
                                            au4_idx_c[2] = -1;
2899
0
                                            au4_idx_c[5] = -1;
2900
0
                                            au4_idx_c[4] = -1;
2901
0
                                        }
2902
0
                                        else
2903
0
                                        {
2904
0
                                            au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
2905
0
                                            au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
2906
0
                                        }
2907
0
                                        idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
2908
0
                                        au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
2909
0
                                        au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
2910
2911
0
                                        for(i = 0; i < 8; i++)
2912
0
                                        {
2913
                                            /*Sets the edges that lie on the slice/tile boundary*/
2914
0
                                            if(au4_idx_c[i] != idx_c)
2915
0
                                            {
2916
0
                                                au1_tile_slice_boundary[i] |= 1;
2917
0
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
2918
0
                                            }
2919
0
                                        }
2920
0
                                    }
2921
138
                                }
2922
207
                            }
2923
2924
38.2k
                            for(i = 0; i < 8; i++)
2925
33.9k
                            {
2926
                                /*Sets the edges that lie on the slice/tile boundary*/
2927
33.9k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
2928
712
                                {
2929
712
                                    au1_avail_luma[i] = 0;
2930
712
                                }
2931
33.9k
                            }
2932
2933
4.24k
                        }
2934
5.40k
                    }
2935
5.40k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
2936
754
                    {
2937
754
                        au1_avail_luma[0] = 0;
2938
754
                        au1_avail_luma[4] = 0;
2939
754
                        au1_avail_luma[6] = 0;
2940
754
                    }
2941
2942
5.40k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
2943
276
                    {
2944
276
                        au1_avail_luma[1] = 0;
2945
276
                        au1_avail_luma[5] = 0;
2946
276
                        au1_avail_luma[7] = 0;
2947
276
                    }
2948
2949
5.40k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
2950
955
                    {
2951
955
                        au1_avail_luma[2] = 0;
2952
955
                        au1_avail_luma[4] = 0;
2953
955
                        au1_avail_luma[5] = 0;
2954
955
                    }
2955
2956
5.40k
                    if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
2957
355
                    {
2958
355
                        au1_avail_luma[3] = 0;
2959
355
                        au1_avail_luma[6] = 0;
2960
355
                        au1_avail_luma[7] = 0;
2961
355
                    }
2962
2963
5.40k
                    {
2964
5.40k
                        au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
2965
5.40k
                        u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
2966
2967
5.40k
                        ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
2968
5.40k
                                                                          src_strd,
2969
5.40k
                                                                          pu1_src_left_luma,
2970
5.40k
                                                                          pu1_src_top_luma,
2971
5.40k
                                                                          pu1_sao_src_top_left_luma_curr_ctb,
2972
5.40k
                                                                          au1_src_top_right,
2973
5.40k
                                                                          &u1_sao_src_top_left_luma_bot_left,
2974
5.40k
                                                                          au1_avail_luma,
2975
5.40k
                                                                          ai1_offset_y,
2976
5.40k
                                                                          sao_wd_luma,
2977
5.40k
                                                                          sao_ht_luma);
2978
5.40k
                    }
2979
5.40k
                    pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2980
5.40k
                    pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
2981
5.40k
                }
2982
25.3k
            }
2983
90.8k
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
2984
35.1k
            {
2985
                /* Update left, top and top-left */
2986
1.88M
                for(row = 0; row < sao_ht_luma; row++)
2987
1.84M
                {
2988
1.84M
                    pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
2989
1.84M
                }
2990
35.1k
                pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
2991
2992
35.1k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
2993
2994
35.1k
                pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
2995
35.1k
            }
2996
116k
        }
2997
2998
116k
        if((0 != sao_wd_chroma) && (0 != sao_ht_chroma))
2999
116k
        {
3000
116k
            if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
3001
44.4k
            {
3002
44.4k
                if(0 == ps_sao->b3_cb_type_idx)
3003
40.1k
                {
3004
975k
                    for(row = 0; row < sao_ht_chroma; row++)
3005
935k
                    {
3006
935k
                        pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3007
935k
                        pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3008
935k
                    }
3009
40.1k
                    pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3010
40.1k
                    pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3011
3012
40.1k
                    ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3013
3014
40.1k
                    pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3015
40.1k
                    pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3016
40.1k
                }
3017
3018
4.35k
                else if(1 == ps_sao->b3_cb_type_idx)
3019
1.95k
                {
3020
1.95k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3021
1.95k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3022
1.95k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3023
1.95k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3024
3025
1.95k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3026
1.95k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3027
1.95k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3028
1.95k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3029
3030
1.95k
                    if(chroma_yuv420sp_vu)
3031
260
                    {
3032
260
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3033
260
                                                                                    src_strd,
3034
260
                                                                                    pu1_src_left_chroma,
3035
260
                                                                                    pu1_src_top_chroma,
3036
260
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
3037
260
                                                                                    ps_sao->b5_cr_band_pos,
3038
260
                                                                                    ps_sao->b5_cb_band_pos,
3039
260
                                                                                    ai1_offset_cr,
3040
260
                                                                                    ai1_offset_cb,
3041
260
                                                                                    sao_wd_chroma,
3042
260
                                                                                    sao_ht_chroma
3043
260
                                                                                   );
3044
260
                    }
3045
1.69k
                    else
3046
1.69k
                    {
3047
1.69k
                        ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
3048
1.69k
                                                                                    src_strd,
3049
1.69k
                                                                                    pu1_src_left_chroma,
3050
1.69k
                                                                                    pu1_src_top_chroma,
3051
1.69k
                                                                                    pu1_sao_src_top_left_chroma_curr_ctb,
3052
1.69k
                                                                                    ps_sao->b5_cb_band_pos,
3053
1.69k
                                                                                    ps_sao->b5_cr_band_pos,
3054
1.69k
                                                                                    ai1_offset_cb,
3055
1.69k
                                                                                    ai1_offset_cr,
3056
1.69k
                                                                                    sao_wd_chroma,
3057
1.69k
                                                                                    sao_ht_chroma
3058
1.69k
                                                                                   );
3059
1.69k
                    }
3060
1.95k
                }
3061
3062
2.40k
                else // if(2 <= ps_sao->b3_cb_type_idx)
3063
2.40k
                {
3064
2.40k
                    ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
3065
2.40k
                    ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
3066
2.40k
                    ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
3067
2.40k
                    ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
3068
3069
2.40k
                    ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
3070
2.40k
                    ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
3071
2.40k
                    ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
3072
2.40k
                    ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
3073
3074
21.6k
                    for(i = 0; i < 8; i++)
3075
19.2k
                    {
3076
19.2k
                        au1_avail_chroma[i] = 255;
3077
19.2k
                        au1_tile_slice_boundary[i] = 0;
3078
19.2k
                        au4_idx_c[i] = 0;
3079
19.2k
                        au4_ilf_across_tile_slice_enable[i] = 1;
3080
19.2k
                    }
3081
2.40k
                    {
3082
2.40k
                        if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3083
1.87k
                        {
3084
1.87k
                            ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
3085
1.87k
                            ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
3086
3087
1.87k
                            ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
3088
1.87k
                            ctby_c_l = ps_sao_ctxt->i4_ctb_y;
3089
3090
1.87k
                            ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
3091
1.87k
                            ctby_c_r = ps_sao_ctxt->i4_ctb_y;
3092
3093
1.87k
                            ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
3094
1.87k
                            ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
3095
3096
1.87k
                            ctbx_c = ps_sao_ctxt->i4_ctb_x;
3097
1.87k
                            ctby_c = ps_sao_ctxt->i4_ctb_y;
3098
3099
1.87k
                            if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
3100
1.69k
                            {
3101
1.69k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
3102
212
                                {
3103
212
                                    au4_idx_c[0] = -1;
3104
212
                                    au4_idx_c[4] = -1;
3105
212
                                    au4_idx_c[6] = -1;
3106
212
                                }
3107
1.48k
                                else
3108
1.48k
                                {
3109
1.48k
                                    au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3110
1.48k
                                }
3111
3112
1.69k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
3113
21
                                {
3114
21
                                    au4_idx_c[2] = -1;
3115
21
                                    au4_idx_c[4] = -1;
3116
21
                                    au4_idx_c[5] = -1;
3117
21
                                }
3118
1.67k
                                else
3119
1.67k
                                {
3120
1.67k
                                    au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3121
1.67k
                                    au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3122
1.67k
                                }
3123
1.69k
                                idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3124
1.69k
                                au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3125
1.69k
                                au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3126
3127
1.69k
                                if(0 == ps_sao_ctxt->i4_ctb_x)
3128
212
                                {
3129
212
                                    au4_ilf_across_tile_slice_enable[0] = 0;
3130
212
                                    au4_ilf_across_tile_slice_enable[4] = 0;
3131
212
                                    au4_ilf_across_tile_slice_enable[6] = 0;
3132
212
                                }
3133
1.48k
                                else
3134
1.48k
                                {
3135
1.48k
                                    au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
3136
1.48k
                                    au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3137
1.48k
                                }
3138
3139
1.69k
                                if(0 == ps_sao_ctxt->i4_ctb_y)
3140
21
                                {
3141
21
                                    au4_ilf_across_tile_slice_enable[2] = 0;
3142
21
                                    au4_ilf_across_tile_slice_enable[4] = 0;
3143
21
                                    au4_ilf_across_tile_slice_enable[5] = 0;
3144
21
                                }
3145
1.67k
                                else
3146
1.67k
                                {
3147
1.67k
                                    au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3148
1.67k
                                    au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
3149
1.67k
                                }
3150
3151
1.69k
                                au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
3152
1.69k
                                au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
3153
1.69k
                                au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
3154
3155
1.69k
                                if(idx_c > au4_idx_c[6])
3156
312
                                {
3157
312
                                    au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
3158
312
                                }
3159
3160
                                /*
3161
                                 * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
3162
                                 * of the pixel having a greater address is checked. Accordingly, set the availability flags
3163
                                 */
3164
15.2k
                                for(i = 0; i < 8; i++)
3165
13.5k
                                {
3166
                                    /*Sets the edges that lie on the slice/tile boundary*/
3167
13.5k
                                    if(au4_idx_c[i] != idx_c)
3168
4.72k
                                    {
3169
4.72k
                                        au1_tile_slice_boundary[i] = 1;
3170
4.72k
                                    }
3171
8.83k
                                    else
3172
8.83k
                                    {
3173
8.83k
                                        au4_ilf_across_tile_slice_enable[i] = 1;
3174
8.83k
                                    }
3175
13.5k
                                }
3176
                                /*Reset indices*/
3177
15.2k
                                for(i = 0; i < 8; i++)
3178
13.5k
                                {
3179
13.5k
                                    au4_idx_c[i] = 0;
3180
13.5k
                                }
3181
1.69k
                            }
3182
3183
1.87k
                            if(ps_pps->i1_tiles_enabled_flag)
3184
178
                            {
3185
                                /* Calculate availability flags at tile boundary */
3186
178
                                if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
3187
107
                                {
3188
                                    /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
3189
107
                                    if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
3190
3
                                    {
3191
3
                                        if(0 == ps_sao_ctxt->i4_ctb_x)
3192
0
                                        {
3193
0
                                            au4_idx_c[6] = -1;
3194
0
                                            au4_idx_c[0] = -1;
3195
0
                                            au4_idx_c[4] = -1;
3196
0
                                        }
3197
3
                                        else
3198
3
                                        {
3199
3
                                            au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
3200
3
                                        }
3201
3202
3
                                        if(0 == ps_sao_ctxt->i4_ctb_y)
3203
1
                                        {
3204
1
                                            au4_idx_c[2] = -1;
3205
1
                                            au4_idx_c[5] = -1;
3206
1
                                            au4_idx_c[4] = -1;
3207
1
                                        }
3208
2
                                        else
3209
2
                                        {
3210
2
                                            au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
3211
2
                                            au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
3212
2
                                        }
3213
3
                                        idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
3214
3
                                        au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
3215
3
                                        au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
3216
3217
27
                                        for(i = 0; i < 8; i++)
3218
24
                                        {
3219
                                            /*Sets the edges that lie on the slice/tile boundary*/
3220
24
                                            if(au4_idx_c[i] != idx_c)
3221
11
                                            {
3222
11
                                                au1_tile_slice_boundary[i] |= 1;
3223
11
                                                au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
3224
11
                                            }
3225
24
                                        }
3226
3
                                    }
3227
107
                                }
3228
178
                            }
3229
3230
16.8k
                            for(i = 0; i < 8; i++)
3231
14.9k
                            {
3232
                                /*Sets the edges that lie on the slice/tile boundary*/
3233
14.9k
                                if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
3234
286
                                {
3235
286
                                    au1_avail_chroma[i] = 0;
3236
286
                                }
3237
14.9k
                            }
3238
1.87k
                        }
3239
2.40k
                    }
3240
3241
2.40k
                    if(0 == ps_sao_ctxt->i4_ctb_x)
3242
332
                    {
3243
332
                        au1_avail_chroma[0] = 0;
3244
332
                        au1_avail_chroma[4] = 0;
3245
332
                        au1_avail_chroma[6] = 0;
3246
332
                    }
3247
3248
2.40k
                    if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
3249
198
                    {
3250
198
                        au1_avail_chroma[1] = 0;
3251
198
                        au1_avail_chroma[5] = 0;
3252
198
                        au1_avail_chroma[7] = 0;
3253
198
                    }
3254
3255
2.40k
                    if(0 == ps_sao_ctxt->i4_ctb_y)
3256
322
                    {
3257
322
                        au1_avail_chroma[2] = 0;
3258
322
                        au1_avail_chroma[4] = 0;
3259
322
                        au1_avail_chroma[5] = 0;
3260
322
                    }
3261
3262
2.40k
                    if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
3263
135
                    {
3264
135
                        au1_avail_chroma[3] = 0;
3265
135
                        au1_avail_chroma[6] = 0;
3266
135
                        au1_avail_chroma[7] = 0;
3267
135
                    }
3268
3269
2.40k
                    {
3270
2.40k
                        au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
3271
2.40k
                        au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
3272
3273
2.40k
                        au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
3274
2.40k
                        au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
3275
3276
2.40k
                        if(chroma_yuv420sp_vu)
3277
384
                        {
3278
384
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3279
384
                                                                                 src_strd,
3280
384
                                                                                 pu1_src_left_chroma,
3281
384
                                                                                 pu1_src_top_chroma,
3282
384
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
3283
384
                                                                                 au1_src_top_right,
3284
384
                                                                                 au1_sao_src_top_left_chroma_bot_left,
3285
384
                                                                                 au1_avail_chroma,
3286
384
                                                                                 ai1_offset_cr,
3287
384
                                                                                 ai1_offset_cb,
3288
384
                                                                                 sao_wd_chroma,
3289
384
                                                                                 sao_ht_chroma);
3290
384
                        }
3291
2.01k
                        else
3292
2.01k
                        {
3293
2.01k
                            ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
3294
2.01k
                                                                                 src_strd,
3295
2.01k
                                                                                 pu1_src_left_chroma,
3296
2.01k
                                                                                 pu1_src_top_chroma,
3297
2.01k
                                                                                 pu1_sao_src_top_left_chroma_curr_ctb,
3298
2.01k
                                                                                 au1_src_top_right,
3299
2.01k
                                                                                 au1_sao_src_top_left_chroma_bot_left,
3300
2.01k
                                                                                 au1_avail_chroma,
3301
2.01k
                                                                                 ai1_offset_cb,
3302
2.01k
                                                                                 ai1_offset_cr,
3303
2.01k
                                                                                 sao_wd_chroma,
3304
2.01k
                                                                                 sao_ht_chroma);
3305
2.01k
                        }
3306
2.40k
                    }
3307
3308
2.40k
                }
3309
44.4k
                pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3310
44.4k
                pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3311
3312
44.4k
                pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
3313
44.4k
                pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
3314
44.4k
            }
3315
71.7k
            else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
3316
40.6k
            {
3317
972k
                for(row = 0; row < sao_ht_chroma; row++)
3318
931k
                {
3319
931k
                    pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
3320
931k
                    pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
3321
931k
                }
3322
40.6k
                pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
3323
40.6k
                pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
3324
3325
40.6k
                ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
3326
3327
40.6k
                pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
3328
40.6k
                pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
3329
40.6k
            }
3330
3331
116k
        }
3332
116k
    }
3333
3334
3335
3336
3337
/* Where the no-loop-filter flag is set for a block, copy back the backed up values */
3338
116k
    {
3339
        /* Luma */
3340
116k
        if(no_loop_filter_enabled_luma)
3341
136
        {
3342
136
            UWORD32 u4_no_loop_filter_flag;
3343
136
            WORD32 loop_filter_bit_pos;
3344
136
            WORD32 log2_min_cu = 3;
3345
136
            WORD32 min_cu = (1 << log2_min_cu);
3346
136
            UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
3347
136
            WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
3348
136
            WORD32 sao_blk_wd = ctb_size;
3349
136
            WORD32 remaining_rows;
3350
136
            WORD32 remaining_cols;
3351
3352
136
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3353
136
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
3354
136
            if(remaining_rows <= SAO_SHIFT_CTB)
3355
9
                sao_blk_ht += remaining_rows;
3356
136
            if(remaining_cols <= SAO_SHIFT_CTB)
3357
40
                sao_blk_wd += remaining_cols;
3358
3359
136
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
3360
136
            pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3361
3362
136
            pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
3363
3364
136
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3365
136
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3366
136
            if(ps_sao_ctxt->i4_ctb_x > 0)
3367
104
                loop_filter_bit_pos -= 1;
3368
3369
136
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3370
136
                            (loop_filter_bit_pos >> 3);
3371
3372
136
            for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
3373
778
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3374
642
            {
3375
642
                WORD32 tmp_wd = sao_blk_wd;
3376
3377
642
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3378
642
                                (loop_filter_bit_pos & 7);
3379
642
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3380
3381
642
                if(u4_no_loop_filter_flag)
3382
352
                {
3383
998
                    while(tmp_wd > 0)
3384
646
                    {
3385
646
                        if(CTZ(u4_no_loop_filter_flag))
3386
272
                        {
3387
272
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3388
272
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3389
272
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3390
272
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3391
272
                        }
3392
374
                        else
3393
374
                        {
3394
3.36k
                            for(row = 0; row < min_cu; row++)
3395
2.99k
                            {
3396
76.0k
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3397
73.0k
                                {
3398
73.0k
                                    pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
3399
73.0k
                                }
3400
2.99k
                            }
3401
374
                            pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3402
374
                            pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3403
374
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3404
374
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3405
374
                        }
3406
646
                    }
3407
3408
352
                    pu1_src_tmp_luma -= sao_blk_wd;
3409
352
                    pu1_src_backup_luma -= sao_blk_wd;
3410
352
                }
3411
3412
642
                pu1_src_tmp_luma += (src_strd << log2_min_cu);
3413
642
                pu1_src_backup_luma += (backup_strd << log2_min_cu);
3414
642
            }
3415
136
        }
3416
3417
        /* Chroma */
3418
116k
        if(no_loop_filter_enabled_chroma)
3419
136
        {
3420
136
            UWORD32 u4_no_loop_filter_flag;
3421
136
            WORD32 loop_filter_bit_pos;
3422
136
            WORD32 log2_min_cu = 3;
3423
136
            WORD32 min_cu = (1 << log2_min_cu);
3424
136
            UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
3425
136
            WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
3426
136
            WORD32 sao_blk_wd = ctb_size;
3427
136
            WORD32 remaining_rows;
3428
136
            WORD32 remaining_cols;
3429
3430
136
            remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3431
136
            remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
3432
136
            if(remaining_rows <= 2 * SAO_SHIFT_CTB)
3433
9
                sao_blk_ht += remaining_rows;
3434
136
            if(remaining_cols <= 2 * SAO_SHIFT_CTB)
3435
40
                sao_blk_wd += remaining_cols;
3436
3437
136
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
3438
136
            pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
3439
3440
136
            pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
3441
3442
136
            loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
3443
136
                            (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
3444
136
            if(ps_sao_ctxt->i4_ctb_x > 0)
3445
104
                loop_filter_bit_pos -= 2;
3446
3447
136
            pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
3448
136
                            (loop_filter_bit_pos >> 3);
3449
3450
136
            for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
3451
746
                            i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
3452
610
            {
3453
610
                WORD32 tmp_wd = sao_blk_wd;
3454
3455
610
                u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
3456
610
                                (loop_filter_bit_pos & 7);
3457
610
                u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
3458
3459
610
                if(u4_no_loop_filter_flag)
3460
354
                {
3461
999
                    while(tmp_wd > 0)
3462
645
                    {
3463
645
                        if(CTZ(u4_no_loop_filter_flag))
3464
280
                        {
3465
280
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3466
280
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3467
280
                            tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
3468
280
                            u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
3469
280
                        }
3470
365
                        else
3471
365
                        {
3472
1.82k
                            for(row = 0; row < min_cu / 2; row++)
3473
1.46k
                            {
3474
40.2k
                                for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
3475
38.7k
                                {
3476
38.7k
                                    pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
3477
38.7k
                                }
3478
1.46k
                            }
3479
3480
365
                            pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3481
365
                            pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
3482
365
                            tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
3483
365
                            u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
3484
365
                        }
3485
645
                    }
3486
3487
354
                    pu1_src_tmp_chroma -= sao_blk_wd;
3488
354
                    pu1_src_backup_chroma -= sao_blk_wd;
3489
354
                }
3490
3491
610
                pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
3492
610
                pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
3493
610
            }
3494
136
        }
3495
116k
    }
3496
3497
116k
}
3498