Coverage Report

Created: 2026-04-01 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/common/ihevc_sao.c
Line
Count
Source
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
*******************************************************************************
20
* @file
21
*  ihevc_sao.c
22
*
23
* @brief
24
*  Contains leaf level function definitions for sample adaptive offset process
25
*
26
* @author
27
*  Srinivas T
28
*
29
* @par List of Functions:
30
*   - ihevc_sao_band_offset_luma()
31
*   - ihevc_sao_band_offset_chroma()
32
*   - ihevc_sao_edge_offset_class0()
33
*   - ihevc_sao_edge_offset_class0_chroma()
34
*   - ihevc_sao_edge_offset_class1()
35
*   - ihevc_sao_edge_offset_class1_chroma()
36
*   - ihevc_sao_edge_offset_class2()
37
*   - ihevc_sao_edge_offset_class2_chroma()
38
*   - ihevc_sao_edge_offset_class3()
39
*   - ihevc_sao_edge_offset_class3_chroma()
40
* @remarks
41
*  None
42
*
43
*******************************************************************************
44
*/
45
#include <stdlib.h>
46
#include <assert.h>
47
#include <string.h>
48
#include "ihevc_typedefs.h"
49
#include "ihevc_macros.h"
50
#include "ihevc_platform_macros.h"
51
#include "ihevc_func_selector.h"
52
#include "ihevc_defs.h"
53
#include "ihevc_structs.h"
54
#include "ihevc_sao.h"
55
56
13.5M
/* Number of entries in the band lookup tables used by the band-offset
 * functions below; a sample's band is (sample >> (bit_depth - 5)). */
#define NUM_BAND_TABLE  32
57
58
/* Edge-offset lookup: indexed by 2 + SIGN(cur - neighbour_a) + SIGN(cur - neighbour_b)
 * (range 0..4) and yields the index into the SAO offset array. A sign sum of 2
 * maps to 0, which the edge filters below treat as "leave the sample unchanged". */
const WORD32 gi4_ihevc_table_edge_idx[5] = { 1, 2, 0, 3, 4 };
59
/**
60
 * pu1_avail is an array of flags - one for each neighboring block, specifying whether that block is available
61
 * pu1_avail[0] - left
62
 * pu1_avail[1] - right
63
 * pu1_avail[2] - top
64
 * pu1_avail[3] - bottom
65
 * pu1_avail[4] - top-left
66
 * pu1_avail[5] - top-right
67
 * pu1_avail[6] - bottom-left
68
 * pu1_avail[7] - bottom-right
69
 */
70
71
72
void ihevc_sao_band_offset_luma(UWORD8 *pu1_src,
73
                                WORD32 src_strd,
74
                                UWORD8 *pu1_src_left,
75
                                UWORD8 *pu1_src_top,
76
                                UWORD8 *pu1_src_top_left,
77
                                WORD32 sao_band_pos,
78
                                WORD8 *pi1_sao_offset,
79
                                WORD32 wd,
80
                                WORD32 ht)
81
235k
{
82
235k
    WORD32 band_shift;
83
235k
    WORD32 band_table[NUM_BAND_TABLE];
84
235k
    WORD32 i;
85
235k
    WORD32 row, col;
86
87
    /* Updating left and top and top-left */
88
7.75M
    for(row = 0; row < ht; row++)
89
7.51M
    {
90
7.51M
        pu1_src_left[row] = pu1_src[row * src_strd + (wd - 1)];
91
7.51M
    }
92
235k
    pu1_src_top_left[0] = pu1_src_top[wd - 1];
93
7.73M
    for(col = 0; col < wd; col++)
94
7.50M
    {
95
7.50M
        pu1_src_top[col] = pu1_src[(ht - 1) * src_strd + col];
96
7.50M
    }
97
98
235k
    band_shift = BIT_DEPTH_LUMA - 5;
99
7.78M
    for(i = 0; i < NUM_BAND_TABLE; i++)
100
7.55M
    {
101
7.55M
        band_table[i] = 0;
102
7.55M
    }
103
1.17M
    for(i = 0; i < 4; i++)
104
943k
    {
105
943k
        band_table[(i + sao_band_pos) & 31] = i + 1;
106
943k
    }
107
108
7.74M
    for(row = 0; row < ht; row++)
109
7.50M
    {
110
247M
        for(col = 0; col < wd; col++)
111
239M
        {
112
239M
            WORD32 band_idx;
113
114
239M
            band_idx = band_table[pu1_src[col] >> band_shift];
115
239M
            pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[band_idx], 0, (1 << (band_shift + 5)) - 1);
116
239M
        }
117
7.50M
        pu1_src += src_strd;
118
7.50M
    }
119
235k
}
120
121
122
123
/* input 'wd' has to be for the interleaved block and not for each color component */
124
void ihevc_sao_band_offset_chroma(UWORD8 *pu1_src,
125
                                  WORD32 src_strd,
126
                                  UWORD8 *pu1_src_left,
127
                                  UWORD8 *pu1_src_top,
128
                                  UWORD8 *pu1_src_top_left,
129
                                  WORD32 sao_band_pos_u,
130
                                  WORD32 sao_band_pos_v,
131
                                  WORD8 *pi1_sao_offset_u,
132
                                  WORD8 *pi1_sao_offset_v,
133
                                  WORD32 wd,
134
                                  WORD32 ht)
135
174k
{
136
174k
    WORD32 band_shift;
137
174k
    WORD32 band_table_u[NUM_BAND_TABLE];
138
174k
    WORD32 band_table_v[NUM_BAND_TABLE];
139
174k
    WORD32 i;
140
174k
    WORD32 row, col;
141
142
    /* Updating left and top and top-left */
143
2.95M
    for(row = 0; row < ht; row++)
144
2.78M
    {
145
2.78M
        pu1_src_left[2 * row] = pu1_src[row * src_strd + (wd - 2)];
146
2.78M
        pu1_src_left[2 * row + 1] = pu1_src[row * src_strd + (wd - 1)];
147
2.78M
    }
148
174k
    pu1_src_top_left[0] = pu1_src_top[wd - 2];
149
174k
    pu1_src_top_left[1] = pu1_src_top[wd - 1];
150
5.71M
    for(col = 0; col < wd; col++)
151
5.54M
    {
152
5.54M
        pu1_src_top[col] = pu1_src[(ht - 1) * src_strd + col];
153
5.54M
    }
154
155
156
174k
    band_shift = BIT_DEPTH_CHROMA - 5;
157
5.75M
    for(i = 0; i < NUM_BAND_TABLE; i++)
158
5.58M
    {
159
5.58M
        band_table_u[i] = 0;
160
5.58M
        band_table_v[i] = 0;
161
5.58M
    }
162
871k
    for(i = 0; i < 4; i++)
163
697k
    {
164
697k
        band_table_u[(i + sao_band_pos_u) & 31] = i + 1;
165
697k
        band_table_v[(i + sao_band_pos_v) & 31] = i + 1;
166
697k
    }
167
168
2.95M
    for(row = 0; row < ht; row++)
169
2.77M
    {
170
91.1M
        for(col = 0; col < wd; col++)
171
88.4M
        {
172
88.4M
            WORD32 band_idx;
173
88.4M
            WORD8 *pi1_sao_offset;
174
175
88.4M
            pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;
176
88.4M
            band_idx = (0 == col % 2) ? band_table_u[pu1_src[col] >> band_shift] : band_table_v[pu1_src[col] >> band_shift];
177
88.4M
            pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[band_idx], 0, (1 << (band_shift + 5)) - 1);
178
88.4M
        }
179
2.77M
        pu1_src += src_strd;
180
2.77M
    }
181
174k
}
182
183
184
185
/* Horizontal filtering */
186
void ihevc_sao_edge_offset_class0(UWORD8 *pu1_src,
187
                                  WORD32 src_strd,
188
                                  UWORD8 *pu1_src_left,
189
                                  UWORD8 *pu1_src_top,
190
                                  UWORD8 *pu1_src_top_left,
191
                                  UWORD8 *pu1_src_top_right,
192
                                  UWORD8 *pu1_src_bot_left,
193
                                  UWORD8 *pu1_avail,
194
                                  WORD8 *pi1_sao_offset,
195
                                  WORD32 wd,
196
                                  WORD32 ht)
197
52.1k
{
198
52.1k
    WORD32 row, col;
199
52.1k
    UWORD8 au1_mask[MAX_CTB_SIZE];
200
52.1k
    UWORD8 au1_src_left_tmp[MAX_CTB_SIZE];
201
52.1k
    WORD8 u1_sign_left, u1_sign_right;
202
52.1k
    WORD32 bit_depth;
203
52.1k
    UNUSED(pu1_src_top_right);
204
52.1k
    UNUSED(pu1_src_bot_left);
205
52.1k
    bit_depth = BIT_DEPTH_LUMA;
206
207
    /* Initialize the mask values */
208
52.1k
    memset(au1_mask, 0xFF, MAX_CTB_SIZE);
209
210
    /* Update top and top-left arrays */
211
52.1k
    *pu1_src_top_left = pu1_src_top[wd - 1];
212
1.71M
    for(row = 0; row < ht; row++)
213
1.66M
    {
214
1.66M
        au1_src_left_tmp[row] = pu1_src[row * src_strd + wd - 1];
215
1.66M
    }
216
1.73M
    for(col = 0; col < wd; col++)
217
1.67M
    {
218
1.67M
        pu1_src_top[col] = pu1_src[(ht - 1) * src_strd + col];
219
1.67M
    }
220
221
    /* Update masks based on the availability flags */
222
52.1k
    if(0 == pu1_avail[0])
223
5.15k
    {
224
5.15k
        au1_mask[0] = 0;
225
5.15k
    }
226
52.1k
    if(0 == pu1_avail[1])
227
4.04k
    {
228
4.04k
        au1_mask[wd - 1] = 0;
229
4.04k
    }
230
231
    /* Processing is done on the intermediate buffer and the output is written to the source buffer */
232
52.1k
    {
233
1.71M
        for(row = 0; row < ht; row++)
234
1.66M
        {
235
1.66M
            u1_sign_left = SIGN(pu1_src[0] - pu1_src_left[row]);
236
54.8M
            for(col = 0; col < wd; col++)
237
53.2M
            {
238
53.2M
                WORD32 edge_idx;
239
240
53.2M
                u1_sign_right = SIGN(pu1_src[col] - pu1_src[col + 1]);
241
53.2M
                edge_idx = 2 + u1_sign_left + u1_sign_right;
242
53.2M
                u1_sign_left = -u1_sign_right;
243
244
53.2M
                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];
245
246
53.2M
                if(0 != edge_idx)
247
9.73M
                {
248
9.73M
                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
249
9.73M
                }
250
53.2M
            }
251
252
1.66M
            pu1_src += src_strd;
253
1.66M
        }
254
52.1k
    }
255
256
    /* Update left array */
257
1.71M
    for(row = 0; row < ht; row++)
258
1.66M
    {
259
1.66M
        pu1_src_left[row] = au1_src_left_tmp[row];
260
1.66M
    }
261
262
52.1k
}
263
264
265
266
267
/* input 'wd' has to be for the interleaved block and not for each color component */
268
void ihevc_sao_edge_offset_class0_chroma(UWORD8 *pu1_src,
269
                                         WORD32 src_strd,
270
                                         UWORD8 *pu1_src_left,
271
                                         UWORD8 *pu1_src_top,
272
                                         UWORD8 *pu1_src_top_left,
273
                                         UWORD8 *pu1_src_top_right,
274
                                         UWORD8 *pu1_src_bot_left,
275
                                         UWORD8 *pu1_avail,
276
                                         WORD8 *pi1_sao_offset_u,
277
                                         WORD8 *pi1_sao_offset_v,
278
                                         WORD32 wd,
279
                                         WORD32 ht)
280
27.7k
{
281
27.7k
    WORD32 row, col;
282
27.7k
    UWORD8 au1_mask[MAX_CTB_SIZE];
283
27.7k
    UWORD8 au1_src_left_tmp[2 * MAX_CTB_SIZE];
284
27.7k
    WORD8 u1_sign_left_u, u1_sign_right_u;
285
27.7k
    WORD8 u1_sign_left_v, u1_sign_right_v;
286
27.7k
    WORD32 bit_depth;
287
27.7k
    UNUSED(pu1_src_top_right);
288
27.7k
    UNUSED(pu1_src_bot_left);
289
27.7k
    bit_depth = BIT_DEPTH_CHROMA;
290
291
    /* Initialize the mask values */
292
27.7k
    memset(au1_mask, 0xFF, MAX_CTB_SIZE);
293
294
    /* Update left, top and top-left arrays */
295
27.7k
    pu1_src_top_left[0] = pu1_src_top[wd - 2];
296
27.7k
    pu1_src_top_left[1] = pu1_src_top[wd - 1];
297
467k
    for(row = 0; row < ht; row++)
298
440k
    {
299
440k
        au1_src_left_tmp[2 * row] = pu1_src[row * src_strd + wd - 2];
300
440k
        au1_src_left_tmp[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
301
440k
    }
302
904k
    for(col = 0; col < wd; col++)
303
876k
    {
304
876k
        pu1_src_top[col] = pu1_src[(ht - 1) * src_strd + col];
305
876k
    }
306
307
    /* Update masks based on the availability flags */
308
27.7k
    if(0 == pu1_avail[0])
309
2.32k
    {
310
2.32k
        au1_mask[0] = 0;
311
2.32k
    }
312
27.7k
    if(0 == pu1_avail[1])
313
2.11k
    {
314
2.11k
        au1_mask[(wd - 1) >> 1] = 0;
315
2.11k
    }
316
317
    /* Processing is done on the intermediate buffer and the output is written to the source buffer */
318
27.7k
    {
319
467k
        for(row = 0; row < ht; row++)
320
439k
        {
321
439k
            u1_sign_left_u = SIGN(pu1_src[0] - pu1_src_left[2 * row]);
322
439k
            u1_sign_left_v = SIGN(pu1_src[1] - pu1_src_left[2 * row + 1]);
323
14.4M
            for(col = 0; col < wd; col++)
324
13.9M
            {
325
13.9M
                WORD32 edge_idx;
326
13.9M
                WORD8 *pi1_sao_offset;
327
328
13.9M
                if(0 == col % 2)
329
6.98M
                {
330
6.98M
                    pi1_sao_offset = pi1_sao_offset_u;
331
6.98M
                    u1_sign_right_u = SIGN(pu1_src[col] - pu1_src[col + 2]);
332
6.98M
                    edge_idx = 2 + u1_sign_left_u + u1_sign_right_u;
333
6.98M
                    u1_sign_left_u = -u1_sign_right_u;
334
6.98M
                }
335
6.98M
                else
336
6.98M
                {
337
6.98M
                    pi1_sao_offset = pi1_sao_offset_v;
338
6.98M
                    u1_sign_right_v = SIGN(pu1_src[col] - pu1_src[col + 2]);
339
6.98M
                    edge_idx = 2 + u1_sign_left_v + u1_sign_right_v;
340
6.98M
                    u1_sign_left_v = -u1_sign_right_v;
341
6.98M
                }
342
343
13.9M
                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];
344
345
13.9M
                if(0 != edge_idx)
346
1.04M
                {
347
1.04M
                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
348
1.04M
                }
349
13.9M
            }
350
351
439k
            pu1_src += src_strd;
352
439k
        }
353
27.7k
    }
354
355
908k
    for(row = 0; row < 2 * ht; row++)
356
880k
    {
357
880k
        pu1_src_left[row] = au1_src_left_tmp[row];
358
880k
    }
359
360
27.7k
}
361
362
363
364
/* Vertical filtering */
365
void ihevc_sao_edge_offset_class1(UWORD8 *pu1_src,
366
                                  WORD32 src_strd,
367
                                  UWORD8 *pu1_src_left,
368
                                  UWORD8 *pu1_src_top,
369
                                  UWORD8 *pu1_src_top_left,
370
                                  UWORD8 *pu1_src_top_right,
371
                                  UWORD8 *pu1_src_bot_left,
372
                                  UWORD8 *pu1_avail,
373
                                  WORD8 *pi1_sao_offset,
374
                                  WORD32 wd,
375
                                  WORD32 ht)
376
36.3k
{
377
36.3k
    WORD32 row, col;
378
36.3k
    UWORD8 au1_mask[MAX_CTB_SIZE];
379
36.3k
    UWORD8 au1_src_top_tmp[MAX_CTB_SIZE];
380
36.3k
    WORD8 au1_sign_up[MAX_CTB_SIZE];
381
36.3k
    WORD8 u1_sign_down;
382
36.3k
    WORD32 bit_depth;
383
36.3k
    UNUSED(pu1_src_top_right);
384
36.3k
    UNUSED(pu1_src_bot_left);
385
386
36.3k
    bit_depth = BIT_DEPTH_LUMA;
387
388
    /* Initialize the mask values */
389
36.3k
    memset(au1_mask, 0xFF, MAX_CTB_SIZE);
390
391
    /* Update left, top and top-left arrays */
392
36.3k
    *pu1_src_top_left = pu1_src_top[wd - 1];
393
1.19M
    for(row = 0; row < ht; row++)
394
1.15M
    {
395
1.15M
        pu1_src_left[row] = pu1_src[row * src_strd + wd - 1];
396
1.15M
    }
397
1.19M
    for(col = 0; col < wd; col++)
398
1.15M
    {
399
1.15M
        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
400
1.15M
    }
401
402
    /* Update height and source pointers based on the availability flags */
403
36.3k
    if(0 == pu1_avail[2])
404
6.87k
    {
405
6.87k
        pu1_src += src_strd;
406
6.87k
        ht--;
407
221k
        for(col = 0; col < wd; col++)
408
215k
        {
409
215k
            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - src_strd]);
410
215k
        }
411
6.87k
    }
412
29.4k
    else
413
29.4k
    {
414
974k
        for(col = 0; col < wd; col++)
415
944k
        {
416
944k
            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col]);
417
944k
        }
418
29.4k
    }
419
36.3k
    if(0 == pu1_avail[3])
420
6.66k
    {
421
6.66k
        ht--;
422
6.66k
    }
423
424
    /* Processing is done on the intermediate buffer and the output is written to the source buffer */
425
36.3k
    {
426
1.17M
        for(row = 0; row < ht; row++)
427
1.13M
        {
428
37.7M
            for(col = 0; col < wd; col++)
429
36.5M
            {
430
36.5M
                WORD32 edge_idx;
431
432
36.5M
                u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + src_strd]);
433
36.5M
                edge_idx = 2 + au1_sign_up[col] + u1_sign_down;
434
36.5M
                au1_sign_up[col] = -u1_sign_down;
435
436
36.5M
                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];
437
438
36.5M
                if(0 != edge_idx)
439
6.15M
                {
440
6.15M
                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
441
6.15M
                }
442
36.5M
            }
443
444
1.13M
            pu1_src += src_strd;
445
1.13M
        }
446
36.3k
    }
447
448
1.19M
    for(col = 0; col < wd; col++)
449
1.16M
    {
450
1.16M
        pu1_src_top[col] = au1_src_top_tmp[col];
451
1.16M
    }
452
453
36.3k
}
454
455
456
457
/* input 'wd' has to be for the interleaved block and not for each color component */
458
void ihevc_sao_edge_offset_class1_chroma(UWORD8 *pu1_src,
459
                                         WORD32 src_strd,
460
                                         UWORD8 *pu1_src_left,
461
                                         UWORD8 *pu1_src_top,
462
                                         UWORD8 *pu1_src_top_left,
463
                                         UWORD8 *pu1_src_top_right,
464
                                         UWORD8 *pu1_src_bot_left,
465
                                         UWORD8 *pu1_avail,
466
                                         WORD8 *pi1_sao_offset_u,
467
                                         WORD8 *pi1_sao_offset_v,
468
                                         WORD32 wd,
469
                                         WORD32 ht)
470
34.6k
{
471
34.6k
    WORD32 row, col;
472
34.6k
    UWORD8 au1_mask[MAX_CTB_SIZE];
473
34.6k
    UWORD8 au1_src_top_tmp[MAX_CTB_SIZE];
474
34.6k
    WORD8 au1_sign_up[MAX_CTB_SIZE];
475
34.6k
    WORD8 u1_sign_down;
476
34.6k
    WORD32 bit_depth;
477
34.6k
    UNUSED(pu1_src_top_right);
478
34.6k
    UNUSED(pu1_src_bot_left);
479
480
34.6k
    bit_depth = BIT_DEPTH_CHROMA;
481
482
    /* Initialize the mask values */
483
34.6k
    memset(au1_mask, 0xFF, MAX_CTB_SIZE);
484
485
    /* Update left, top and top-left arrays */
486
34.6k
    pu1_src_top_left[0] = pu1_src_top[wd - 2];
487
34.6k
    pu1_src_top_left[1] = pu1_src_top[wd - 1];
488
575k
    for(row = 0; row < ht; row++)
489
540k
    {
490
540k
        pu1_src_left[2 * row] = pu1_src[row * src_strd + wd - 2];
491
540k
        pu1_src_left[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
492
540k
    }
493
1.10M
    for(col = 0; col < wd; col++)
494
1.07M
    {
495
1.07M
        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
496
1.07M
    }
497
498
    /* Update height and source pointers based on the availability flags */
499
34.6k
    if(0 == pu1_avail[2])
500
7.81k
    {
501
7.81k
        pu1_src += src_strd;
502
7.81k
        ht--;
503
251k
        for(col = 0; col < wd; col++)
504
244k
        {
505
244k
            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - src_strd]);
506
244k
        }
507
7.81k
    }
508
26.8k
    else
509
26.8k
    {
510
856k
        for(col = 0; col < wd; col++)
511
830k
        {
512
830k
            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col]);
513
830k
        }
514
26.8k
    }
515
34.6k
    if(0 == pu1_avail[3])
516
6.29k
    {
517
6.29k
        ht--;
518
6.29k
    }
519
520
    /* Processing is done on the intermediate buffer and the output is written to the source buffer */
521
34.6k
    {
522
560k
        for(row = 0; row < ht; row++)
523
525k
        {
524
17.0M
            for(col = 0; col < wd; col++)
525
16.5M
            {
526
16.5M
                WORD32 edge_idx;
527
16.5M
                WORD8 *pi1_sao_offset;
528
529
16.5M
                pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;
530
531
16.5M
                u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + src_strd]);
532
16.5M
                edge_idx = 2 + au1_sign_up[col] + u1_sign_down;
533
16.5M
                au1_sign_up[col] = -u1_sign_down;
534
535
16.5M
                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];
536
537
16.5M
                if(0 != edge_idx)
538
2.07M
                {
539
2.07M
                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
540
2.07M
                }
541
16.5M
            }
542
543
525k
            pu1_src += src_strd;
544
525k
        }
545
34.6k
    }
546
547
1.10M
    for(col = 0; col < wd; col++)
548
1.07M
    {
549
1.07M
        pu1_src_top[col] = au1_src_top_tmp[col];
550
1.07M
    }
551
552
34.6k
}
553
554
555
556
/* 135 degree filtering */
557
void ihevc_sao_edge_offset_class2(UWORD8 *pu1_src,
558
                                  WORD32 src_strd,
559
                                  UWORD8 *pu1_src_left,
560
                                  UWORD8 *pu1_src_top,
561
                                  UWORD8 *pu1_src_top_left,
562
                                  UWORD8 *pu1_src_top_right,
563
                                  UWORD8 *pu1_src_bot_left,
564
                                  UWORD8 *pu1_avail,
565
                                  WORD8 *pi1_sao_offset,
566
                                  WORD32 wd,
567
                                  WORD32 ht)
568
44.6k
{
569
44.6k
    WORD32 row, col;
570
44.6k
    UWORD8 au1_mask[MAX_CTB_SIZE];
571
44.6k
    UWORD8 au1_src_left_tmp[MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
572
44.6k
    UWORD8 u1_src_top_left_tmp;
573
44.6k
    WORD8 au1_sign_up[MAX_CTB_SIZE + 1], au1_sign_up_tmp[MAX_CTB_SIZE + 1];
574
44.6k
    WORD8 u1_sign_down;
575
44.6k
    WORD8 *pu1_sign_up;
576
44.6k
    WORD8 *pu1_sign_up_tmp;
577
44.6k
    UWORD8 *pu1_src_left_cpy;
578
579
44.6k
    WORD32 bit_depth;
580
44.6k
    UWORD8 u1_pos_0_0_tmp;
581
44.6k
    UWORD8 u1_pos_wd_ht_tmp;
582
44.6k
    UNUSED(pu1_src_top_right);
583
44.6k
    UNUSED(pu1_src_bot_left);
584
585
44.6k
    bit_depth = BIT_DEPTH_LUMA;
586
44.6k
    pu1_sign_up = au1_sign_up;
587
44.6k
    pu1_sign_up_tmp = au1_sign_up_tmp;
588
44.6k
    pu1_src_left_cpy = pu1_src_left;
589
590
    /* Initialize the mask values */
591
44.6k
    memset(au1_mask, 0xFF, MAX_CTB_SIZE);
592
593
    /* Update left, top and top-left arrays */
594
44.6k
    u1_src_top_left_tmp = pu1_src_top[wd - 1];
595
1.42M
    for(row = 0; row < ht; row++)
596
1.38M
    {
597
1.38M
        au1_src_left_tmp[row] = pu1_src[row * src_strd + wd - 1];
598
1.38M
    }
599
1.44M
    for(col = 0; col < wd; col++)
600
1.39M
    {
601
1.39M
        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
602
1.39M
    }
603
604
605
    /* If top-left is available, process separately */
606
44.6k
    if(0 != pu1_avail[4])
607
32.8k
    {
608
32.8k
        WORD32 edge_idx;
609
610
32.8k
        edge_idx = 2 + SIGN(pu1_src[0] - pu1_src_top_left[0]) +
611
32.8k
                        SIGN(pu1_src[0] - pu1_src[1 + src_strd]);
612
613
32.8k
        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
614
615
32.8k
        if(0 != edge_idx)
616
7.02k
        {
617
7.02k
            u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
618
7.02k
        }
619
25.8k
        else
620
25.8k
        {
621
25.8k
            u1_pos_0_0_tmp = pu1_src[0];
622
25.8k
        }
623
32.8k
    }
624
11.7k
    else
625
11.7k
    {
626
11.7k
        u1_pos_0_0_tmp = pu1_src[0];
627
11.7k
    }
628
629
    /* If bottom-right is available, process separately */
630
44.6k
    if(0 != pu1_avail[7])
631
33.2k
    {
632
33.2k
        WORD32 edge_idx;
633
634
33.2k
        edge_idx = 2 + SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd - 1 - src_strd]) +
635
33.2k
                        SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd + 1 + src_strd]);
636
637
33.2k
        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
638
639
33.2k
        if(0 != edge_idx)
640
7.65k
        {
641
7.65k
            u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
642
7.65k
        }
643
25.5k
        else
644
25.5k
        {
645
25.5k
            u1_pos_wd_ht_tmp = pu1_src[wd - 1 + (ht - 1) * src_strd];
646
25.5k
        }
647
33.2k
    }
648
11.4k
    else
649
11.4k
    {
650
11.4k
        u1_pos_wd_ht_tmp = pu1_src[wd - 1 + (ht - 1) * src_strd];
651
11.4k
    }
652
653
    /* If Left is not available */
654
44.6k
    if(0 == pu1_avail[0])
655
2.58k
    {
656
2.58k
        au1_mask[0] = 0;
657
2.58k
    }
658
659
    /* If Top is not available */
660
44.6k
    if(0 == pu1_avail[2])
661
9.83k
    {
662
9.83k
        pu1_src += src_strd;
663
9.83k
        ht--;
664
9.83k
        pu1_src_left_cpy += 1;
665
311k
        for(col = 1; col < wd; col++)
666
302k
        {
667
302k
            pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - 1 - src_strd]);
668
302k
        }
669
9.83k
    }
670
34.7k
    else
671
34.7k
    {
672
1.08M
        for(col = 1; col < wd; col++)
673
1.04M
        {
674
1.04M
            pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col - 1]);
675
1.04M
        }
676
34.7k
    }
677
678
    /* If Right is not available */
679
44.6k
    if(0 == pu1_avail[1])
680
3.95k
    {
681
3.95k
        au1_mask[wd - 1] = 0;
682
3.95k
    }
683
684
    /* If Bottom is not available */
685
44.6k
    if(0 == pu1_avail[3])
686
8.29k
    {
687
8.29k
        ht--;
688
8.29k
    }
689
690
    /* Processing is done on the intermediate buffer and the output is written to the source buffer */
691
44.6k
    {
692
1.40M
        for(row = 0; row < ht; row++)
693
1.35M
        {
694
1.35M
            pu1_sign_up[0] = SIGN(pu1_src[0] - pu1_src_left_cpy[row - 1]);
695
44.6M
            for(col = 0; col < wd; col++)
696
43.2M
            {
697
43.2M
                WORD32 edge_idx;
698
699
43.2M
                u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + 1 + src_strd]);
700
43.2M
                edge_idx = 2 + pu1_sign_up[col] + u1_sign_down;
701
43.2M
                pu1_sign_up_tmp[col + 1] = -u1_sign_down;
702
703
43.2M
                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];
704
705
43.2M
                if(0 != edge_idx)
706
7.67M
                {
707
7.67M
                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
708
7.67M
                }
709
43.2M
            }
710
711
            /* Swapping pu1_sign_up_tmp and pu1_sign_up */
712
1.35M
            {
713
1.35M
                WORD8 *pu1_swap_tmp = pu1_sign_up;
714
1.35M
                pu1_sign_up = pu1_sign_up_tmp;
715
1.35M
                pu1_sign_up_tmp = pu1_swap_tmp;
716
1.35M
            }
717
718
1.35M
            pu1_src += src_strd;
719
1.35M
        }
720
721
44.6k
        pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd] = u1_pos_0_0_tmp;
722
44.6k
        pu1_src[(pu1_avail[3] ? wd - 1 - src_strd : wd - 1)] = u1_pos_wd_ht_tmp;
723
44.6k
    }
724
725
44.6k
    if(0 == pu1_avail[2])
726
9.83k
        ht++;
727
44.6k
    if(0 == pu1_avail[3])
728
8.29k
        ht++;
729
44.6k
    *pu1_src_top_left = u1_src_top_left_tmp;
730
1.42M
    for(row = 0; row < ht; row++)
731
1.38M
    {
732
1.38M
        pu1_src_left[row] = au1_src_left_tmp[row];
733
1.38M
    }
734
1.43M
    for(col = 0; col < wd; col++)
735
1.39M
    {
736
1.39M
        pu1_src_top[col] = au1_src_top_tmp[col];
737
1.39M
    }
738
739
44.6k
}
740
741
742
743
744
/* 135 degree filtering */
745
void ihevc_sao_edge_offset_class2_chroma(UWORD8 *pu1_src,
746
                                         WORD32 src_strd,
747
                                         UWORD8 *pu1_src_left,
748
                                         UWORD8 *pu1_src_top,
749
                                         UWORD8 *pu1_src_top_left,
750
                                         UWORD8 *pu1_src_top_right,
751
                                         UWORD8 *pu1_src_bot_left,
752
                                         UWORD8 *pu1_avail,
753
                                         WORD8 *pi1_sao_offset_u,
754
                                         WORD8 *pi1_sao_offset_v,
755
                                         WORD32 wd,
756
                                         WORD32 ht)
757
43.3k
{
758
43.3k
    WORD32 row, col;
759
43.3k
    UWORD8 au1_mask[MAX_CTB_SIZE];
760
43.3k
    UWORD8 au1_src_left_tmp[2 * MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
761
43.3k
    UWORD8 au1_src_top_left_tmp[2];
762
43.3k
    WORD8 au1_sign_up[MAX_CTB_SIZE + 2], au1_sign_up_tmp[MAX_CTB_SIZE + 2];
763
43.3k
    WORD8 u1_sign_down;
764
43.3k
    WORD8 *pu1_sign_up;
765
43.3k
    WORD8 *pu1_sign_up_tmp;
766
43.3k
    UWORD8 *pu1_src_left_cpy;
767
768
43.3k
    WORD32 bit_depth;
769
770
43.3k
    UWORD8 u1_pos_0_0_tmp_u;
771
43.3k
    UWORD8 u1_pos_0_0_tmp_v;
772
43.3k
    UWORD8 u1_pos_wd_ht_tmp_u;
773
43.3k
    UWORD8 u1_pos_wd_ht_tmp_v;
774
43.3k
    UNUSED(pu1_src_top_right);
775
43.3k
    UNUSED(pu1_src_bot_left);
776
777
778
43.3k
    bit_depth = BIT_DEPTH_CHROMA;
779
43.3k
    pu1_sign_up = au1_sign_up;
780
43.3k
    pu1_sign_up_tmp = au1_sign_up_tmp;
781
43.3k
    pu1_src_left_cpy = pu1_src_left;
782
783
    /* Initialize the mask values */
784
43.3k
    memset(au1_mask, 0xFF, MAX_CTB_SIZE);
785
786
    /* Update left, top and top-left arrays */
787
43.3k
    au1_src_top_left_tmp[0] = pu1_src_top[wd - 2];
788
43.3k
    au1_src_top_left_tmp[1] = pu1_src_top[wd - 1];
789
730k
    for(row = 0; row < ht; row++)
790
686k
    {
791
686k
        au1_src_left_tmp[2 * row] = pu1_src[row * src_strd + wd - 2];
792
686k
        au1_src_left_tmp[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
793
686k
    }
794
1.44M
    for(col = 0; col < wd; col++)
795
1.39M
    {
796
1.39M
        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
797
1.39M
    }
798
799
800
    /* If top-left is available, process separately */
801
43.3k
    if(0 != pu1_avail[4])
802
32.2k
    {
803
32.2k
        WORD32 edge_idx;
804
805
        /* U */
806
32.2k
        edge_idx = 2 + SIGN(pu1_src[0] - pu1_src_top_left[0]) +
807
32.2k
                        SIGN(pu1_src[0] - pu1_src[2 + src_strd]);
808
809
32.2k
        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
810
811
32.2k
        if(0 != edge_idx)
812
5.11k
        {
813
5.11k
            u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
814
5.11k
        }
815
27.1k
        else
816
27.1k
        {
817
27.1k
            u1_pos_0_0_tmp_u = pu1_src[0];
818
27.1k
        }
819
820
        /* V */
821
32.2k
        edge_idx = 2 + SIGN(pu1_src[1] - pu1_src_top_left[1]) +
822
32.2k
                        SIGN(pu1_src[1] - pu1_src[1 + 2 + src_strd]);
823
824
32.2k
        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
825
826
32.2k
        if(0 != edge_idx)
827
4.49k
        {
828
4.49k
            u1_pos_0_0_tmp_v = CLIP3(pu1_src[1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
829
4.49k
        }
830
27.7k
        else
831
27.7k
        {
832
27.7k
            u1_pos_0_0_tmp_v = pu1_src[1];
833
27.7k
        }
834
32.2k
    }
835
11.1k
    else
836
11.1k
    {
837
11.1k
        u1_pos_0_0_tmp_u = pu1_src[0];
838
11.1k
        u1_pos_0_0_tmp_v = pu1_src[1];
839
11.1k
    }
840
841
    /* If bottom-right is available, process separately */
842
43.3k
    if(0 != pu1_avail[7])
843
31.2k
    {
844
31.2k
        WORD32 edge_idx;
845
846
        /* U */
847
31.2k
        edge_idx = 2 + SIGN(pu1_src[wd - 2 + (ht - 1) * src_strd] - pu1_src[wd - 2 + (ht - 1) * src_strd - 2 - src_strd]) +
848
31.2k
                        SIGN(pu1_src[wd - 2 + (ht - 1) * src_strd] - pu1_src[wd - 2 + (ht - 1) * src_strd + 2 + src_strd]);
849
850
31.2k
        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
851
852
31.2k
        if(0 != edge_idx)
853
3.63k
        {
854
3.63k
            u1_pos_wd_ht_tmp_u = CLIP3(pu1_src[wd - 2 + (ht - 1) * src_strd] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
855
3.63k
        }
856
27.6k
        else
857
27.6k
        {
858
27.6k
            u1_pos_wd_ht_tmp_u = pu1_src[wd - 2 + (ht - 1) * src_strd];
859
27.6k
        }
860
861
        /* V */
862
31.2k
        edge_idx = 2 + SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd - 2 - src_strd]) +
863
31.2k
                        SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd + 2 + src_strd]);
864
865
31.2k
        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
866
867
31.2k
        if(0 != edge_idx)
868
3.43k
        {
869
3.43k
            u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
870
3.43k
        }
871
27.8k
        else
872
27.8k
        {
873
27.8k
            u1_pos_wd_ht_tmp_v = pu1_src[wd - 1 + (ht - 1) * src_strd];
874
27.8k
        }
875
31.2k
    }
876
12.1k
    else
877
12.1k
    {
878
12.1k
        u1_pos_wd_ht_tmp_u = pu1_src[wd - 2 + (ht - 1) * src_strd];
879
12.1k
        u1_pos_wd_ht_tmp_v = pu1_src[wd - 1 + (ht - 1) * src_strd];
880
12.1k
    }
881
882
    /* If Left is not available */
883
43.3k
    if(0 == pu1_avail[0])
884
2.84k
    {
885
2.84k
        au1_mask[0] = 0;
886
2.84k
    }
887
888
    /* If Top is not available */
889
43.3k
    if(0 == pu1_avail[2])
890
8.73k
    {
891
8.73k
        pu1_src += src_strd;
892
8.73k
        pu1_src_left_cpy += 2;
893
8.73k
        ht--;
894
268k
        for(col = 2; col < wd; col++)
895
259k
        {
896
259k
            pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - 2 - src_strd]);
897
259k
        }
898
8.73k
    }
899
34.6k
    else
900
34.6k
    {
901
1.08M
        for(col = 2; col < wd; col++)
902
1.05M
        {
903
1.05M
            pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col - 2]);
904
1.05M
        }
905
34.6k
    }
906
907
    /* If Right is not available */
908
43.3k
    if(0 == pu1_avail[1])
909
3.66k
    {
910
3.66k
        au1_mask[(wd - 1) >> 1] = 0;
911
3.66k
    }
912
913
    /* If Bottom is not available */
914
43.3k
    if(0 == pu1_avail[3])
915
9.02k
    {
916
9.02k
        ht--;
917
9.02k
    }
918
919
    /* Processing is done on the intermediate buffer and the output is written to the source buffer */
920
43.3k
    {
921
711k
        for(row = 0; row < ht; row++)
922
668k
        {
923
668k
            pu1_sign_up[0] = SIGN(pu1_src[0] - pu1_src_left_cpy[2 * (row - 1)]);
924
668k
            pu1_sign_up[1] = SIGN(pu1_src[1] - pu1_src_left_cpy[2 * (row - 1) + 1]);
925
22.3M
            for(col = 0; col < wd; col++)
926
21.6M
            {
927
21.6M
                WORD32 edge_idx;
928
21.6M
                WORD8 *pi1_sao_offset;
929
930
21.6M
                pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;
931
932
21.6M
                u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + 2 + src_strd]);
933
21.6M
                edge_idx = 2 + pu1_sign_up[col] + u1_sign_down;
934
21.6M
                pu1_sign_up_tmp[col + 2] = -u1_sign_down;
935
936
21.6M
                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];
937
938
21.6M
                if(0 != edge_idx)
939
1.63M
                {
940
1.63M
                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
941
1.63M
                }
942
21.6M
            }
943
944
            /* Swapping pu1_sign_up_tmp and pu1_sign_up */
945
668k
            {
946
668k
                WORD8 *pu1_swap_tmp = pu1_sign_up;
947
668k
                pu1_sign_up = pu1_sign_up_tmp;
948
668k
                pu1_sign_up_tmp = pu1_swap_tmp;
949
668k
            }
950
951
668k
            pu1_src += src_strd;
952
668k
        }
953
954
43.3k
        pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd] = u1_pos_0_0_tmp_u;
955
43.3k
        pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + 1] = u1_pos_0_0_tmp_v;
956
43.3k
        pu1_src[(pu1_avail[3] ? wd - 2 - src_strd : wd - 2)] = u1_pos_wd_ht_tmp_u;
957
43.3k
        pu1_src[(pu1_avail[3] ? wd - 1 - src_strd : wd - 1)] = u1_pos_wd_ht_tmp_v;
958
43.3k
    }
959
960
43.3k
    if(0 == pu1_avail[2])
961
8.73k
        ht++;
962
43.3k
    if(0 == pu1_avail[3])
963
9.02k
        ht++;
964
43.3k
    pu1_src_top_left[0] = au1_src_top_left_tmp[0];
965
43.3k
    pu1_src_top_left[1] = au1_src_top_left_tmp[1];
966
1.41M
    for(row = 0; row < 2 * ht; row++)
967
1.37M
    {
968
1.37M
        pu1_src_left[row] = au1_src_left_tmp[row];
969
1.37M
    }
970
1.44M
    for(col = 0; col < wd; col++)
971
1.39M
    {
972
1.39M
        pu1_src_top[col] = au1_src_top_tmp[col];
973
1.39M
    }
974
975
43.3k
}
976
977
978
979
980
/* 45 degree filtering */
981
void ihevc_sao_edge_offset_class3(UWORD8 *pu1_src,
982
                                  WORD32 src_strd,
983
                                  UWORD8 *pu1_src_left,
984
                                  UWORD8 *pu1_src_top,
985
                                  UWORD8 *pu1_src_top_left,
986
                                  UWORD8 *pu1_src_top_right,
987
                                  UWORD8 *pu1_src_bot_left,
988
                                  UWORD8 *pu1_avail,
989
                                  WORD8 *pi1_sao_offset,
990
                                  WORD32 wd,
991
                                  WORD32 ht)
992
53.5k
{
993
53.5k
    WORD32 row, col;
994
53.5k
    UWORD8 au1_mask[MAX_CTB_SIZE];
995
53.5k
    UWORD8 au1_src_top_tmp[MAX_CTB_SIZE];
996
53.5k
    UWORD8 au1_src_left_tmp[MAX_CTB_SIZE];
997
53.5k
    UWORD8 u1_src_top_left_tmp;
998
53.5k
    WORD8 au1_sign_up[MAX_CTB_SIZE];
999
53.5k
    UWORD8 *pu1_src_left_cpy;
1000
53.5k
    WORD8 u1_sign_down;
1001
53.5k
    WORD32 bit_depth;
1002
1003
53.5k
    UWORD8 u1_pos_0_ht_tmp;
1004
53.5k
    UWORD8 u1_pos_wd_0_tmp;
1005
1006
53.5k
    bit_depth = BIT_DEPTH_LUMA;
1007
53.5k
    pu1_src_left_cpy = pu1_src_left;
1008
1009
    /* Initialize the mask values */
1010
53.5k
    memset(au1_mask, 0xFF, MAX_CTB_SIZE);
1011
1012
    /* Update left, top and top-left arrays */
1013
53.5k
    u1_src_top_left_tmp = pu1_src_top[wd - 1];
1014
1.67M
    for(row = 0; row < ht; row++)
1015
1.62M
    {
1016
1.62M
        au1_src_left_tmp[row] = pu1_src[row * src_strd + wd - 1];
1017
1.62M
    }
1018
1.69M
    for(col = 0; col < wd; col++)
1019
1.63M
    {
1020
1.63M
        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
1021
1.63M
    }
1022
1023
    /* If top-right is available, process separately */
1024
53.5k
    if(0 != pu1_avail[5])
1025
39.8k
    {
1026
39.8k
        WORD32 edge_idx;
1027
1028
39.8k
        edge_idx = 2 + SIGN(pu1_src[wd - 1] - pu1_src_top_right[0]) +
1029
39.8k
                        SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 1 + src_strd]);
1030
1031
39.8k
        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
1032
1033
39.8k
        if(0 != edge_idx)
1034
10.5k
        {
1035
10.5k
            u1_pos_wd_0_tmp = CLIP3(pu1_src[wd - 1] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
1036
10.5k
        }
1037
29.2k
        else
1038
29.2k
        {
1039
29.2k
            u1_pos_wd_0_tmp = pu1_src[wd - 1];
1040
29.2k
        }
1041
39.8k
    }
1042
13.7k
    else
1043
13.7k
    {
1044
13.7k
        u1_pos_wd_0_tmp = pu1_src[wd - 1];
1045
13.7k
    }
1046
1047
    /* If bottom-left is available, process separately */
1048
53.5k
    if(0 != pu1_avail[6])
1049
38.8k
    {
1050
38.8k
        WORD32 edge_idx;
1051
1052
38.8k
        edge_idx = 2 + SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd + 1 - src_strd]) +
1053
38.8k
                        SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0]);
1054
1055
38.8k
        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
1056
1057
38.8k
        if(0 != edge_idx)
1058
11.2k
        {
1059
11.2k
            u1_pos_0_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
1060
11.2k
        }
1061
27.6k
        else
1062
27.6k
        {
1063
27.6k
            u1_pos_0_ht_tmp = pu1_src[(ht - 1) * src_strd];
1064
27.6k
        }
1065
38.8k
    }
1066
14.7k
    else
1067
14.7k
    {
1068
14.7k
        u1_pos_0_ht_tmp = pu1_src[(ht - 1) * src_strd];
1069
14.7k
    }
1070
1071
    /* If Left is not available */
1072
53.5k
    if(0 == pu1_avail[0])
1073
6.73k
    {
1074
6.73k
        au1_mask[0] = 0;
1075
6.73k
    }
1076
1077
    /* If Top is not available */
1078
53.5k
    if(0 == pu1_avail[2])
1079
11.1k
    {
1080
11.1k
        pu1_src += src_strd;
1081
11.1k
        ht--;
1082
11.1k
        pu1_src_left_cpy += 1;
1083
349k
        for(col = 0; col < wd - 1; col++)
1084
338k
        {
1085
338k
            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col + 1 - src_strd]);
1086
338k
        }
1087
11.1k
    }
1088
42.3k
    else
1089
42.3k
    {
1090
1.28M
        for(col = 0; col < wd - 1; col++)
1091
1.24M
        {
1092
1.24M
            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col + 1]);
1093
1.24M
        }
1094
42.3k
    }
1095
1096
    /* If Right is not available */
1097
53.5k
    if(0 == pu1_avail[1])
1098
3.28k
    {
1099
3.28k
        au1_mask[wd - 1] = 0;
1100
3.28k
    }
1101
1102
    /* If Bottom is not available */
1103
53.5k
    if(0 == pu1_avail[3])
1104
9.25k
    {
1105
9.25k
        ht--;
1106
9.25k
    }
1107
1108
    /* Processing is done on the intermediate buffer and the output is written to the source buffer */
1109
53.5k
    {
1110
1.65M
        for(row = 0; row < ht; row++)
1111
1.60M
        {
1112
1.60M
            au1_sign_up[wd - 1] = SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 + 1 - src_strd]);
1113
52.2M
            for(col = 0; col < wd; col++)
1114
50.6M
            {
1115
50.6M
                WORD32 edge_idx;
1116
1117
50.6M
                u1_sign_down = SIGN(pu1_src[col] - ((col == 0) ? pu1_src_left_cpy[row + 1] :
1118
50.6M
                                                                 pu1_src[col - 1 + src_strd]));
1119
50.6M
                edge_idx = 2 + au1_sign_up[col] + u1_sign_down;
1120
50.6M
                if(col > 0)
1121
49.0M
                    au1_sign_up[col - 1] = -u1_sign_down;
1122
1123
50.6M
                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];
1124
1125
50.6M
                if(0 != edge_idx)
1126
12.1M
                {
1127
12.1M
                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
1128
12.1M
                }
1129
50.6M
            }
1130
1131
1.60M
            pu1_src += src_strd;
1132
1.60M
        }
1133
1134
53.5k
        pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + wd - 1] = u1_pos_wd_0_tmp;
1135
53.5k
        pu1_src[(pu1_avail[3] ?  (-src_strd) : 0)] = u1_pos_0_ht_tmp;
1136
53.5k
    }
1137
1138
53.5k
    if(0 == pu1_avail[2])
1139
11.1k
        ht++;
1140
53.5k
    if(0 == pu1_avail[3])
1141
9.25k
        ht++;
1142
53.5k
    *pu1_src_top_left = u1_src_top_left_tmp;
1143
1.67M
    for(row = 0; row < ht; row++)
1144
1.62M
    {
1145
1.62M
        pu1_src_left[row] = au1_src_left_tmp[row];
1146
1.62M
    }
1147
1.68M
    for(col = 0; col < wd; col++)
1148
1.63M
    {
1149
1.63M
        pu1_src_top[col] = au1_src_top_tmp[col];
1150
1.63M
    }
1151
1152
53.5k
}
1153
1154
1155
1156
1157
void ihevc_sao_edge_offset_class3_chroma(UWORD8 *pu1_src,
1158
                                         WORD32 src_strd,
1159
                                         UWORD8 *pu1_src_left,
1160
                                         UWORD8 *pu1_src_top,
1161
                                         UWORD8 *pu1_src_top_left,
1162
                                         UWORD8 *pu1_src_top_right,
1163
                                         UWORD8 *pu1_src_bot_left,
1164
                                         UWORD8 *pu1_avail,
1165
                                         WORD8 *pi1_sao_offset_u,
1166
                                         WORD8 *pi1_sao_offset_v,
1167
                                         WORD32 wd,
1168
                                         WORD32 ht)
1169
41.9k
{
1170
41.9k
    WORD32 row, col;
1171
41.9k
    UWORD8 au1_mask[MAX_CTB_SIZE];
1172
41.9k
    UWORD8 au1_src_left_tmp[2 * MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
1173
41.9k
    UWORD8 au1_src_top_left_tmp[2];
1174
41.9k
    WORD8 au1_sign_up[MAX_CTB_SIZE];
1175
41.9k
    UWORD8 *pu1_src_left_cpy;
1176
41.9k
    WORD8 u1_sign_down;
1177
41.9k
    WORD32 bit_depth;
1178
1179
41.9k
    UWORD8 u1_pos_wd_0_tmp_u;
1180
41.9k
    UWORD8 u1_pos_wd_0_tmp_v;
1181
41.9k
    UWORD8 u1_pos_0_ht_tmp_u;
1182
41.9k
    UWORD8 u1_pos_0_ht_tmp_v;
1183
1184
41.9k
    bit_depth = BIT_DEPTH_CHROMA;
1185
41.9k
    pu1_src_left_cpy = pu1_src_left;
1186
1187
    /* Initialize the mask values */
1188
41.9k
    memset(au1_mask, 0xFF, MAX_CTB_SIZE);
1189
1190
    /* Update left, top and top-left arrays */
1191
41.9k
    au1_src_top_left_tmp[0] = pu1_src_top[wd - 2];
1192
41.9k
    au1_src_top_left_tmp[1] = pu1_src_top[wd - 1];
1193
676k
    for(row = 0; row < ht; row++)
1194
634k
    {
1195
634k
        au1_src_left_tmp[2 * row] = pu1_src[row * src_strd + wd - 2];
1196
634k
        au1_src_left_tmp[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
1197
634k
    }
1198
1.30M
    for(col = 0; col < wd; col++)
1199
1.26M
    {
1200
1.26M
        au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
1201
1.26M
    }
1202
1203
1204
    /* If top-right is available, process separately */
1205
41.9k
    if(0 != pu1_avail[5])
1206
30.8k
    {
1207
30.8k
        WORD32 edge_idx;
1208
1209
        /* U */
1210
30.8k
        edge_idx = 2 + SIGN(pu1_src[wd - 2] - pu1_src_top_right[0]) +
1211
30.8k
                        SIGN(pu1_src[wd - 2] - pu1_src[wd - 2 - 2 + src_strd]);
1212
1213
30.8k
        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
1214
1215
30.8k
        if(0 != edge_idx)
1216
5.01k
        {
1217
5.01k
            u1_pos_wd_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
1218
5.01k
        }
1219
25.7k
        else
1220
25.7k
        {
1221
25.7k
            u1_pos_wd_0_tmp_u = pu1_src[wd - 2];
1222
25.7k
        }
1223
1224
        /* V */
1225
30.8k
        edge_idx = 2 + SIGN(pu1_src[wd - 1] - pu1_src_top_right[1]) +
1226
30.8k
                        SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 2 + src_strd]);
1227
1228
30.8k
        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
1229
1230
30.8k
        if(0 != edge_idx)
1231
4.39k
        {
1232
4.39k
            u1_pos_wd_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
1233
4.39k
        }
1234
26.4k
        else
1235
26.4k
        {
1236
26.4k
            u1_pos_wd_0_tmp_v = pu1_src[wd - 1];
1237
26.4k
        }
1238
30.8k
    }
1239
11.1k
    else
1240
11.1k
    {
1241
11.1k
        u1_pos_wd_0_tmp_u = pu1_src[wd - 2];
1242
11.1k
        u1_pos_wd_0_tmp_v = pu1_src[wd - 1];
1243
11.1k
    }
1244
1245
    /* If bottom-left is available, process separately */
1246
41.9k
    if(0 != pu1_avail[6])
1247
31.1k
    {
1248
31.1k
        WORD32 edge_idx;
1249
1250
        /* U */
1251
31.1k
        edge_idx = 2 + SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd + 2 - src_strd]) +
1252
31.1k
                        SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0]);
1253
1254
31.1k
        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
1255
1256
31.1k
        if(0 != edge_idx)
1257
5.09k
        {
1258
5.09k
            u1_pos_0_ht_tmp_u = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
1259
5.09k
        }
1260
26.0k
        else
1261
26.0k
        {
1262
26.0k
            u1_pos_0_ht_tmp_u = pu1_src[(ht - 1) * src_strd];
1263
26.0k
        }
1264
1265
        /* V */
1266
31.1k
        edge_idx = 2 + SIGN(pu1_src[(ht - 1) * src_strd + 1] - pu1_src[(ht - 1) * src_strd + 1 + 2 - src_strd]) +
1267
31.1k
                        SIGN(pu1_src[(ht - 1) * src_strd + 1] - pu1_src_bot_left[1]);
1268
1269
31.1k
        edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
1270
1271
31.1k
        if(0 != edge_idx)
1272
4.36k
        {
1273
4.36k
            u1_pos_0_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd + 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
1274
4.36k
        }
1275
26.7k
        else
1276
26.7k
        {
1277
26.7k
            u1_pos_0_ht_tmp_v = pu1_src[(ht - 1) * src_strd + 1];
1278
26.7k
        }
1279
31.1k
    }
1280
10.7k
    else
1281
10.7k
    {
1282
10.7k
        u1_pos_0_ht_tmp_u = pu1_src[(ht - 1) * src_strd];
1283
10.7k
        u1_pos_0_ht_tmp_v = pu1_src[(ht - 1) * src_strd + 1];
1284
10.7k
    }
1285
1286
    /* If Left is not available */
1287
41.9k
    if(0 == pu1_avail[0])
1288
2.63k
    {
1289
2.63k
        au1_mask[0] = 0;
1290
2.63k
    }
1291
1292
    /* If Top is not available */
1293
41.9k
    if(0 == pu1_avail[2])
1294
8.11k
    {
1295
8.11k
        pu1_src += src_strd;
1296
8.11k
        ht--;
1297
8.11k
        pu1_src_left_cpy += 2;
1298
227k
        for(col = 0; col < wd - 2; col++)
1299
219k
        {
1300
219k
            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col + 2 - src_strd]);
1301
219k
        }
1302
8.11k
    }
1303
33.8k
    else
1304
33.8k
    {
1305
993k
        for(col = 0; col < wd - 2; col++)
1306
959k
        {
1307
959k
            au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col + 2]);
1308
959k
        }
1309
33.8k
    }
1310
1311
    /* If Right is not available */
1312
41.9k
    if(0 == pu1_avail[1])
1313
3.86k
    {
1314
3.86k
        au1_mask[(wd - 1) >> 1] = 0;
1315
3.86k
    }
1316
1317
    /* If Bottom is not available */
1318
41.9k
    if(0 == pu1_avail[3])
1319
8.70k
    {
1320
8.70k
        ht--;
1321
8.70k
    }
1322
1323
    /* Processing is done on the intermediate buffer and the output is written to the source buffer */
1324
41.9k
    {
1325
658k
        for(row = 0; row < ht; row++)
1326
617k
        {
1327
617k
            au1_sign_up[wd - 2] = SIGN(pu1_src[wd - 2] - pu1_src[wd - 2 + 2 - src_strd]);
1328
617k
            au1_sign_up[wd - 1] = SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 + 2 - src_strd]);
1329
19.7M
            for(col = 0; col < wd; col++)
1330
19.1M
            {
1331
19.1M
                WORD32 edge_idx;
1332
19.1M
                WORD8 *pi1_sao_offset;
1333
1334
19.1M
                pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;
1335
1336
19.1M
                u1_sign_down = SIGN(pu1_src[col] - ((col < 2) ? pu1_src_left_cpy[2 * (row + 1) + col] :
1337
19.1M
                                                                pu1_src[col - 2 + src_strd]));
1338
19.1M
                edge_idx = 2 + au1_sign_up[col] + u1_sign_down;
1339
19.1M
                if(col > 1)
1340
17.8M
                    au1_sign_up[col - 2] = -u1_sign_down;
1341
1342
19.1M
                edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];
1343
1344
19.1M
                if(0 != edge_idx)
1345
1.88M
                {
1346
1.88M
                    pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
1347
1.88M
                }
1348
19.1M
            }
1349
1350
617k
            pu1_src += src_strd;
1351
617k
        }
1352
1353
41.9k
        pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + wd - 2] = u1_pos_wd_0_tmp_u;
1354
41.9k
        pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + wd - 1] = u1_pos_wd_0_tmp_v;
1355
41.9k
        pu1_src[(pu1_avail[3] ?  (-src_strd) : 0)] = u1_pos_0_ht_tmp_u;
1356
41.9k
        pu1_src[(pu1_avail[3] ?  (-src_strd) : 0) + 1] = u1_pos_0_ht_tmp_v;
1357
41.9k
    }
1358
1359
41.9k
    if(0 == pu1_avail[2])
1360
8.11k
        ht++;
1361
41.9k
    if(0 == pu1_avail[3])
1362
8.70k
        ht++;
1363
41.9k
    pu1_src_top_left[0] = au1_src_top_left_tmp[0];
1364
41.9k
    pu1_src_top_left[1] = au1_src_top_left_tmp[1];
1365
1.31M
    for(row = 0; row < 2 * ht; row++)
1366
1.26M
    {
1367
1.26M
        pu1_src_left[row] = au1_src_left_tmp[row];
1368
1.26M
    }
1369
1.30M
    for(col = 0; col < wd; col++)
1370
1.26M
    {
1371
1.26M
        pu1_src_top[col] = au1_src_top_tmp[col];
1372
1.26M
    }
1373
1374
41.9k
}