Coverage Report

Created: 2025-12-29 07:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/decoder/ihevcd_boundary_strength.c
Line
Count
Source
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevcd_boundary_strength.c
22
 *
23
 * @brief
24
 *  Contains functions for computing boundary strength
25
 *
26
 * @author
27
 *  Harish
28
 *
29
 * @par List of Functions:
30
 *
31
 * @remarks
32
 *  None
33
 *
34
 *******************************************************************************
35
 */
36
/*****************************************************************************/
37
/* File Includes                                                             */
38
/*****************************************************************************/
39
#include <stdio.h>
40
#include <stddef.h>
41
#include <stdlib.h>
42
#include <string.h>
43
44
#include "ihevc_typedefs.h"
45
#include "iv.h"
46
#include "ivd.h"
47
#include "ihevcd_cxa.h"
48
#include "ithread.h"
49
50
#include "ihevc_defs.h"
51
#include "ihevc_debug.h"
52
#include "ihevc_defs.h"
53
#include "ihevc_structs.h"
54
#include "ihevc_macros.h"
55
#include "ihevc_platform_macros.h"
56
#include "ihevc_cabac_tables.h"
57
58
#include "ihevc_error.h"
59
#include "ihevc_common_tables.h"
60
61
#include "ihevcd_trace.h"
62
#include "ihevcd_defs.h"
63
#include "ihevcd_function_selector.h"
64
#include "ihevcd_structs.h"
65
#include "ihevcd_error.h"
66
#include "ihevcd_nal.h"
67
#include "ihevcd_bitstream.h"
68
#include "ihevcd_job_queue.h"
69
#include "ihevcd_utils.h"
70
#include "ihevcd_profile.h"
71
72
/*****************************************************************************/
73
/* Function Prototypes                                                       */
74
/*****************************************************************************/
75
76
77
/*
 * Neighbour availability bit mask helpers.
 *
 * One bit per neighbour, as fixed by the constants below:
 *   0x10 bottom-left, 0x08 left, 0x04 top-left, 0x02 top, 0x01 top-right.
 *
 * `avail` must be a modifiable integer lvalue; it is parenthesized in every
 * expansion so that expressions such as `*p` or `a[i]` expand safely.
 * The trailing semicolon is part of each expansion and is retained so that
 * existing call sites (with or without their own `;`) keep compiling.
 */
#define SET_NGBHR_ALL_AVAIL(avail)          (avail) = 0x1F;

#define SET_NGBHR_BOTLEFT_NOTAVAIL(avail)   (avail) &= ~0x10;
#define SET_NGBHR_LEFT_NOTAVAIL(avail)      (avail) &= ~0x8;
#define SET_NGBHR_TOPLEFT_NOTAVAIL(avail)   (avail) &= ~0x4;
#define SET_NGBHR_TOP_NOTAVAIL(avail)       (avail) &= ~0x2;
#define SET_NGBHR_TOPRIGHT_NOTAVAIL(avail)  (avail) &= ~0x1;
84
85
WORD32 ihevcd_pu_boundary_strength(pu_t *ps_pu,
86
                                   pu_t *ps_ngbr_pu)
87
30.7M
{
88
30.7M
    WORD32 i4_bs;
89
30.7M
    UWORD32 l0_ref_pic_buf_id, l1_ref_pic_buf_id;
90
30.7M
    UWORD32 ngbr_l0_ref_pic_buf_id, ngbr_l1_ref_pic_buf_id;
91
92
30.7M
    WORD16 i2_mv_x0, i2_mv_y0, i2_mv_x1, i2_mv_y1;
93
30.7M
    WORD16 i2_ngbr_mv_x0, i2_ngbr_mv_y0, i2_ngbr_mv_x1, i2_ngbr_mv_y1;
94
95
30.7M
    WORD32 num_mv, ngbr_num_mv;
96
97
30.7M
    num_mv = (PRED_BI == ps_pu->b2_pred_mode) ? 2 : 1;
98
30.7M
    ngbr_num_mv = (PRED_BI == ps_ngbr_pu->b2_pred_mode) ? 2 : 1;
99
100
30.7M
    l0_ref_pic_buf_id = ps_pu->mv.i1_l0_ref_pic_buf_id;
101
30.7M
    l1_ref_pic_buf_id = ps_pu->mv.i1_l1_ref_pic_buf_id;
102
30.7M
    ngbr_l0_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l0_ref_pic_buf_id;
103
30.7M
    ngbr_l1_ref_pic_buf_id = ps_ngbr_pu->mv.i1_l1_ref_pic_buf_id;
104
105
106
30.7M
    i2_mv_x0 = ps_pu->mv.s_l0_mv.i2_mvx;
107
30.7M
    i2_mv_y0 = ps_pu->mv.s_l0_mv.i2_mvy;
108
30.7M
    i2_mv_x1 = ps_pu->mv.s_l1_mv.i2_mvx;
109
30.7M
    i2_mv_y1 = ps_pu->mv.s_l1_mv.i2_mvy;
110
111
30.7M
    i2_ngbr_mv_x0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvx;
112
30.7M
    i2_ngbr_mv_y0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvy;
113
30.7M
    i2_ngbr_mv_x1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvx;
114
30.7M
    i2_ngbr_mv_y1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvy;
115
116
117
    /* If two motion vectors are used */
118
30.7M
    if((2 == num_mv) &&
119
12.8M
            (2 == ngbr_num_mv))
120
11.9M
    {
121
11.9M
        if((l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id) ||
122
119k
                (l0_ref_pic_buf_id == ngbr_l1_ref_pic_buf_id && l1_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id))
123
11.8M
        {
124
11.8M
            if(l0_ref_pic_buf_id != l1_ref_pic_buf_id) /* Different L0 and L1 */
125
1.49M
            {
126
1.49M
                if(l0_ref_pic_buf_id == ngbr_l0_ref_pic_buf_id)
127
1.47M
                {
128
1.47M
                    i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x0) < 4) &&
129
1.39M
                            (ABS(i2_mv_y0 - i2_ngbr_mv_y0) < 4) &&
130
1.36M
                            (ABS(i2_mv_x1 - i2_ngbr_mv_x1) < 4) &&
131
1.35M
                            (ABS(i2_mv_y1 - i2_ngbr_mv_y1) < 4) ? 0 : 1;
132
1.47M
                }
133
15.3k
                else
134
15.3k
                {
135
15.3k
                    i4_bs = (ABS(i2_mv_x0 - i2_ngbr_mv_x1) < 4) &&
136
9.35k
                            (ABS(i2_mv_y0 - i2_ngbr_mv_y1) < 4) &&
137
7.21k
                            (ABS(i2_mv_x1 - i2_ngbr_mv_x0) < 4) &&
138
10.3k
                            (ABS(i2_mv_y1 - i2_ngbr_mv_y0) < 4) ? 0 : 1;
139
15.3k
                }
140
1.49M
            }
141
10.3M
            else /* Same L0 and L1 */
142
10.3M
            {
143
10.3M
                i4_bs = ((ABS(i2_mv_x0 - i2_ngbr_mv_x0) >= 4) ||
144
10.2M
                         (ABS(i2_mv_y0 - i2_ngbr_mv_y0) >= 4) ||
145
10.2M
                         (ABS(i2_mv_x1 - i2_ngbr_mv_x1) >= 4) ||
146
10.2M
                         (ABS(i2_mv_y1 - i2_ngbr_mv_y1) >= 4)) &&
147
146k
                                ((ABS(i2_mv_x0 - i2_ngbr_mv_x1) >= 4) ||
148
82.9k
                                 (ABS(i2_mv_y0 - i2_ngbr_mv_y1) >= 4) ||
149
39.1k
                                 (ABS(i2_mv_x1 - i2_ngbr_mv_x0) >= 4) ||
150
10.2M
                                 (ABS(i2_mv_y1 - i2_ngbr_mv_y0) >= 4)) ? 1 : 0;
151
10.3M
            }
152
11.8M
        }
153
101k
        else /* If the reference pictures used are different */
154
101k
        {
155
101k
            i4_bs = 1;
156
101k
        }
157
11.9M
    }
158
159
    /* If one motion vector is used in both PUs */
160
18.8M
    else if((1 == num_mv) &&
161
17.9M
            (1 == ngbr_num_mv))
162
16.9M
    {
163
16.9M
        WORD16 i2_mv_x, i2_mv_y;
164
16.9M
        WORD16 i2_ngbr_mv_x, i2_ngbr_mv_y;
165
16.9M
        UWORD32 ref_pic_buf_id, ngbr_ref_pic_buf_id;
166
167
16.9M
        if(PRED_L0 == ps_pu->b2_pred_mode)
168
15.8M
        {
169
15.8M
            i2_mv_x = i2_mv_x0;
170
15.8M
            i2_mv_y = i2_mv_y0;
171
15.8M
            ref_pic_buf_id = l0_ref_pic_buf_id;
172
15.8M
        }
173
1.07M
        else
174
1.07M
        {
175
1.07M
            i2_mv_x = i2_mv_x1;
176
1.07M
            i2_mv_y = i2_mv_y1;
177
1.07M
            ref_pic_buf_id = l1_ref_pic_buf_id;
178
1.07M
        }
179
180
16.9M
        if(PRED_L0 == ps_ngbr_pu->b2_pred_mode)
181
15.8M
        {
182
15.8M
            i2_ngbr_mv_x = i2_ngbr_mv_x0;
183
15.8M
            i2_ngbr_mv_y = i2_ngbr_mv_y0;
184
15.8M
            ngbr_ref_pic_buf_id = ngbr_l0_ref_pic_buf_id;
185
15.8M
        }
186
1.08M
        else
187
1.08M
        {
188
1.08M
            i2_ngbr_mv_x = i2_ngbr_mv_x1;
189
1.08M
            i2_ngbr_mv_y = i2_ngbr_mv_y1;
190
1.08M
            ngbr_ref_pic_buf_id = ngbr_l1_ref_pic_buf_id;
191
1.08M
        }
192
193
16.9M
        i4_bs = (ref_pic_buf_id == ngbr_ref_pic_buf_id) &&
194
16.4M
                (ABS(i2_mv_x - i2_ngbr_mv_x) < 4)  &&
195
15.7M
                (ABS(i2_mv_y - i2_ngbr_mv_y) < 4) ? 0 : 1;
196
16.9M
    }
197
198
    /* If the no. of motion vectors is not the same */
199
1.90M
    else
200
1.90M
    {
201
1.90M
        i4_bs = 1;
202
1.90M
    }
203
204
205
30.7M
    return i4_bs;
206
30.7M
}
207
208
/* QP is also populated in the same function */
209
/**
 * @brief
 *  Populates the vertical and horizontal boundary strength (BS) words and the
 *  QP array for one CTB using only its transform units.
 *
 * @par Description
 *  - Clears the BS storage of the current CTB while preserving the bits of the
 *    first vertical/horizontal word that were saved before the clear (they are
 *    restored only when ctb_x / ctb_y is non-zero).
 *  - For every TU whose left/top edge lies on the 8 pixel grid, ORs
 *    DUP_LSB_10(tu_size) into the matching BS word (presumably BS = 2 for each
 *    4-sample segment - confirm against the DUP_LSB_10 definition).
 *  - Writes the TU QP into the per-8x8 QP array (only the first entry when the
 *    CTB is flagged as having a constant QP).
 *  - Clears the first-column / first-row BS when the left / top neighbour must
 *    not be filtered across (picture, tile or slice boundary checks below).
 *  - Clears the BS entries at the bottom / right picture boundary for
 *    incomplete CTBs.
 *
 * @param[in,out] ps_bs_ctxt  BS context; supplies SPS/PPS/slice header, CTB
 *                            position, TU list and the picture level BS / QP
 *                            buffers that are updated in place
 *
 * @returns 0 in all cases
 */
WORD32 ihevcd_ctb_boundary_strength_islice(bs_ctxt_t *ps_bs_ctxt)
{
    pps_t *ps_pps;
    sps_t *ps_sps;
    tu_t *ps_tu;
    UWORD32 *pu4_vert_bs;
    UWORD32 *pu4_horz_bs;
    WORD32 bs_strd;
    /* Saved BS bits are raw 32 bit masks; keep them unsigned so that no
     * implementation-defined signed conversion is involved (all 32 bits are
     * retained when ctb_size is 64) */
    UWORD32 vert_bs0_tmp;
    UWORD32 horz_bs0_tmp;
    /* AND-mask that clears the first ctb_size / 2 bits of a BS word */
    UWORD32 u4_edge0_clr_mask;
    UWORD8 *pu1_qp;
    WORD32 qp_strd;
    UWORD32 u4_qp_const_in_ctb;
    WORD32 ctb_indx;
    WORD32 i4_tu_cnt;
    WORD32 log2_ctb_size;
    WORD32 ctb_size;

    WORD8 i1_loop_filter_across_tiles_enabled_flag;
    WORD8 i1_loop_filter_across_slices_enabled_flag;

    WORD32 i;

    PROFILE_DISABLE_BOUNDARY_STRENGTH();

    ps_pps = ps_bs_ctxt->ps_pps;
    ps_sps = ps_bs_ctxt->ps_sps;
    i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag;
    i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag;
    i4_tu_cnt = ps_bs_ctxt->i4_ctb_tu_cnt;

    log2_ctb_size = ps_sps->i1_log2_ctb_size;
    ctb_size = (1 << log2_ctb_size);

    /* A shift by 32 would be undefined, hence the explicit 64x64 case */
    u4_edge0_clr_mask = (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);

    /* strides are in units of number of bytes */
    /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
    bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);

    pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs +
                    (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
                    ps_bs_ctxt->i4_ctb_y * bs_strd);
    pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs +
                    (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
                    ps_bs_ctxt->i4_ctb_y * bs_strd);

    /* ctb_size/8 elements per CTB */
    qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
    pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));

    /* One bit per CTB tells whether the QP is constant within the CTB */
    ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y;
    u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7));

    /* Save the first ctb_size / 2 bits of the first BS words before clearing */
    vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
    horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));

    /* ctb_size/8 is the number of edges per CTB
     * ctb_size/4 is the number of BS values needed per edge
     * divided by 8 for the number of bytes
     * 2 is the number of bits needed for each BS value */
    /* NOTE(review): the P/B slice variant in this file clears
     * (ctb_size >> 4) extra bytes for the vertical BS, which differs from the
     * length used here when ctb_size is 16 - confirm this is intentional */
    memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + ((ctb_size >> 5) << 1));
    memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7)));

    /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */
    if(0 != ps_bs_ctxt->i4_ctb_x)
    {
        pu4_vert_bs[0] |= vert_bs0_tmp;
    }

    /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */
    if(0 != ps_bs_ctxt->i4_ctb_y)
    {
        pu4_horz_bs[0] |= horz_bs0_tmp;
    }

    ps_tu = ps_bs_ctxt->ps_tu;

    /* Populating the QP array - if const_qp_in_ctb flag is one, set only the first element */
    if(u4_qp_const_in_ctb)
        pu1_qp[0] = ps_tu->b7_qp;

    for(i = 0; i < i4_tu_cnt; i++)
    {
        WORD32 start_pos_x;
        WORD32 start_pos_y;
        WORD32 tu_size;


        UWORD32 u4_bs;
        ps_tu = ps_bs_ctxt->ps_tu + i;

        /* start_pos_x and start_pos_y are in units of min TU size (4x4) */
        start_pos_x = ps_tu->b4_pos_x;
        start_pos_y = ps_tu->b4_pos_y;

        tu_size = 1 << (ps_tu->b3_size + 2);
        tu_size >>= 2; /* TU size divided by 4 */

        u4_bs = DUP_LSB_10(tu_size);

        /* Only if the current edge falls on 8 pixel grid set BS */
        if(0 == (start_pos_x & 1))
        {
            WORD32 shift;
            shift = start_pos_y * 2;
            /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
             *  and deblocking is done on 8x8 grid
             */
            if(6 != log2_ctb_size)
                shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
            pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
        }
        /* Only if the current edge falls on 8 pixel grid set BS */
        if(0 == (start_pos_y & 1))
        {
            WORD32 shift;
            shift = start_pos_x * 2;
            /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
             *  and deblocking is done on 8x8 grid
             */
            if(6 != log2_ctb_size)
                shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
            pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
        }

        /* Populating the QP array */
        if(0 == u4_qp_const_in_ctb)
        {
            if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1))
            {
                WORD32 row, col;
                /* One QP entry per 8x8 block covered by the TU */
                for(row = start_pos_y; row < start_pos_y + tu_size; row += 2)
                {
                    for(col = start_pos_x; col < start_pos_x + tu_size; col += 2)
                    {
                        pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp;
                    }
                }
            }
        }

    }
    {
        /* Determine if the slice is dependent, and if its left neighbor belongs to the same slice, in a different tile */
        UWORD32 ctb_addr;
        WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1;
        /* If left neighbor is not available, then set BS for entire first column to zero */
        if(!ps_pps->i1_tiles_enabled_flag)
        {
            if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
                            (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
                            (0 == ps_bs_ctxt->i4_ctb_x))
            {
                pu4_vert_bs[0] &= u4_edge0_clr_mask;
            }
        }
        else
        {
            /* If across-tiles is disabled */
            if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x))
            {
                pu4_vert_bs[0] &= u4_edge0_clr_mask;
            }
            else
            {
                ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
                slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
                if(ps_bs_ctxt->i4_ctb_x)
                {
                    ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
                    left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
                }
                /* If the 1st slice in a new tile is a dependent slice */
                if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx)))
                {
                    /* Removed redundant checks */
                    if((0 == i1_loop_filter_across_slices_enabled_flag && (
                                    ((slice_idx != left_slice_idx) && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
                                    ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) ||
                                    (0 == ps_bs_ctxt->i4_ctb_x))
                    {
                        pu4_vert_bs[0] &= u4_edge0_clr_mask;
                    }
                }
            }
        }

        ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
        slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
        if(ps_bs_ctxt->i4_ctb_y)
        {
            ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
            top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
        }

        /* If top neighbor is not available, then set BS for entire first row to zero */
        /* Removed redundant checks */
        if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y)
                        || (0 == i1_loop_filter_across_slices_enabled_flag && ((slice_idx != top_slice_idx)))
                        || (0 == ps_bs_ctxt->i4_ctb_y))
        {
            pu4_horz_bs[0] &= u4_edge0_clr_mask;
        }
    }

    /**
     *  Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB
     *   (They might have been set to  non zero values because of CBF of the current CTB)
     *   This block might not be needed for I slices*/
    {
        /* Remaining picture extent from this CTB's origin, in units of 8 samples */
        WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3;
        WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3;
        if(num_rows_remaining < (ctb_size >> 3))
        {
            /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
             *  and deblocking is done on 8x8 grid
             */
            WORD32 offset;
            offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2;
            if(6 != log2_ctb_size)
                offset += (num_rows_remaining & 1) << (log2_ctb_size - 4);

            memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4));
        }

        if(num_cols_remaining < (ctb_size >> 3))
        {
            /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
             *  and deblocking is done on 8x8 grid
             */

            WORD32 offset;
            offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2;
            if(6 != log2_ctb_size)
                offset += (num_cols_remaining & 1) << (log2_ctb_size - 4);

            memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4));
        }
    }

    return 0;
}
458
WORD32 ihevcd_ctb_boundary_strength_pbslice(bs_ctxt_t *ps_bs_ctxt)
459
681k
{
460
681k
    sps_t *ps_sps;
461
681k
    pps_t *ps_pps;
462
681k
    WORD32 cur_ctb_idx, next_ctb_idx = 0;
463
681k
    WORD32 i4_tu_cnt;
464
681k
    WORD32 i4_pu_cnt;
465
681k
    tu_t *ps_tu;
466
467
681k
    UWORD32 *pu4_vert_bs;
468
681k
    UWORD32 *pu4_horz_bs;
469
681k
    WORD32 bs_strd;
470
681k
    WORD32 vert_bs0_tmp;
471
681k
    WORD32 horz_bs0_tmp;
472
681k
    UWORD8 *pu1_qp;
473
681k
    WORD32 qp_strd;
474
681k
    UWORD32 u4_qp_const_in_ctb;
475
681k
    WORD32 ctb_indx;
476
681k
    WORD32 log2_ctb_size;
477
681k
    WORD32 ctb_size;
478
479
681k
    WORD32 i;
480
681k
    WORD8 i1_loop_filter_across_tiles_enabled_flag;
481
681k
    WORD8 i1_loop_filter_across_slices_enabled_flag;
482
483
681k
    PROFILE_DISABLE_BOUNDARY_STRENGTH();
484
485
681k
    ps_sps = ps_bs_ctxt->ps_sps;
486
681k
    ps_pps = ps_bs_ctxt->ps_pps;
487
488
681k
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
489
681k
    ctb_size = (1 << log2_ctb_size);
490
491
    /* strides are in units of number of bytes */
492
    /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
493
681k
    bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
494
495
681k
    pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs +
496
681k
                    (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
497
681k
                    ps_bs_ctxt->i4_ctb_y * bs_strd);
498
681k
    pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs +
499
681k
                    (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
500
681k
                    ps_bs_ctxt->i4_ctb_y * bs_strd);
501
502
681k
    vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
503
681k
    horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
504
505
681k
    ps_tu = ps_bs_ctxt->ps_tu;
506
507
    /* ctb_size/8 elements per CTB */
508
681k
    qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
509
681k
    pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
510
511
681k
    ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y;
512
681k
    u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7));
513
514
681k
    i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag;
515
681k
    i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag;
516
517
    /* ctb_size/8 is the number of edges per CTB
518
     * ctb_size/4 is the number of BS values needed per edge
519
     * divided by 8 for the number of bytes
520
     * 2 is the number of bits needed for each BS value */
521
/*
522
    memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) * 2 / 8 );
523
    memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) * 2 / 8 );
524
*/
525
681k
    memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + (ctb_size >> 4));
526
681k
    memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7)));
527
528
    /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */
529
681k
    if(0 != ps_bs_ctxt->i4_ctb_x)
530
633k
    {
531
633k
        pu4_vert_bs[0] |= vert_bs0_tmp;
532
633k
    }
533
534
    /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */
535
681k
    if(0 != ps_bs_ctxt->i4_ctb_y)
536
603k
    {
537
603k
        pu4_horz_bs[0] |= horz_bs0_tmp;
538
603k
    }
539
    /* pu4_horz_bs[bs_strd / 4] corresponds to pu4_horz_bs[0] of the bottom CTB */
540
681k
    *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) = 0;
541
542
681k
    cur_ctb_idx = ps_bs_ctxt->i4_ctb_x
543
681k
                    + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
544
681k
    next_ctb_idx = ps_bs_ctxt->i4_next_tu_ctb_cnt;
545
681k
    if(1 == ps_bs_ctxt->ps_codec->i4_num_cores)
546
150k
    {
547
150k
        i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx % RESET_TU_BUF_NCTB];
548
150k
    }
549
531k
    else
550
531k
    {
551
531k
        i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx];
552
531k
    }
553
554
681k
    ps_tu = ps_bs_ctxt->ps_tu;
555
681k
    if(u4_qp_const_in_ctb)
556
669k
        pu1_qp[0] = ps_tu->b7_qp;
557
558
    /* For all TUs in the CTB For left and top edges, check if there are coded coefficients on either sides of the edge */
559
16.5M
    for(i = 0; i < i4_tu_cnt; i++)
560
15.8M
    {
561
15.8M
        WORD32 start_pos_x;
562
15.8M
        WORD32 start_pos_y;
563
15.8M
        WORD32 end_pos_x;
564
15.8M
        WORD32 end_pos_y;
565
15.8M
        WORD32 tu_size;
566
15.8M
        UWORD32 u4_bs;
567
15.8M
        WORD32 intra_flag;
568
15.8M
        UWORD8 *pu1_pic_intra_flag;
569
570
15.8M
        ps_tu = ps_bs_ctxt->ps_tu + i;
571
572
15.8M
        start_pos_x = ps_tu->b4_pos_x;
573
15.8M
        start_pos_y = ps_tu->b4_pos_y;
574
575
15.8M
        tu_size = 1 << (ps_tu->b3_size + 2);
576
15.8M
        tu_size >>= 2;
577
578
15.8M
        end_pos_x = start_pos_x + tu_size;
579
15.8M
        end_pos_y = start_pos_y + tu_size;
580
581
15.8M
        {
582
15.8M
            WORD32 tu_abs_x = (ps_bs_ctxt->i4_ctb_x << log2_ctb_size) + (start_pos_x << 2);
583
15.8M
            WORD32 tu_abs_y = (ps_bs_ctxt->i4_ctb_y << log2_ctb_size) + (start_pos_y << 2);
584
585
15.8M
            WORD32 numbytes_row =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
586
587
15.8M
            pu1_pic_intra_flag = ps_bs_ctxt->ps_codec->pu1_pic_intra_flag;
588
15.8M
            pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row;
589
15.8M
            pu1_pic_intra_flag += (tu_abs_x >> 6);
590
591
15.8M
            intra_flag = *pu1_pic_intra_flag;
592
15.8M
            intra_flag &= (1 << ((tu_abs_x >> 3) % 8));
593
15.8M
        }
594
15.8M
        if(intra_flag)
595
528k
        {
596
528k
            u4_bs = DUP_LSB_10(tu_size);
597
598
            /* Only if the current edge falls on 8 pixel grid set BS */
599
528k
            if(0 == (start_pos_x & 1))
600
315k
            {
601
315k
                WORD32 shift;
602
315k
                shift = start_pos_y * 2;
603
                /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
604
                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
605
                 *  and deblocking is done on 8x8 grid
606
                 */
607
315k
                if(6 != log2_ctb_size)
608
13.1k
                    shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
609
315k
                pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
610
315k
            }
611
            /* Only if the current edge falls on 8 pixel grid set BS */
612
528k
            if(0 == (start_pos_y & 1))
613
315k
            {
614
315k
                WORD32 shift;
615
315k
                shift = start_pos_x * 2;
616
                /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
617
                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
618
                 *  and deblocking is done on 8x8 grid
619
                 */
620
315k
                if(6 != log2_ctb_size)
621
13.1k
                    shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
622
315k
                pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
623
315k
            }
624
528k
        }
625
626
627
        /* If the current TU is coded then set both top edge and left edge BS to 1 and go to next TU */
628
15.8M
        if(ps_tu->b1_y_cbf)
629
811k
        {
630
811k
            u4_bs = DUP_LSB_01(tu_size);
631
632
            /* Only if the current edge falls on 8 pixel grid set BS */
633
811k
            if(0 == (start_pos_x & 1))
634
564k
            {
635
564k
                WORD32 shift;
636
564k
                shift = start_pos_y * 2;
637
                /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
638
                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
639
                 *  and deblocking is done on 8x8 grid
640
                 */
641
564k
                if(6 != log2_ctb_size)
642
8.70k
                    shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
643
564k
                pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
644
564k
            }
645
            /* Only if the current edge falls on 8 pixel grid set BS */
646
811k
            if(0 == (start_pos_y & 1))
647
564k
            {
648
564k
                WORD32 shift;
649
564k
                shift = start_pos_x * 2;
650
                /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
651
                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
652
                 *  and deblocking is done on 8x8 grid
653
                 */
654
564k
                if(6 != log2_ctb_size)
655
8.48k
                    shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
656
564k
                pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
657
564k
            }
658
            /* Only if the current edge falls on 8 pixel grid set BS */
659
811k
            if(0 == (end_pos_x & 1))
660
563k
            {
661
563k
                if(!(ctb_size / 8 == (end_pos_x >> 1) && ps_bs_ctxt->i4_ctb_x == ps_sps->i2_pic_wd_in_ctb - 1))
662
559k
                {
663
559k
                    WORD32 shift;
664
559k
                    shift = start_pos_y * 2;
665
559k
                    shift += (((end_pos_x >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1));
666
559k
                    pu4_vert_bs[end_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
667
559k
                }
668
563k
            }
669
            /* Only if the current edge falls on 8 pixel grid set BS */
670
811k
            if(0 == (end_pos_y & 1))
671
564k
            {
672
                /* If end_pos_y corresponds to the bottom of the CTB, write to pu4_horz_bs[0] of the bottom CTB */
673
564k
                if(ctb_size / 8 == (end_pos_y >> 1))
674
91.4k
                {
675
91.4k
                    *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) |= (u4_bs << (start_pos_x * 2));
676
91.4k
                }
677
473k
                else
678
473k
                {
679
473k
                    WORD32 shift;
680
473k
                    shift = start_pos_x * 2;
681
473k
                    shift += (((end_pos_y >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1));
682
473k
                    pu4_horz_bs[end_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
683
473k
                }
684
564k
            }
685
811k
        }
686
687
15.8M
        if(0 == u4_qp_const_in_ctb)
688
360k
        {
689
360k
            if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1))
690
267k
            {
691
267k
                WORD32 row, col;
692
608k
                for(row = start_pos_y; row < start_pos_y + tu_size; row += 2)
693
341k
                {
694
898k
                    for(col = start_pos_x; col < start_pos_x + tu_size; col += 2)
695
557k
                    {
696
557k
                        pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp;
697
557k
                    }
698
341k
                }
699
267k
            }
700
360k
        }
701
15.8M
    }
702
703
    /* For all PUs in the CTB,
704
    For left and top edges, compute BS */
705
706
681k
    cur_ctb_idx = ps_bs_ctxt->i4_ctb_x
707
681k
                    + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
708
709
681k
    {
710
681k
        WORD32 next_ctb_idx;
711
681k
        next_ctb_idx = ps_bs_ctxt->i4_next_pu_ctb_cnt;
712
681k
        i4_pu_cnt = ps_bs_ctxt->pu4_pic_pu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_pu_idx[cur_ctb_idx];
713
681k
    }
714
715
16.1M
    for(i = 0; i < i4_pu_cnt; i++)
716
15.4M
    {
717
15.4M
        WORD32 start_pos_x;
718
15.4M
        WORD32 start_pos_y;
719
15.4M
        WORD32 end_pos_x;
720
15.4M
        WORD32 end_pos_y;
721
15.4M
        WORD32 pu_wd, pu_ht;
722
15.4M
        UWORD32 u4_bs;
723
15.4M
        pu_t *ps_pu = ps_bs_ctxt->ps_pu + i;
724
15.4M
        pu_t *ps_ngbr_pu;
725
15.4M
        UWORD32 u4_ngbr_pu_indx;
726
727
15.4M
        start_pos_x = ps_pu->b4_pos_x;
728
15.4M
        start_pos_y = ps_pu->b4_pos_y;
729
730
15.4M
        pu_wd = (ps_pu->b4_wd + 1);
731
15.4M
        pu_ht = (ps_pu->b4_ht + 1);
732
733
15.4M
        end_pos_x = start_pos_x + pu_wd;
734
15.4M
        end_pos_y = start_pos_y + pu_ht;
735
736
        /* If the current PU is intra, set Boundary strength as 2 for both top and left edge */
737
        /* Need not mask the BS to zero even if it was set to 1 already since BS 2 and 3 are assumed to be the same in leaf level functions */
738
15.4M
        if(ps_pu->b1_intra_flag)
739
167k
        {
740
167k
            u4_bs = DUP_LSB_10(pu_ht);
741
742
            /* Only if the current edge falls on 8 pixel grid set BS */
743
167k
            if(0 == (start_pos_x & 1))
744
167k
            {
745
167k
                WORD32 shift;
746
167k
                shift = start_pos_y * 2;
747
                /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
748
                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
749
                 *  and deblocking is done on 8x8 grid
750
                 */
751
167k
                if(6 != log2_ctb_size)
752
2.92k
                    shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
753
167k
                pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
754
167k
            }
755
756
167k
            u4_bs = DUP_LSB_10(pu_wd);
757
758
            /* Only if the current edge falls on 8 pixel grid set BS */
759
167k
            if(0 == (start_pos_y & 1))
760
167k
            {
761
167k
                WORD32 shift;
762
167k
                shift = start_pos_x * 2;
763
                /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
764
                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
765
                 *  and deblocking is done on 8x8 grid
766
                 */
767
167k
                if(6 != log2_ctb_size)
768
2.92k
                    shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
769
167k
                pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
770
167k
            }
771
167k
        }
772
773
15.2M
        else
774
15.2M
        {
775
            /* Vertical edge */
776
            /* Process only if the edge is not a frame edge */
777
15.2M
            if(0 != ps_bs_ctxt->i4_ctb_x + start_pos_x)
778
15.1M
            {
779
15.1M
                do
780
15.5M
                {
781
15.5M
                    WORD32 pu_ngbr_ht;
782
15.5M
                    WORD32 min_pu_ht;
783
15.5M
                    WORD32 ngbr_end_pos_y;
784
15.5M
                    UWORD32 ngbr_pu_idx_strd;
785
15.5M
                    ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
786
15.5M
                    u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y + 1) * ngbr_pu_idx_strd + (start_pos_x)];
787
15.5M
                    ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx;
788
789
15.5M
                    pu_ngbr_ht = ps_ngbr_pu->b4_ht + 1;
790
15.5M
                    ngbr_end_pos_y = ps_ngbr_pu->b4_pos_y + pu_ngbr_ht;
791
792
15.5M
                    min_pu_ht = MIN(ngbr_end_pos_y, end_pos_y) - start_pos_y;
793
794
15.5M
                    if(ps_ngbr_pu->b1_intra_flag)
795
92.5k
                    {
796
92.5k
                        u4_bs = DUP_LSB_10(min_pu_ht);
797
798
                        /* Only if the current edge falls on 8 pixel grid set BS */
799
92.5k
                        if(0 == (start_pos_x & 1))
800
92.5k
                        {
801
92.5k
                            WORD32 shift;
802
92.5k
                            shift = start_pos_y * 2;
803
                            /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
804
                             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
805
                             *  and deblocking is done on 8x8 grid
806
                             */
807
92.5k
                            if(6 != log2_ctb_size)
808
3.20k
                                shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
809
92.5k
                            pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
810
92.5k
                        }
811
92.5k
                    }
812
15.4M
                    else
813
15.4M
                    {
814
15.4M
                        u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu);
815
15.4M
                        if(u4_bs)
816
1.84M
                        {
817
1.84M
                            u4_bs = DUP_LSB_01(min_pu_ht);
818
1.84M
                            if(0 == (start_pos_x & 1))
819
1.68M
                            {
820
1.68M
                                WORD32 shift;
821
1.68M
                                shift = start_pos_y * 2;
822
                                /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
823
                                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
824
                                 *  and deblocking is done on 8x8 grid
825
                                 */
826
1.68M
                                if(6 != log2_ctb_size)
827
51.3k
                                    shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
828
1.68M
                                pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
829
1.68M
                            }
830
1.84M
                        }
831
15.4M
                    }
832
833
15.5M
                    pu_ht -= min_pu_ht;
834
15.5M
                    start_pos_y += min_pu_ht;
835
15.5M
                }while(pu_ht > 0);
836
837
                /* Reinitialising since the values are updated in the previous loop */
838
15.1M
                pu_ht = ps_pu->b4_ht + 1;
839
15.1M
                start_pos_y = ps_pu->b4_pos_y;
840
15.1M
            }
841
842
            /* Horizontal edge */
843
            /* Process only if the edge is not a frame edge */
844
15.2M
            if(0 != ps_bs_ctxt->i4_ctb_y + start_pos_y)
845
15.1M
            {
846
15.1M
                do
847
15.5M
                {
848
15.5M
                    WORD32 pu_ngbr_wd;
849
15.5M
                    WORD32 min_pu_wd;
850
15.5M
                    WORD32 ngbr_end_pos_x;
851
15.5M
                    UWORD32 ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
852
15.5M
                    u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y)*ngbr_pu_idx_strd + (start_pos_x + 1)];
853
15.5M
                    ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx;
854
855
15.5M
                    pu_ngbr_wd = ps_ngbr_pu->b4_wd + 1;
856
15.5M
                    ngbr_end_pos_x = ps_ngbr_pu->b4_pos_x + pu_ngbr_wd;
857
858
15.5M
                    min_pu_wd = MIN(ngbr_end_pos_x, end_pos_x) - start_pos_x;
859
860
15.5M
                    if(ps_ngbr_pu->b1_intra_flag)
861
94.5k
                    {
862
94.5k
                        u4_bs = DUP_LSB_10(min_pu_wd);
863
864
                        /* Only if the current edge falls on 8 pixel grid set BS */
865
94.5k
                        if(0 == (start_pos_y & 1))
866
94.5k
                        {
867
94.5k
                            WORD32 shift;
868
94.5k
                            shift = start_pos_x * 2;
869
                            /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
870
                             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
871
                             *  and deblocking is done on 8x8 grid
872
                             */
873
94.5k
                            if(6 != log2_ctb_size)
874
2.67k
                                shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
875
94.5k
                            pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
876
94.5k
                        }
877
94.5k
                    }
878
15.4M
                    else
879
15.4M
                    {
880
15.4M
                        u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu);
881
15.4M
                        if(u4_bs)
882
1.91M
                        {
883
1.91M
                            u4_bs = DUP_LSB_01(min_pu_wd);
884
885
                            /* Only if the current edge falls on 8 pixel grid set BS */
886
1.91M
                            if(0 == (start_pos_y & 1))
887
1.80M
                            {
888
1.80M
                                WORD32 shift;
889
1.80M
                                shift = start_pos_x * 2;
890
                                /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
891
                                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
892
                                 *  and deblocking is done on 8x8 grid
893
                                 */
894
1.80M
                                if(6 != log2_ctb_size)
895
50.4k
                                    shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
896
1.80M
                                pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
897
1.80M
                            }
898
1.91M
                        }
899
15.4M
                    }
900
901
15.5M
                    pu_wd -= min_pu_wd;
902
15.5M
                    start_pos_x += min_pu_wd;
903
15.5M
                }while(pu_wd > 0);
904
905
                /* Reinitialising since the values are updated in the previous loop */
906
15.1M
                pu_wd = ps_pu->b4_wd + 1;
907
15.1M
                start_pos_x = ps_pu->b4_pos_x;
908
15.1M
            }
909
15.2M
        }
910
15.4M
    }
911
912
681k
    {
913
        /* If left neighbor is not available, then set BS for entire first column to zero */
914
681k
        UWORD32 ctb_addr;
915
681k
        WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1;
916
917
681k
        if(!ps_pps->i1_tiles_enabled_flag)
918
561k
        {
919
561k
            if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
920
520k
                            (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
921
520k
                            (0 == ps_bs_ctxt->i4_ctb_x))
922
41.8k
            {
923
41.8k
                pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
924
41.8k
            }
925
561k
        }
926
119k
        else
927
119k
        {
928
119k
            if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x))
929
4.82k
            {
930
4.82k
                pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
931
4.82k
            }
932
114k
            else
933
114k
            {
934
935
114k
                ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
936
114k
                slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
937
938
114k
                if(ps_bs_ctxt->i4_ctb_x)
939
111k
                {
940
111k
                    ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
941
111k
                    left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
942
111k
                }
943
944
114k
                if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx)))
945
63.4k
                {
946
                    /* Removed redundant checks */
947
63.4k
                    if((0 == i1_loop_filter_across_slices_enabled_flag && (
948
48.5k
                                    (0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
949
59.1k
                                    ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) || (0 == ps_bs_ctxt->i4_ctb_x))
950
4.77k
                    {
951
4.77k
                        pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
952
4.77k
                    }
953
63.4k
                }
954
114k
            }
955
119k
        }
956
957
681k
        ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
958
681k
        slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
959
681k
        if(ps_bs_ctxt->i4_ctb_y)
960
604k
        {
961
604k
            ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
962
604k
            top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
963
604k
        }
964
        /* If top neighbor is not available, then set BS for entire first row to zero */
965
        /* Removed redundant checks */
966
681k
        if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y)
967
599k
                        || (0 == i1_loop_filter_across_slices_enabled_flag && ((slice_idx != top_slice_idx)))
968
587k
                        || (0 == ps_bs_ctxt->i4_ctb_y))
969
94.8k
        {
970
94.8k
            pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
971
94.8k
        }
972
681k
    }
973
974
    /**
975
     *  Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB
976
     *   (They might have been set to non-zero values because of CBF of the current CTB)*/
977
681k
    {
978
681k
        WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3;
979
681k
        WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3;
980
681k
        if(num_rows_remaining < (ctb_size >> 3))
981
62.2k
        {
982
            /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
983
             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
984
             *  and deblocking is done on 8x8 grid
985
             */
986
62.2k
            WORD32 offset;
987
62.2k
            offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2;
988
62.2k
            if(6 != log2_ctb_size)
989
4.53k
                offset += (num_rows_remaining & 1) << (log2_ctb_size - 4);
990
991
62.2k
            memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4));
992
62.2k
        }
993
994
681k
        if(num_cols_remaining < (ctb_size >> 3))
995
28.3k
        {
996
            /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
997
             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
998
             *  and deblocking is done on 8x8 grid
999
             */
1000
1001
28.3k
            WORD32 offset;
1002
28.3k
            offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2;
1003
28.3k
            if(6 != log2_ctb_size)
1004
18.2k
                offset += (num_cols_remaining & 1) << (log2_ctb_size - 4);
1005
1006
28.3k
            memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4));
1007
28.3k
        }
1008
681k
    }
1009
681k
    return 0;
1010
681k
}