Coverage Report

Created: 2025-11-11 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/decoder/ihevcd_boundary_strength.c
Line
Count
Source
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevcd_boundary_strength.c
22
 *
23
 * @brief
24
 *  Contains functions for computing boundary strength
25
 *
26
 * @author
27
 *  Harish
28
 *
29
 * @par List of Functions:
30
 *
31
 * @remarks
32
 *  None
33
 *
34
 *******************************************************************************
35
 */
36
/*****************************************************************************/
37
/* File Includes                                                             */
38
/*****************************************************************************/
39
#include <stdio.h>
40
#include <stddef.h>
41
#include <stdlib.h>
42
#include <string.h>
43
44
#include "ihevc_typedefs.h"
45
#include "iv.h"
46
#include "ivd.h"
47
#include "ihevcd_cxa.h"
48
#include "ithread.h"
49
50
#include "ihevc_defs.h"
51
#include "ihevc_debug.h"
52
#include "ihevc_defs.h"
53
#include "ihevc_structs.h"
54
#include "ihevc_macros.h"
55
#include "ihevc_platform_macros.h"
56
#include "ihevc_cabac_tables.h"
57
58
#include "ihevc_error.h"
59
#include "ihevc_common_tables.h"
60
61
#include "ihevcd_trace.h"
62
#include "ihevcd_defs.h"
63
#include "ihevcd_function_selector.h"
64
#include "ihevcd_structs.h"
65
#include "ihevcd_error.h"
66
#include "ihevcd_nal.h"
67
#include "ihevcd_bitstream.h"
68
#include "ihevcd_job_queue.h"
69
#include "ihevcd_utils.h"
70
#include "ihevcd_profile.h"
71
72
/*****************************************************************************/
73
/* Function Prototypes                                                       */
74
/*****************************************************************************/
75
76
77
/* Neighbour-availability bit-mask helpers.
 * SET_NGBHR_ALL_AVAIL assigns 0x1F (five flag bits set) to 'avail'; each
 * *_NOTAVAIL macro clears exactly one of those bits:
 *   0x10 bottom-left, 0x8 left, 0x4 top-left, 0x2 top, 0x1 top-right.
 * NOTE: every expansion already ends in ';' and 'avail' is not parenthesised,
 * so pass a plain lvalue and do not use these as the sole body of an
 * unbraced if/else. */
#define SET_NGBHR_ALL_AVAIL(avail)          avail = 0x1F;
78
79
#define SET_NGBHR_BOTLEFT_NOTAVAIL(avail)   avail &= ~0x10;
80
#define SET_NGBHR_LEFT_NOTAVAIL(avail)      avail &= ~0x8;
81
#define SET_NGBHR_TOPLEFT_NOTAVAIL(avail)   avail &= ~0x4;
82
#define SET_NGBHR_TOP_NOTAVAIL(avail)       avail &= ~0x2;
83
#define SET_NGBHR_TOPRIGHT_NOTAVAIL(avail)  avail &= ~0x1;
84
85
/**
 *******************************************************************************
 * @brief
 *  Computes the boundary strength (0 or 1) of the edge between two PUs from
 *  their prediction modes, reference picture buffer ids and motion vectors.
 *
 * @par Description:
 *  - If exactly one of the two PUs is bi-predicted, the result is 1.
 *  - If both use a single motion vector, the result is 0 only when both refer
 *    to the same picture buffer and the x and y MV components each differ by
 *    less than 4 (MV units as stored in pu_t).
 *  - If both are bi-predicted, the two reference ids must match as a pair (in
 *    the same or in swapped order), otherwise the result is 1. When the two
 *    references of the current PU are distinct, the MVs that share a reference
 *    are compared. When they are identical, the result is 1 only if both the
 *    direct (L0-L0, L1-L1) and the crossed (L0-L1, L1-L0) pairings contain a
 *    component differing by 4 or more.
 *
 * @param[in] ps_pu
 *  Current PU
 *
 * @param[in] ps_ngbr_pu
 *  Neighbouring PU on the other side of the edge
 *
 * @returns 0 or 1
 *******************************************************************************
 */
WORD32 ihevcd_pu_boundary_strength(pu_t *ps_pu,
                                   pu_t *ps_ngbr_pu)
{
    WORD32 cur_is_bi = (PRED_BI == ps_pu->b2_pred_mode);
    WORD32 ngbr_is_bi = (PRED_BI == ps_ngbr_pu->b2_pred_mode);

    /* Buffer ids are widened to unsigned before being compared */
    UWORD32 cur_ref0 = ps_pu->mv.i1_l0_ref_pic_buf_id;
    UWORD32 cur_ref1 = ps_pu->mv.i1_l1_ref_pic_buf_id;
    UWORD32 ngbr_ref0 = ps_ngbr_pu->mv.i1_l0_ref_pic_buf_id;
    UWORD32 ngbr_ref1 = ps_ngbr_pu->mv.i1_l1_ref_pic_buf_id;

    WORD16 cur_x0 = ps_pu->mv.s_l0_mv.i2_mvx;
    WORD16 cur_y0 = ps_pu->mv.s_l0_mv.i2_mvy;
    WORD16 cur_x1 = ps_pu->mv.s_l1_mv.i2_mvx;
    WORD16 cur_y1 = ps_pu->mv.s_l1_mv.i2_mvy;

    WORD16 ngbr_x0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvx;
    WORD16 ngbr_y0 = ps_ngbr_pu->mv.s_l0_mv.i2_mvy;
    WORD16 ngbr_x1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvx;
    WORD16 ngbr_y1 = ps_ngbr_pu->mv.s_l1_mv.i2_mvy;

    /* Different number of motion vectors on the two sides */
    if(cur_is_bi != ngbr_is_bi)
    {
        return 1;
    }

    /* One motion vector on each side */
    if(!cur_is_bi)
    {
        WORD16 cur_x, cur_y;
        WORD16 ngbr_x, ngbr_y;
        UWORD32 cur_ref, ngbr_ref;

        if(PRED_L0 == ps_pu->b2_pred_mode)
        {
            cur_ref = cur_ref0;
            cur_x = cur_x0;
            cur_y = cur_y0;
        }
        else
        {
            cur_ref = cur_ref1;
            cur_x = cur_x1;
            cur_y = cur_y1;
        }

        if(PRED_L0 == ps_ngbr_pu->b2_pred_mode)
        {
            ngbr_ref = ngbr_ref0;
            ngbr_x = ngbr_x0;
            ngbr_y = ngbr_y0;
        }
        else
        {
            ngbr_ref = ngbr_ref1;
            ngbr_x = ngbr_x1;
            ngbr_y = ngbr_y1;
        }

        if(cur_ref != ngbr_ref)
        {
            return 1;
        }
        if(ABS(cur_x - ngbr_x) >= 4)
        {
            return 1;
        }
        if(ABS(cur_y - ngbr_y) >= 4)
        {
            return 1;
        }
        return 0;
    }

    /* Two motion vectors on each side */
    {
        WORD32 same_order = (cur_ref0 == ngbr_ref0) && (cur_ref1 == ngbr_ref1);
        WORD32 swapped_order = (cur_ref0 == ngbr_ref1) && (cur_ref1 == ngbr_ref0);
        WORD32 direct_far;
        WORD32 crossed_far;

        /* The sets of reference pictures differ */
        if(!same_order && !swapped_order)
        {
            return 1;
        }

        /* L0 against L0 and L1 against L1 */
        direct_far = (ABS(cur_x0 - ngbr_x0) >= 4) ||
                     (ABS(cur_y0 - ngbr_y0) >= 4) ||
                     (ABS(cur_x1 - ngbr_x1) >= 4) ||
                     (ABS(cur_y1 - ngbr_y1) >= 4);

        /* L0 against L1 and L1 against L0 */
        crossed_far = (ABS(cur_x0 - ngbr_x1) >= 4) ||
                      (ABS(cur_y0 - ngbr_y1) >= 4) ||
                      (ABS(cur_x1 - ngbr_x0) >= 4) ||
                      (ABS(cur_y1 - ngbr_y0) >= 4);

        /* Same picture in L0 and L1: either pairing may be the matching one */
        if(cur_ref0 == cur_ref1)
        {
            return (direct_far && crossed_far);
        }

        /* Distinct L0 and L1: compare the MVs that share a reference */
        return (cur_ref0 == ngbr_ref0) ? direct_far : crossed_far;
    }
}
207
208
/* QP is also populated in the same function */
209
WORD32 ihevcd_ctb_boundary_strength_islice(bs_ctxt_t *ps_bs_ctxt)
210
2.46M
{
211
2.46M
    pps_t *ps_pps;
212
2.46M
    sps_t *ps_sps;
213
2.46M
    tu_t *ps_tu;
214
2.46M
    UWORD32 *pu4_vert_bs;
215
2.46M
    UWORD32 *pu4_horz_bs;
216
2.46M
    WORD32 bs_strd;
217
2.46M
    WORD32 vert_bs0_tmp;
218
2.46M
    WORD32 horz_bs0_tmp;
219
2.46M
    UWORD8 *pu1_qp;
220
2.46M
    WORD32 qp_strd;
221
2.46M
    UWORD32 u4_qp_const_in_ctb;
222
2.46M
    WORD32 ctb_indx;
223
2.46M
    WORD32 i4_tu_cnt;
224
2.46M
    WORD32 log2_ctb_size;
225
2.46M
    WORD32 ctb_size;
226
227
2.46M
    WORD8 i1_loop_filter_across_tiles_enabled_flag;
228
2.46M
    WORD8 i1_loop_filter_across_slices_enabled_flag;
229
230
2.46M
    WORD32 i;
231
232
2.46M
    PROFILE_DISABLE_BOUNDARY_STRENGTH();
233
234
2.46M
    ps_pps = ps_bs_ctxt->ps_pps;
235
2.46M
    ps_sps = ps_bs_ctxt->ps_sps;
236
2.46M
    i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag;
237
2.46M
    i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag;
238
2.46M
    i4_tu_cnt = ps_bs_ctxt->i4_ctb_tu_cnt;
239
240
2.46M
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
241
2.46M
    ctb_size = (1 << log2_ctb_size);
242
243
    /* strides are in units of number of bytes */
244
    /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
245
2.46M
    bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
246
247
2.46M
    pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs +
248
2.46M
                    (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
249
2.46M
                    ps_bs_ctxt->i4_ctb_y * bs_strd);
250
2.46M
    pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs +
251
2.46M
                    (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
252
2.46M
                    ps_bs_ctxt->i4_ctb_y * bs_strd);
253
254
    /* ctb_size/8 elements per CTB */
255
2.46M
    qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
256
2.46M
    pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
257
258
2.46M
    ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y;
259
2.46M
    u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7));
260
261
2.46M
    vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
262
2.46M
    horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
263
264
    /* ctb_size/8 is the number of edges per CTB
265
     * ctb_size/4 is the number of BS values needed per edge
266
     * divided by 8 for the number of bytes
267
     * 2 is the number of bits needed for each BS value */
268
/*
269
    memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) / 8 * 2 );
270
    memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) / 8 * 2 );
271
*/
272
2.46M
    memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + ((ctb_size >> 5) << 1));
273
2.46M
    memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7)));
274
275
    /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */
276
2.46M
    if(0 != ps_bs_ctxt->i4_ctb_x)
277
2.40M
    {
278
2.40M
        pu4_vert_bs[0] |= vert_bs0_tmp;
279
2.40M
    }
280
281
    /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */
282
2.46M
    if(0 != ps_bs_ctxt->i4_ctb_y)
283
2.25M
    {
284
2.25M
        pu4_horz_bs[0] |= horz_bs0_tmp;
285
2.25M
    }
286
287
2.46M
    ps_tu = ps_bs_ctxt->ps_tu;
288
289
    /* Populating the QP array - if const_qp_in_ctb flag is one, set only the first element */
290
2.46M
    if(u4_qp_const_in_ctb)
291
2.45M
        pu1_qp[0] = ps_tu->b7_qp;
292
293
112M
    for(i = 0; i < i4_tu_cnt; i++)
294
109M
    {
295
109M
        WORD32 start_pos_x;
296
109M
        WORD32 start_pos_y;
297
109M
        WORD32 tu_size;
298
299
300
109M
        UWORD32 u4_bs;
301
109M
        ps_tu = ps_bs_ctxt->ps_tu + i;
302
303
        /* start_pos_x and start_pos_y are in units of min TU size (4x4) */
304
109M
        start_pos_x = ps_tu->b4_pos_x;
305
109M
        start_pos_y = ps_tu->b4_pos_y;
306
307
109M
        tu_size = 1 << (ps_tu->b3_size + 2);
308
109M
        tu_size >>= 2; /* TU size divided by 4 */
309
310
109M
        u4_bs = DUP_LSB_10(tu_size);
311
312
        /* Only if the current edge falls on 8 pixel grid set BS */
313
109M
        if(0 == (start_pos_x & 1))
314
109M
        {
315
109M
            WORD32 shift;
316
109M
            shift = start_pos_y * 2;
317
            /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
318
             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
319
             *  and deblocking is done on 8x8 grid
320
             */
321
109M
            if(6 != log2_ctb_size)
322
454k
                shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
323
109M
            pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
324
109M
        }
325
        /* Only if the current edge falls on 8 pixel grid set BS */
326
109M
        if(0 == (start_pos_y & 1))
327
109M
        {
328
109M
            WORD32 shift;
329
109M
            shift = start_pos_x * 2;
330
            /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
331
             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
332
             *  and deblocking is done on 8x8 grid
333
             */
334
109M
            if(6 != log2_ctb_size)
335
454k
                shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
336
109M
            pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
337
109M
        }
338
339
        /* Populating the QP array */
340
109M
        if(0 == u4_qp_const_in_ctb)
341
315k
        {
342
315k
            if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1))
343
170k
            {
344
170k
                WORD32 row, col;
345
414k
                for(row = start_pos_y; row < start_pos_y + tu_size; row += 2)
346
244k
                {
347
681k
                    for(col = start_pos_x; col < start_pos_x + tu_size; col += 2)
348
437k
                    {
349
437k
                        pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp;
350
437k
                    }
351
244k
                }
352
170k
            }
353
315k
        }
354
355
109M
    }
356
2.46M
    {
357
        /*Determine if the slice is dependent, and is its left neighbor belongs to the same slice, in a different tile*/
358
2.46M
        UWORD32 ctb_addr;
359
2.46M
        WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1;
360
        /* If left neighbor is not available, then set BS for entire first column to zero */
361
2.46M
        if(!ps_pps->i1_tiles_enabled_flag)
362
2.42M
        {
363
2.42M
            if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
364
2.37M
                            (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
365
2.37M
                            (0 == ps_bs_ctxt->i4_ctb_x))
366
52.9k
            {
367
52.9k
                pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
368
52.9k
            }
369
2.42M
        }
370
38.5k
        else
371
38.5k
        {
372
            //If across-tiles is disabled
373
38.5k
            if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x))
374
8.10k
            {
375
8.10k
                pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
376
8.10k
            }
377
30.4k
            else
378
30.4k
            {
379
30.4k
                ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
380
30.4k
                slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
381
30.4k
                if(ps_bs_ctxt->i4_ctb_x)
382
29.9k
                {
383
29.9k
                    ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
384
29.9k
                    left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
385
29.9k
                }
386
                /*If the 1st slice in a new tile is a dependent slice*/
387
30.4k
                if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx)))
388
30.0k
                {
389
                    /* Removed reduntant checks */
390
30.0k
                    if((0 == i1_loop_filter_across_slices_enabled_flag && (
391
24.8k
                                    ((slice_idx != left_slice_idx) && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
392
24.7k
                                    ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) ||
393
29.8k
                                    (0 == ps_bs_ctxt->i4_ctb_x))
394
257
                    {
395
257
                        pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
396
257
                    }
397
30.0k
                }
398
30.4k
            }
399
38.5k
        }
400
401
2.46M
        ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
402
2.46M
        slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
403
2.46M
        if(ps_bs_ctxt->i4_ctb_y)
404
2.25M
        {
405
2.25M
            ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
406
2.25M
            top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
407
2.25M
        }
408
409
        /* If top neighbor is not available, then set BS for entire first row to zero */
410
        /* Removed reduntant checks */
411
2.46M
        if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y)
412
2.24M
                        || (0 == i1_loop_filter_across_slices_enabled_flag && ((slice_idx != top_slice_idx)))
413
2.24M
                        || (0 == ps_bs_ctxt->i4_ctb_y))
414
215k
        {
415
215k
            pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
416
215k
        }
417
2.46M
    }
418
419
    /**
420
     *  Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB
421
     *   (They might have been set to  non zero values because of CBF of the current CTB)
422
     *   This block might not be needed for I slices*/
423
2.46M
    {
424
2.46M
        WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3;
425
2.46M
        WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3;
426
2.46M
        if(num_rows_remaining < (ctb_size >> 3))
427
263k
        {
428
            /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
429
             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
430
             *  and deblocking is done on 8x8 grid
431
             */
432
263k
            WORD32 offset;
433
263k
            offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2;
434
263k
            if(6 != log2_ctb_size)
435
1.69k
                offset += (num_rows_remaining & 1) << (log2_ctb_size - 4);
436
437
263k
            memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4));
438
263k
        }
439
440
2.46M
        if(num_cols_remaining < (ctb_size >> 3))
441
56.8k
        {
442
            /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
443
             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
444
             *  and deblocking is done on 8x8 grid
445
             */
446
447
56.8k
            WORD32 offset;
448
56.8k
            offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2;
449
56.8k
            if(6 != log2_ctb_size)
450
8.88k
                offset += (num_cols_remaining & 1) << (log2_ctb_size - 4);
451
452
56.8k
            memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4));
453
56.8k
        }
454
2.46M
    }
455
456
2.46M
    return 0;
457
2.46M
}
458
WORD32 ihevcd_ctb_boundary_strength_pbslice(bs_ctxt_t *ps_bs_ctxt)
459
802k
{
460
802k
    sps_t *ps_sps;
461
802k
    pps_t *ps_pps;
462
802k
    WORD32 cur_ctb_idx, next_ctb_idx = 0;
463
802k
    WORD32 i4_tu_cnt;
464
802k
    WORD32 i4_pu_cnt;
465
802k
    tu_t *ps_tu;
466
467
802k
    UWORD32 *pu4_vert_bs;
468
802k
    UWORD32 *pu4_horz_bs;
469
802k
    WORD32 bs_strd;
470
802k
    WORD32 vert_bs0_tmp;
471
802k
    WORD32 horz_bs0_tmp;
472
802k
    UWORD8 *pu1_qp;
473
802k
    WORD32 qp_strd;
474
802k
    UWORD32 u4_qp_const_in_ctb;
475
802k
    WORD32 ctb_indx;
476
802k
    WORD32 log2_ctb_size;
477
802k
    WORD32 ctb_size;
478
479
802k
    WORD32 i;
480
802k
    WORD8 i1_loop_filter_across_tiles_enabled_flag;
481
802k
    WORD8 i1_loop_filter_across_slices_enabled_flag;
482
483
802k
    PROFILE_DISABLE_BOUNDARY_STRENGTH();
484
485
802k
    ps_sps = ps_bs_ctxt->ps_sps;
486
802k
    ps_pps = ps_bs_ctxt->ps_pps;
487
488
802k
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
489
802k
    ctb_size = (1 << log2_ctb_size);
490
491
    /* strides are in units of number of bytes */
492
    /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
493
802k
    bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
494
495
802k
    pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_vert_bs +
496
802k
                    (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
497
802k
                    ps_bs_ctxt->i4_ctb_y * bs_strd);
498
802k
    pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_bs_ctxt->pu4_pic_horz_bs +
499
802k
                    (ps_bs_ctxt->i4_ctb_x << (2 * log2_ctb_size - 7)) +
500
802k
                    ps_bs_ctxt->i4_ctb_y * bs_strd);
501
502
802k
    vert_bs0_tmp = pu4_vert_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
503
802k
    horz_bs0_tmp = pu4_horz_bs[0] & (0xFFFFFFFF >> (sizeof(UWORD32) * 8 - ctb_size / 2));
504
505
802k
    ps_tu = ps_bs_ctxt->ps_tu;
506
507
    /* ctb_size/8 elements per CTB */
508
802k
    qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
509
802k
    pu1_qp = ps_bs_ctxt->pu1_pic_qp + ((ps_bs_ctxt->i4_ctb_x + ps_bs_ctxt->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
510
511
802k
    ctb_indx = ps_bs_ctxt->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_bs_ctxt->i4_ctb_y;
512
802k
    u4_qp_const_in_ctb = ps_bs_ctxt->pu1_pic_qp_const_in_ctb[ctb_indx >> 3] & (1 << (ctb_indx & 7));
513
514
802k
    i1_loop_filter_across_tiles_enabled_flag = ps_pps->i1_loop_filter_across_tiles_enabled_flag;
515
802k
    i1_loop_filter_across_slices_enabled_flag = ps_bs_ctxt->ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag;
516
517
    /* ctb_size/8 is the number of edges per CTB
518
     * ctb_size/4 is the number of BS values needed per edge
519
     * divided by 8 for the number of bytes
520
     * 2 is the number of bits needed for each BS value */
521
/*
522
    memset(pu4_vert_bs, 0, (ctb_size / 8 + 1) * (ctb_size / 4) * 2 / 8 );
523
    memset(pu4_horz_bs, 0, (ctb_size / 8) * (ctb_size / 4) * 2 / 8 );
524
*/
525
802k
    memset(pu4_vert_bs, 0, (1 << (2 * log2_ctb_size - 7)) + (ctb_size >> 4));
526
802k
    memset(pu4_horz_bs, 0, (1 << (2 * log2_ctb_size - 7)));
527
528
    /* pu4_vert_bs[0] has information about the left CTB which is not required when ctb_x = 0 */
529
802k
    if(0 != ps_bs_ctxt->i4_ctb_x)
530
757k
    {
531
757k
        pu4_vert_bs[0] |= vert_bs0_tmp;
532
757k
    }
533
534
    /* pu4_horz_bs[0] has information about the top CTB which is not required when ctb_y = 0 */
535
802k
    if(0 != ps_bs_ctxt->i4_ctb_y)
536
713k
    {
537
713k
        pu4_horz_bs[0] |= horz_bs0_tmp;
538
713k
    }
539
    /* pu4_horz_bs[bs_strd / 4] corresponds to pu4_horz_bs[0] of the bottom CTB */
540
802k
    *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) = 0;
541
542
802k
    cur_ctb_idx = ps_bs_ctxt->i4_ctb_x
543
802k
                    + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
544
802k
    next_ctb_idx = ps_bs_ctxt->i4_next_tu_ctb_cnt;
545
802k
    if(1 == ps_bs_ctxt->ps_codec->i4_num_cores)
546
208k
    {
547
208k
        i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx % RESET_TU_BUF_NCTB];
548
208k
    }
549
593k
    else
550
593k
    {
551
593k
        i4_tu_cnt = ps_bs_ctxt->pu4_pic_tu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_tu_idx[cur_ctb_idx];
552
593k
    }
553
554
802k
    ps_tu = ps_bs_ctxt->ps_tu;
555
802k
    if(u4_qp_const_in_ctb)
556
787k
        pu1_qp[0] = ps_tu->b7_qp;
557
558
    /* For all TUs in the CTB For left and top edges, check if there are coded coefficients on either sides of the edge */
559
14.1M
    for(i = 0; i < i4_tu_cnt; i++)
560
13.3M
    {
561
13.3M
        WORD32 start_pos_x;
562
13.3M
        WORD32 start_pos_y;
563
13.3M
        WORD32 end_pos_x;
564
13.3M
        WORD32 end_pos_y;
565
13.3M
        WORD32 tu_size;
566
13.3M
        UWORD32 u4_bs;
567
13.3M
        WORD32 intra_flag;
568
13.3M
        UWORD8 *pu1_pic_intra_flag;
569
570
13.3M
        ps_tu = ps_bs_ctxt->ps_tu + i;
571
572
13.3M
        start_pos_x = ps_tu->b4_pos_x;
573
13.3M
        start_pos_y = ps_tu->b4_pos_y;
574
575
13.3M
        tu_size = 1 << (ps_tu->b3_size + 2);
576
13.3M
        tu_size >>= 2;
577
578
13.3M
        end_pos_x = start_pos_x + tu_size;
579
13.3M
        end_pos_y = start_pos_y + tu_size;
580
581
13.3M
        {
582
13.3M
            WORD32 tu_abs_x = (ps_bs_ctxt->i4_ctb_x << log2_ctb_size) + (start_pos_x << 2);
583
13.3M
            WORD32 tu_abs_y = (ps_bs_ctxt->i4_ctb_y << log2_ctb_size) + (start_pos_y << 2);
584
585
13.3M
            WORD32 numbytes_row =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
586
587
13.3M
            pu1_pic_intra_flag = ps_bs_ctxt->ps_codec->pu1_pic_intra_flag;
588
13.3M
            pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row;
589
13.3M
            pu1_pic_intra_flag += (tu_abs_x >> 6);
590
591
13.3M
            intra_flag = *pu1_pic_intra_flag;
592
13.3M
            intra_flag &= (1 << ((tu_abs_x >> 3) % 8));
593
13.3M
        }
594
13.3M
        if(intra_flag)
595
335k
        {
596
335k
            u4_bs = DUP_LSB_10(tu_size);
597
598
            /* Only if the current edge falls on 8 pixel grid set BS */
599
335k
            if(0 == (start_pos_x & 1))
600
221k
            {
601
221k
                WORD32 shift;
602
221k
                shift = start_pos_y * 2;
603
                /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
604
                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
605
                 *  and deblocking is done on 8x8 grid
606
                 */
607
221k
                if(6 != log2_ctb_size)
608
5.85k
                    shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
609
221k
                pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
610
221k
            }
611
            /* Only if the current edge falls on 8 pixel grid set BS */
612
335k
            if(0 == (start_pos_y & 1))
613
221k
            {
614
221k
                WORD32 shift;
615
221k
                shift = start_pos_x * 2;
616
                /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
617
                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
618
                 *  and deblocking is done on 8x8 grid
619
                 */
620
221k
                if(6 != log2_ctb_size)
621
5.85k
                    shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
622
221k
                pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
623
221k
            }
624
335k
        }
625
626
627
        /* If the current TU is coded then set both top edge and left edge BS to 1 and go to next TU */
628
13.3M
        if(ps_tu->b1_y_cbf)
629
542k
        {
630
542k
            u4_bs = DUP_LSB_01(tu_size);
631
632
            /* Only if the current edge falls on 8 pixel grid set BS */
633
542k
            if(0 == (start_pos_x & 1))
634
403k
            {
635
403k
                WORD32 shift;
636
403k
                shift = start_pos_y * 2;
637
                /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
638
                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
639
                 *  and deblocking is done on 8x8 grid
640
                 */
641
403k
                if(6 != log2_ctb_size)
642
4.11k
                    shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
643
403k
                pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
644
403k
            }
645
            /* Only if the current edge falls on 8 pixel grid set BS */
646
542k
            if(0 == (start_pos_y & 1))
647
402k
            {
648
402k
                WORD32 shift;
649
402k
                shift = start_pos_x * 2;
650
                /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
651
                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
652
                 *  and deblocking is done on 8x8 grid
653
                 */
654
402k
                if(6 != log2_ctb_size)
655
3.93k
                    shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
656
402k
                pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
657
402k
            }
658
            /* Only if the current edge falls on 8 pixel grid set BS */
659
542k
            if(0 == (end_pos_x & 1))
660
402k
            {
661
402k
                if(!(ctb_size / 8 == (end_pos_x >> 1) && ps_bs_ctxt->i4_ctb_x == ps_sps->i2_pic_wd_in_ctb - 1))
662
400k
                {
663
400k
                    WORD32 shift;
664
400k
                    shift = start_pos_y * 2;
665
400k
                    shift += (((end_pos_x >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1));
666
400k
                    pu4_vert_bs[end_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
667
400k
                }
668
402k
            }
669
            /* Only if the current edge falls on 8 pixel grid set BS */
670
542k
            if(0 == (end_pos_y & 1))
671
403k
            {
672
                /* If end_pos_y corresponds to the bottom of the CTB, write to pu4_horz_bs[0] of the bottom CTB */
673
403k
                if(ctb_size / 8 == (end_pos_y >> 1))
674
71.8k
                {
675
71.8k
                    *(UWORD32 *)((UWORD8 *)pu4_horz_bs + bs_strd) |= (u4_bs << (start_pos_x * 2));
676
71.8k
                }
677
331k
                else
678
331k
                {
679
331k
                    WORD32 shift;
680
331k
                    shift = start_pos_x * 2;
681
331k
                    shift += (((end_pos_y >> 1) & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1));
682
331k
                    pu4_horz_bs[end_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
683
331k
                }
684
403k
            }
685
542k
        }
686
687
13.3M
        if(0 == u4_qp_const_in_ctb)
688
320k
        {
689
320k
            if(0 == (start_pos_x & 1) && 0 == (start_pos_y & 1))
690
248k
            {
691
248k
                WORD32 row, col;
692
628k
                for(row = start_pos_y; row < start_pos_y + tu_size; row += 2)
693
380k
                {
694
1.13M
                    for(col = start_pos_x; col < start_pos_x + tu_size; col += 2)
695
756k
                    {
696
756k
                        pu1_qp[(row >> 1) * qp_strd + (col >> 1)] = ps_tu->b7_qp;
697
756k
                    }
698
380k
                }
699
248k
            }
700
320k
        }
701
13.3M
    }
702
703
    /* For all PUs in the CTB,
704
    For left and top edges, compute BS */
705
706
802k
    cur_ctb_idx = ps_bs_ctxt->i4_ctb_x
707
802k
                    + ps_bs_ctxt->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
708
709
802k
    {
710
802k
        WORD32 next_ctb_idx;
711
802k
        next_ctb_idx = ps_bs_ctxt->i4_next_pu_ctb_cnt;
712
802k
        i4_pu_cnt = ps_bs_ctxt->pu4_pic_pu_idx[next_ctb_idx] - ps_bs_ctxt->pu4_pic_pu_idx[cur_ctb_idx];
713
802k
    }
714
715
13.7M
    for(i = 0; i < i4_pu_cnt; i++)
716
12.9M
    {
717
12.9M
        WORD32 start_pos_x;
718
12.9M
        WORD32 start_pos_y;
719
12.9M
        WORD32 end_pos_x;
720
12.9M
        WORD32 end_pos_y;
721
12.9M
        WORD32 pu_wd, pu_ht;
722
12.9M
        UWORD32 u4_bs;
723
12.9M
        pu_t *ps_pu = ps_bs_ctxt->ps_pu + i;
724
12.9M
        pu_t *ps_ngbr_pu;
725
12.9M
        UWORD32 u4_ngbr_pu_indx;
726
727
12.9M
        start_pos_x = ps_pu->b4_pos_x;
728
12.9M
        start_pos_y = ps_pu->b4_pos_y;
729
730
12.9M
        pu_wd = (ps_pu->b4_wd + 1);
731
12.9M
        pu_ht = (ps_pu->b4_ht + 1);
732
733
12.9M
        end_pos_x = start_pos_x + pu_wd;
734
12.9M
        end_pos_y = start_pos_y + pu_ht;
735
736
        /* If the current PU is intra, set Boundary strength as 2 for both top and left edge */
737
        /* Need not mask the BS to zero even if it was set to 1 already since BS 2 and 3 are assumed to be the same in leaf level functions */
738
12.9M
        if(ps_pu->b1_intra_flag)
739
125k
        {
740
125k
            u4_bs = DUP_LSB_10(pu_ht);
741
742
            /* Only if the current edge falls on 8 pixel grid set BS */
743
125k
            if(0 == (start_pos_x & 1))
744
125k
            {
745
125k
                WORD32 shift;
746
125k
                shift = start_pos_y * 2;
747
                /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
748
                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
749
                 *  and deblocking is done on 8x8 grid
750
                 */
751
125k
                if(6 != log2_ctb_size)
752
1.66k
                    shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
753
125k
                pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
754
125k
            }
755
756
125k
            u4_bs = DUP_LSB_10(pu_wd);
757
758
            /* Only if the current edge falls on 8 pixel grid set BS */
759
125k
            if(0 == (start_pos_y & 1))
760
125k
            {
761
125k
                WORD32 shift;
762
125k
                shift = start_pos_x * 2;
763
                /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
764
                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
765
                 *  and deblocking is done on 8x8 grid
766
                 */
767
125k
                if(6 != log2_ctb_size)
768
1.67k
                    shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
769
125k
                pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
770
125k
            }
771
125k
        }
772
773
12.8M
        else
774
12.8M
        {
775
            /* Vertical edge */
776
            /* Process only if the edge is not a frame edge */
777
12.8M
            if(0 != ps_bs_ctxt->i4_ctb_x + start_pos_x)
778
12.7M
            {
779
12.7M
                do
780
13.0M
                {
781
13.0M
                    WORD32 pu_ngbr_ht;
782
13.0M
                    WORD32 min_pu_ht;
783
13.0M
                    WORD32 ngbr_end_pos_y;
784
13.0M
                    UWORD32 ngbr_pu_idx_strd;
785
13.0M
                    ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
786
13.0M
                    u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y + 1) * ngbr_pu_idx_strd + (start_pos_x)];
787
13.0M
                    ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx;
788
789
13.0M
                    pu_ngbr_ht = ps_ngbr_pu->b4_ht + 1;
790
13.0M
                    ngbr_end_pos_y = ps_ngbr_pu->b4_pos_y + pu_ngbr_ht;
791
792
13.0M
                    min_pu_ht = MIN(ngbr_end_pos_y, end_pos_y) - start_pos_y;
793
794
13.0M
                    if(ps_ngbr_pu->b1_intra_flag)
795
67.0k
                    {
796
67.0k
                        u4_bs = DUP_LSB_10(min_pu_ht);
797
798
                        /* Only if the current edge falls on 8 pixel grid set BS */
799
67.0k
                        if(0 == (start_pos_x & 1))
800
67.0k
                        {
801
67.0k
                            WORD32 shift;
802
67.0k
                            shift = start_pos_y * 2;
803
                            /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
804
                             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
805
                             *  and deblocking is done on 8x8 grid
806
                             */
807
67.0k
                            if(6 != log2_ctb_size)
808
1.79k
                                shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
809
67.0k
                            pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
810
67.0k
                        }
811
67.0k
                    }
812
12.9M
                    else
813
12.9M
                    {
814
12.9M
                        u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu);
815
12.9M
                        if(u4_bs)
816
1.27M
                        {
817
1.27M
                            u4_bs = DUP_LSB_01(min_pu_ht);
818
1.27M
                            if(0 == (start_pos_x & 1))
819
1.17M
                            {
820
1.17M
                                WORD32 shift;
821
1.17M
                                shift = start_pos_y * 2;
822
                                /* shift += (((start_pos_x >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
823
                                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
824
                                 *  and deblocking is done on 8x8 grid
825
                                 */
826
1.17M
                                if(6 != log2_ctb_size)
827
18.9k
                                    shift += ((start_pos_x & 2) << (log2_ctb_size - 2));
828
1.17M
                                pu4_vert_bs[start_pos_x >> (7 - log2_ctb_size)] |= (u4_bs << shift);
829
1.17M
                            }
830
1.27M
                        }
831
12.9M
                    }
832
833
13.0M
                    pu_ht -= min_pu_ht;
834
13.0M
                    start_pos_y += min_pu_ht;
835
13.0M
                }while(pu_ht > 0);
836
837
                /* Reinitialising since the values are updated in the previous loop */
838
12.7M
                pu_ht = ps_pu->b4_ht + 1;
839
12.7M
                start_pos_y = ps_pu->b4_pos_y;
840
12.7M
            }
841
842
            /* Horizontal edge */
843
            /* Process only if the edge is not a frame edge */
844
12.8M
            if(0 != ps_bs_ctxt->i4_ctb_y + start_pos_y)
845
12.6M
            {
846
12.6M
                do
847
12.9M
                {
848
12.9M
                    WORD32 pu_ngbr_wd;
849
12.9M
                    WORD32 min_pu_wd;
850
12.9M
                    WORD32 ngbr_end_pos_x;
851
12.9M
                    UWORD32 ngbr_pu_idx_strd = MAX_CTB_SIZE / MIN_PU_SIZE + 2;
852
12.9M
                    u4_ngbr_pu_indx = ps_bs_ctxt->pu4_pic_pu_idx_map[(start_pos_y)*ngbr_pu_idx_strd + (start_pos_x + 1)];
853
12.9M
                    ps_ngbr_pu = ps_bs_ctxt->ps_pic_pu + u4_ngbr_pu_indx;
854
855
12.9M
                    pu_ngbr_wd = ps_ngbr_pu->b4_wd + 1;
856
12.9M
                    ngbr_end_pos_x = ps_ngbr_pu->b4_pos_x + pu_ngbr_wd;
857
858
12.9M
                    min_pu_wd = MIN(ngbr_end_pos_x, end_pos_x) - start_pos_x;
859
860
12.9M
                    if(ps_ngbr_pu->b1_intra_flag)
861
71.9k
                    {
862
71.9k
                        u4_bs = DUP_LSB_10(min_pu_wd);
863
864
                        /* Only if the current edge falls on 8 pixel grid set BS */
865
71.9k
                        if(0 == (start_pos_y & 1))
866
71.9k
                        {
867
71.9k
                            WORD32 shift;
868
71.9k
                            shift = start_pos_x * 2;
869
                            /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
870
                             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
871
                             *  and deblocking is done on 8x8 grid
872
                             */
873
71.9k
                            if(6 != log2_ctb_size)
874
1.50k
                                shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
875
71.9k
                            pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
876
71.9k
                        }
877
71.9k
                    }
878
12.9M
                    else
879
12.9M
                    {
880
12.9M
                        u4_bs = ihevcd_pu_boundary_strength(ps_pu, ps_ngbr_pu);
881
12.9M
                        if(u4_bs)
882
1.25M
                        {
883
1.25M
                            u4_bs = DUP_LSB_01(min_pu_wd);
884
885
                            /* Only if the current edge falls on 8 pixel grid set BS */
886
1.25M
                            if(0 == (start_pos_y & 1))
887
1.18M
                            {
888
1.18M
                                WORD32 shift;
889
1.18M
                                shift = start_pos_x * 2;
890
                                /* shift += (((start_pos_y >> 1) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 1));
891
                                 *  will reduce to the following assuming ctb size is one of 16, 32 and 64
892
                                 *  and deblocking is done on 8x8 grid
893
                                 */
894
1.18M
                                if(6 != log2_ctb_size)
895
24.0k
                                    shift += ((start_pos_y & 2) << (log2_ctb_size - 2));
896
1.18M
                                pu4_horz_bs[start_pos_y >> (7 - log2_ctb_size)] |= (u4_bs << shift);
897
1.18M
                            }
898
1.25M
                        }
899
12.9M
                    }
900
901
12.9M
                    pu_wd -= min_pu_wd;
902
12.9M
                    start_pos_x += min_pu_wd;
903
12.9M
                }while(pu_wd > 0);
904
905
                /* Reinitialising since the values are updated in the previous loop */
906
12.6M
                pu_wd = ps_pu->b4_wd + 1;
907
12.6M
                start_pos_x = ps_pu->b4_pos_x;
908
12.6M
            }
909
12.8M
        }
910
12.9M
    }
911
912
802k
    {
913
        /* If left neighbor is not available, then set BS for entire first column to zero */
914
802k
        UWORD32 ctb_addr;
915
802k
        WORD32 slice_idx, left_slice_idx = -1, top_slice_idx = -1;
916
917
802k
        if(!ps_pps->i1_tiles_enabled_flag)
918
600k
        {
919
600k
            if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x) ||
920
566k
                            (0 == i1_loop_filter_across_slices_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
921
566k
                            (0 == ps_bs_ctxt->i4_ctb_x))
922
34.4k
            {
923
34.4k
                pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
924
34.4k
            }
925
600k
        }
926
201k
        else
927
201k
        {
928
201k
            if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_x))
929
6.71k
            {
930
6.71k
                pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
931
6.71k
            }
932
195k
            else
933
195k
            {
934
935
195k
                ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
936
195k
                slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
937
938
195k
                if(ps_bs_ctxt->i4_ctb_x)
939
189k
                {
940
189k
                    ctb_addr = (ps_bs_ctxt->i4_ctb_x - 1) + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
941
189k
                    left_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
942
189k
                }
943
944
195k
                if(!((ps_bs_ctxt->ps_slice_hdr->i1_dependent_slice_flag == 1) && (slice_idx == left_slice_idx)))
945
94.8k
                {
946
                    /* Removed redundant checks */
947
94.8k
                    if((0 == i1_loop_filter_across_slices_enabled_flag && (
948
76.4k
                                    (0 == ps_bs_ctxt->i4_ctb_slice_x && 0 == ps_bs_ctxt->i4_ctb_slice_y) ||
949
86.7k
                                    ((0 == ps_bs_ctxt->i4_ctb_tile_x) && (slice_idx != left_slice_idx)))) || (0 == ps_bs_ctxt->i4_ctb_x))
950
8.78k
                    {
951
8.78k
                        pu4_vert_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
952
8.78k
                    }
953
94.8k
                }
954
195k
            }
955
201k
        }
956
957
802k
        ctb_addr = ps_bs_ctxt->i4_ctb_x + (ps_bs_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb);
958
802k
        slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
959
802k
        if(ps_bs_ctxt->i4_ctb_y)
960
714k
        {
961
714k
            ctb_addr = (ps_bs_ctxt->i4_ctb_x) + ((ps_bs_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb);
962
714k
            top_slice_idx = ps_bs_ctxt->pu1_slice_idx[ctb_addr];
963
714k
        }
964
        /* If top neighbor is not available, then set BS for entire first row to zero */
965
        /* Removed redundant checks */
966
802k
        if((0 == i1_loop_filter_across_tiles_enabled_flag && 0 == ps_bs_ctxt->i4_ctb_tile_y)
967
708k
                        || (0 == i1_loop_filter_across_slices_enabled_flag && ((slice_idx != top_slice_idx)))
968
686k
                        || (0 == ps_bs_ctxt->i4_ctb_y))
969
116k
        {
970
116k
            pu4_horz_bs[0] &= (64 == ctb_size) ? 0 : ((UWORD32)0xFFFFFFFF) << (ctb_size / 2);
971
116k
        }
972
802k
    }
973
974
    /**
975
     *  Set BS of bottom and right frame boundaries to zero if it is an incomplete CTB
976
     *   (They might have been set to non-zero values because of the CBF of the current CTB) */
977
802k
    {
978
802k
        WORD32 num_rows_remaining = (ps_sps->i2_pic_height_in_luma_samples - (ps_bs_ctxt->i4_ctb_y << log2_ctb_size)) >> 3;
979
802k
        WORD32 num_cols_remaining = (ps_sps->i2_pic_width_in_luma_samples - (ps_bs_ctxt->i4_ctb_x << log2_ctb_size)) >> 3;
980
802k
        if(num_rows_remaining < (ctb_size >> 3))
981
68.2k
        {
982
            /* WORD32 offset = (((num_rows_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
983
             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
984
             *  and deblocking is done on 8x8 grid
985
             */
986
68.2k
            WORD32 offset;
987
68.2k
            offset = (num_rows_remaining >> (6 - log2_ctb_size)) << 2;
988
68.2k
            if(6 != log2_ctb_size)
989
4.23k
                offset += (num_rows_remaining & 1) << (log2_ctb_size - 4);
990
991
68.2k
            memset(((UWORD8 *)pu4_horz_bs) + offset, 0, 1 << (log2_ctb_size - 4));
992
68.2k
        }
993
994
802k
        if(num_cols_remaining < (ctb_size >> 3))
995
28.6k
        {
996
            /* WORD32 offset = (((num_cols_remaining >> 3) & (MAX_CTB_SIZE / ctb_size - 1)) << (log2_ctb_size - 4));
997
             *  will reduce to the following assuming ctb size is one of 16, 32 and 64
998
             *  and deblocking is done on 8x8 grid
999
             */
1000
1001
28.6k
            WORD32 offset;
1002
28.6k
            offset = (num_cols_remaining >> (6 - log2_ctb_size)) << 2;
1003
28.6k
            if(6 != log2_ctb_size)
1004
14.5k
                offset += (num_cols_remaining & 1) << (log2_ctb_size - 4);
1005
1006
28.6k
            memset(((UWORD8 *)pu4_vert_bs) + offset, 0, 1 << (log2_ctb_size - 4));
1007
28.6k
        }
1008
802k
    }
1009
802k
    return 0;
1010
802k
}