Coverage Report

Created: 2026-02-26 07:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/decoder/ihevcd_deblk.c
Line
Count
Source
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
*******************************************************************************
20
* @file
21
*  ihevcd_deblk.c
22
*
23
* @brief
24
*  Contains definition for the ctb level deblk function
25
*
26
* @author
27
*  Srinivas T
28
*
29
* @par List of Functions:
30
*   - ihevcd_deblk_ctb()
31
*
32
* @remarks
33
*  None
34
*
35
*******************************************************************************
36
*/
37
38
#include <stdio.h>
39
#include <stddef.h>
40
#include <stdlib.h>
41
#include <string.h>
42
#include <assert.h>
43
44
#include "ihevc_typedefs.h"
45
#include "iv.h"
46
#include "ivd.h"
47
#include "ihevcd_cxa.h"
48
#include "ithread.h"
49
50
#include "ihevc_defs.h"
51
#include "ihevc_debug.h"
52
#include "ihevc_defs.h"
53
#include "ihevc_structs.h"
54
#include "ihevc_macros.h"
55
#include "ihevc_platform_macros.h"
56
#include "ihevc_cabac_tables.h"
57
58
#include "ihevc_error.h"
59
#include "ihevc_common_tables.h"
60
61
#include "ihevcd_trace.h"
62
#include "ihevcd_defs.h"
63
#include "ihevcd_function_selector.h"
64
#include "ihevcd_structs.h"
65
#include "ihevcd_error.h"
66
#include "ihevcd_nal.h"
67
#include "ihevcd_bitstream.h"
68
#include "ihevcd_job_queue.h"
69
#include "ihevcd_utils.h"
70
#include "ihevcd_debug.h"
71
72
#include "ihevc_deblk.h"
73
#include "ihevc_deblk_tables.h"
74
#include "ihevcd_profile.h"
75
/**
76
*******************************************************************************
77
*
78
* @brief
79
*     Deblock CTB level function.
80
*
81
* @par Description:
82
*     For a given CTB, deblocking on both vertical and
83
*     horizontal edges is done. Both the luma and chroma
84
*     blocks are processed
85
*
86
* @param[in] ps_deblk
87
*  Pointer to the deblock context
88
*
89
* @returns
90
*  None
91
* @remarks
92
*  None
93
*
94
*******************************************************************************
95
*/
96
97
void ihevcd_deblk_ctb(deblk_ctxt_t *ps_deblk,
98
                      WORD32 i4_is_last_ctb_x,
99
                      WORD32 i4_is_last_ctb_y)
100
3.01M
{
101
3.01M
    WORD32 ctb_size;
102
3.01M
    WORD32 log2_ctb_size;
103
3.01M
    UWORD32 u4_bs;
104
3.01M
    WORD32 bs_tz; /*Leading zeros in boundary strength*/
105
3.01M
    WORD32 qp_p, qp_q;
106
107
3.01M
    WORD32 filter_p, filter_q;
108
109
3.01M
    UWORD8 *pu1_src;
110
3.01M
    WORD32 qp_strd;
111
3.01M
    UWORD32 *pu4_vert_bs, *pu4_horz_bs;
112
3.01M
    UWORD32 *pu4_ctb_vert_bs, *pu4_ctb_horz_bs;
113
3.01M
    WORD32 bs_strd;
114
3.01M
    WORD32 src_strd;
115
3.01M
    UWORD8 *pu1_qp;
116
3.01M
    UWORD16 *pu2_ctb_no_loop_filter_flag;
117
3.01M
    UWORD16 au2_ctb_no_loop_filter_flag[9];
118
119
3.01M
    WORD32 col, row;
120
121
    /* Flag to indicate if QP is constant in CTB
122
     * 0 - top_left, 1 - top, 2 - left, 3 - current */
123
3.01M
    UWORD32 u4_qp_const_in_ctb[4] = { 0, 0, 0, 0 };
124
3.01M
    WORD32 ctb_indx;
125
3.01M
    WORD32  chroma_yuv420sp_vu = ps_deblk->is_chroma_yuv420sp_vu;
126
3.01M
    sps_t *ps_sps;
127
3.01M
    pps_t *ps_pps;
128
3.01M
    codec_t *ps_codec;
129
3.01M
    slice_header_t *ps_slice_hdr;
130
131
3.01M
    PROFILE_DISABLE_DEBLK();
132
133
3.01M
    ps_sps = ps_deblk->ps_sps;
134
3.01M
    ps_pps = ps_deblk->ps_pps;
135
3.01M
    ps_codec = ps_deblk->ps_codec;
136
3.01M
    ps_slice_hdr = ps_deblk->ps_slice_hdr;
137
138
3.01M
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
139
3.01M
    ctb_size = (1 << ps_sps->i1_log2_ctb_size);
140
141
    /* strides are in units of number of bytes */
142
    /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
143
3.01M
    bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
144
145
3.01M
    pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_vert_bs +
146
3.01M
                    (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
147
3.01M
                    ps_deblk->i4_ctb_y * bs_strd);
148
3.01M
    pu4_ctb_vert_bs = pu4_vert_bs;
149
150
3.01M
    pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_horz_bs +
151
3.01M
                    (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
152
3.01M
                    ps_deblk->i4_ctb_y * bs_strd);
153
3.01M
    pu4_ctb_horz_bs = pu4_horz_bs;
154
155
3.01M
    qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
156
3.01M
    pu1_qp = ps_deblk->s_bs_ctxt.pu1_pic_qp + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
157
158
3.01M
    pu2_ctb_no_loop_filter_flag = ps_deblk->au2_ctb_no_loop_filter_flag;
159
160
3.01M
    ctb_indx = ps_deblk->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_deblk->i4_ctb_y;
161
3.01M
    if(i4_is_last_ctb_y)
162
7.42k
    {
163
7.42k
        pu4_vert_bs = (UWORD32 *)((UWORD8 *)pu4_vert_bs + bs_strd);
164
7.42k
        pu4_ctb_vert_bs = pu4_vert_bs;
165
        /* ctb_size/8 is the number of edges per CTB
166
         * ctb_size/4 is the number of BS values needed per edge
167
         * divided by 8 for the number of bytes
168
         * 2 is the number of bits needed for each BS value */
169
7.42k
        memset(pu4_vert_bs, 0, 1 << (2 * log2_ctb_size - 7));
170
171
7.42k
        pu1_qp += (qp_strd << (log2_ctb_size - 3));
172
7.42k
        pu2_ctb_no_loop_filter_flag += (ctb_size >> 3);
173
7.42k
        ctb_indx += ps_sps->i2_pic_wd_in_ctb;
174
7.42k
    }
175
176
3.01M
    if(i4_is_last_ctb_x)
177
41.1k
    {
178
41.1k
        pu4_horz_bs = (UWORD32 *)((UWORD8 *)pu4_horz_bs + (1 << (2 * log2_ctb_size - 7)));
179
41.1k
        pu4_ctb_horz_bs = pu4_horz_bs;
180
41.1k
        memset(pu4_horz_bs, 0, 1 << (2 * log2_ctb_size - 7));
181
182
41.1k
        pu1_qp += (ctb_size >> 3);
183
184
362k
        for(row = 0; row < (ctb_size >> 3) + 1; row++)
185
320k
            au2_ctb_no_loop_filter_flag[row] = ps_deblk->au2_ctb_no_loop_filter_flag[row] >> (ctb_size >> 3);
186
41.1k
        pu2_ctb_no_loop_filter_flag = au2_ctb_no_loop_filter_flag;
187
41.1k
        ctb_indx += 1;
188
41.1k
    }
189
190
3.01M
    u4_qp_const_in_ctb[3] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx) >> 3] & (1 << (ctb_indx & 7));
191
192
3.01M
    if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
193
2.85M
    {
194
2.85M
        u4_qp_const_in_ctb[2] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - 1) >> 3] & (1 << ((ctb_indx - 1) & 7));
195
2.85M
    }
196
197
3.01M
    if((ps_deblk->i4_ctb_x || i4_is_last_ctb_x) && (ps_deblk->i4_ctb_y || i4_is_last_ctb_y))
198
2.68M
    {
199
2.68M
        u4_qp_const_in_ctb[0] =
200
2.68M
                        ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) >> 3] &
201
2.68M
                        (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) & 7));
202
2.68M
    }
203
204
205
206
3.01M
    if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
207
2.83M
    {
208
2.83M
        u4_qp_const_in_ctb[1] =
209
2.83M
                        ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb) >> 3] &
210
2.83M
                        (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb) & 7));
211
2.83M
    }
212
213
3.01M
    src_strd = ps_codec->i4_strd;
214
215
    /* Luma Vertical Edge */
216
217
3.01M
    if(0 == i4_is_last_ctb_x)
218
2.97M
    {
219
        /* Top CTB's slice header */
220
2.97M
        slice_header_t *ps_slice_hdr_top;
221
2.97M
        {
222
2.97M
            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
223
2.97M
            if(i4_is_last_ctb_y)
224
7.42k
                cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
225
2.97M
            ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
226
2.97M
        }
227
228
2.97M
        pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << (log2_ctb_size));
229
2.97M
        pu1_src += i4_is_last_ctb_y ? ps_deblk->ps_codec->i4_strd << log2_ctb_size : 0;
230
231
        /** Deblocking is done on a shifted CTB -
232
         *  Vertical edge processing is done by shifting the CTB up by four pixels */
233
2.97M
        pu1_src -= 4 * src_strd;
234
235
16.6M
        for(col = 0; col < ctb_size / 8; col++)
236
13.6M
        {
237
13.6M
            WORD32 shift = 0;
238
239
            /*  downshift vert_bs by ctb_size/2 for each column
240
             *  shift = (col & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1);
241
             *  which will reduce to the following assuming ctb size is one of 16, 32 and 64
242
             *  and deblocking is done on 8x8 grid
243
             */
244
13.6M
            if(6 != log2_ctb_size)
245
4.92M
                shift = (col & 1) << (log2_ctb_size - 1);
246
247
            /* BS for the column - Last row is excluded and the top row is included*/
248
13.6M
            u4_bs = (pu4_vert_bs[0] >> shift) << 2;
249
250
13.6M
            if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
251
12.4M
            {
252
                /* Picking the last BS of the previous CTB corresponding to the same column */
253
12.4M
                UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
254
12.4M
                UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
255
12.4M
                u4_bs |= u4_top_bs & 3;
256
12.4M
            }
257
258
62.2M
            for(row = 0; row < ctb_size / 4;)
259
48.5M
            {
260
48.5M
                WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
261
48.5M
                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
262
263
                /* Trailing zeros are computed and the corresponding rows are not processed */
264
48.5M
                bs_tz = CTZ(u4_bs) >> 1;
265
48.5M
                if(0 != bs_tz)
266
19.2M
                {
267
19.2M
                    u4_bs = u4_bs >> (bs_tz << 1);
268
19.2M
                    if((row + bs_tz) >= (ctb_size / 4))
269
11.8M
                        pu1_src += 4 * (ctb_size / 4 - row) * src_strd;
270
7.45M
                    else
271
7.45M
                        pu1_src += 4 * bs_tz  * src_strd;
272
273
19.2M
                    row += bs_tz;
274
19.2M
                    continue;
275
19.2M
                }
276
277
29.2M
                if(0 == row)
278
1.79M
                {
279
1.79M
                    i1_beta_offset_div2 = ps_slice_hdr_top->i1_beta_offset_div2;
280
1.79M
                    i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
281
282
1.79M
                    if(0 == col)
283
375k
                    {
284
375k
                        qp_p = u4_qp_const_in_ctb[0] ?
285
356k
                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
286
375k
                                        pu1_qp[-qp_strd - 1];
287
375k
                    }
288
1.42M
                    else
289
1.42M
                    {
290
1.42M
                        qp_p = u4_qp_const_in_ctb[1] ?
291
1.35M
                                        pu1_qp[-ctb_size / 8 * qp_strd] :
292
1.42M
                                        pu1_qp[col - 1 - qp_strd];
293
1.42M
                    }
294
295
1.79M
                    qp_q = u4_qp_const_in_ctb[1] ?
296
1.71M
                                    pu1_qp[-ctb_size / 8 * qp_strd] :
297
1.79M
                                    pu1_qp[col - qp_strd];
298
1.79M
                }
299
27.4M
                else
300
27.4M
                {
301
27.4M
                    if(0 == col)
302
5.84M
                    {
303
5.84M
                        qp_p = u4_qp_const_in_ctb[2] ?
304
5.52M
                                        pu1_qp[-ctb_size / 8] :
305
5.84M
                                        pu1_qp[((row - 1) >> 1) * qp_strd - 1];
306
5.84M
                    }
307
21.6M
                    else
308
21.6M
                    {
309
21.6M
                        qp_p = u4_qp_const_in_ctb[3] ?
310
21.0M
                                        pu1_qp[0] :
311
21.6M
                                        pu1_qp[((row - 1) >> 1) * qp_strd + col - 1];
312
21.6M
                    }
313
314
27.4M
                    qp_q = u4_qp_const_in_ctb[3] ?
315
26.3M
                                    pu1_qp[0] :
316
27.4M
                                    pu1_qp[((row - 1) >> 1) * qp_strd + col];
317
27.4M
                }
318
319
29.2M
                filter_p = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 1;
320
29.2M
                filter_q = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 2;
321
                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
322
29.2M
                filter_p = !filter_p;
323
29.2M
                filter_q = !filter_q;
324
325
29.2M
                if(filter_p || filter_q)
326
29.3M
                {
327
29.3M
                    DUMP_DEBLK_LUMA_VERT(pu1_src, src_strd,
328
29.3M
                                         u4_bs & 3, qp_p, qp_q,
329
29.3M
                                         ps_slice_hdr->i1_beta_offset_div2,
330
29.3M
                                         ps_slice_hdr->i1_tc_offset_div2,
331
29.3M
                                         filter_p, filter_q);
332
29.3M
                    ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr(pu1_src, src_strd,
333
29.3M
                                                                         u4_bs & 3, qp_p, qp_q,
334
29.3M
                                                                         i1_beta_offset_div2,
335
29.3M
                                                                         i1_tc_offset_div2,
336
29.3M
                                                                         filter_p, filter_q);
337
29.3M
                }
338
339
29.2M
                pu1_src += 4 * src_strd;
340
29.2M
                u4_bs = u4_bs >> 2;
341
29.2M
                row++;
342
29.2M
            }
343
344
13.6M
            if((64 == ctb_size) ||
345
4.92M
                            ((32 == ctb_size) && (col & 1)))
346
9.87M
            {
347
9.87M
                pu4_vert_bs++;
348
9.87M
            }
349
13.6M
            pu1_src -= (src_strd << log2_ctb_size);
350
13.6M
            pu1_src += 8;
351
13.6M
        }
352
2.97M
        pu4_vert_bs = pu4_ctb_vert_bs;
353
2.97M
    }
354
355
356
    /* Luma Horizontal Edge */
357
358
3.01M
    if(0 == i4_is_last_ctb_y)
359
3.01M
    {
360
361
        /* Left CTB's slice header */
362
3.01M
        slice_header_t *ps_slice_hdr_left;
363
3.01M
        {
364
3.01M
            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
365
3.01M
            if(i4_is_last_ctb_x)
366
41.1k
                cur_ctb_indx += 1;
367
3.01M
            ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
368
3.01M
        }
369
3.01M
        pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << log2_ctb_size);
370
3.01M
        pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
371
372
        /** Deblocking is done on a shifted CTB -
373
         *  Horizontal edge processing is done by shifting the CTB left by four pixels */
374
3.01M
        pu1_src -= 4;
375
16.9M
        for(row = 0; row < ctb_size / 8; row++)
376
13.8M
        {
377
13.8M
            WORD32 shift = 0;
378
379
            /* downshift horz_bs by ctb_size/2 for each row
380
             *  shift = (row & (MAX_CTB_SIZE / ctb_size - 1)) * ctb_size / 2;
381
             *  which will reduce to the following assuming ctb size is one of 16, 32 and 64
382
             *  and deblocking is done on 8x8 grid
383
             */
384
13.8M
            if(6 != log2_ctb_size)
385
4.95M
                shift = (row & 1) << (log2_ctb_size - 1);
386
387
            /* BS for the row - Last column is excluded and the left column is included*/
388
13.8M
            u4_bs = (pu4_horz_bs[0] >> shift) << 2;
389
390
13.8M
            if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
391
13.0M
            {
392
                /** Picking the last BS of the previous CTB corresponding to the same row
393
                * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
394
                */
395
13.0M
                UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
396
13.0M
                UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
397
13.0M
                u4_bs |= u4_left_bs & 3;
398
13.0M
            }
399
400
63.4M
            for(col = 0; col < ctb_size / 4;)
401
49.5M
            {
402
49.5M
                WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
403
49.5M
                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
404
405
49.5M
                bs_tz = CTZ(u4_bs) >> 1;
406
49.5M
                if(0 != bs_tz)
407
19.4M
                {
408
19.4M
                    u4_bs = u4_bs >> (bs_tz << 1);
409
410
19.4M
                    if((col + bs_tz) >= (ctb_size / 4))
411
11.9M
                        pu1_src += 4 * (ctb_size / 4 - col);
412
7.52M
                    else
413
7.52M
                        pu1_src += 4 * bs_tz;
414
415
19.4M
                    col += bs_tz;
416
19.4M
                    continue;
417
19.4M
                }
418
419
30.0M
                if(0 == col)
420
1.93M
                {
421
1.93M
                    i1_beta_offset_div2 = ps_slice_hdr_left->i1_beta_offset_div2;
422
1.93M
                    i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
423
424
1.93M
                    if(0 == row)
425
399k
                    {
426
399k
                        qp_p = u4_qp_const_in_ctb[0] ?
427
382k
                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
428
399k
                                        pu1_qp[-qp_strd - 1];
429
399k
                    }
430
1.53M
                    else
431
1.53M
                    {
432
1.53M
                        qp_p = u4_qp_const_in_ctb[2] ?
433
1.46M
                                        pu1_qp[-ctb_size / 8] :
434
1.53M
                                        pu1_qp[(row - 1) * qp_strd - 1];
435
1.53M
                    }
436
437
1.93M
                    qp_q = u4_qp_const_in_ctb[2] ?
438
1.84M
                                    pu1_qp[-ctb_size / 8] :
439
1.93M
                                    pu1_qp[row * qp_strd - 1];
440
1.93M
                }
441
28.1M
                else
442
28.1M
                {
443
28.1M
                    if(0 == row)
444
5.81M
                    {
445
5.81M
                        qp_p = u4_qp_const_in_ctb[1] ?
446
5.55M
                                        pu1_qp[-ctb_size / 8 * qp_strd] :
447
5.81M
                                        pu1_qp[((col - 1) >> 1) - qp_strd];
448
5.81M
                    }
449
22.2M
                    else
450
22.2M
                    {
451
22.2M
                        qp_p = u4_qp_const_in_ctb[3] ?
452
21.3M
                                        pu1_qp[0] :
453
22.2M
                                        pu1_qp[((col - 1) >> 1) + (row - 1) * qp_strd];
454
22.2M
                    }
455
456
28.1M
                    qp_q = u4_qp_const_in_ctb[3] ?
457
26.8M
                                    pu1_qp[0] :
458
28.1M
                                    pu1_qp[((col - 1) >> 1) + row * qp_strd];
459
28.1M
                }
460
461
30.0M
                filter_p = (pu2_ctb_no_loop_filter_flag[row] >> ((col + 1) >> 1)) & 1;
462
30.0M
                filter_q = (pu2_ctb_no_loop_filter_flag[row + 1] >> ((col + 1) >> 1)) & 1;
463
                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
464
30.0M
                filter_p = !filter_p;
465
30.0M
                filter_q = !filter_q;
466
467
30.0M
                if(filter_p || filter_q)
468
30.0M
                {
469
30.0M
                    DUMP_DEBLK_LUMA_HORZ(pu1_src, src_strd,
470
30.0M
                                         u4_bs & 3, qp_p, qp_q,
471
30.0M
                                         ps_slice_hdr->i1_beta_offset_div2,
472
30.0M
                                         ps_slice_hdr->i1_tc_offset_div2,
473
30.0M
                                         filter_p, filter_q);
474
30.0M
                    ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr(pu1_src, src_strd,
475
30.0M
                                                                         u4_bs & 3, qp_p, qp_q,
476
30.0M
                                                                         i1_beta_offset_div2,
477
30.0M
                                                                         i1_tc_offset_div2, filter_p, filter_q);
478
30.0M
                }
479
480
30.0M
                pu1_src += 4;
481
30.0M
                u4_bs = u4_bs >> 2;
482
30.0M
                col++;
483
30.0M
            }
484
485
13.8M
            if((64 == ctb_size) ||
486
4.95M
                            ((32 == ctb_size) && (row & 1)))
487
10.1M
            {
488
10.1M
                pu4_horz_bs++;
489
10.1M
            }
490
13.8M
            pu1_src -= ctb_size;
491
13.8M
            pu1_src += (src_strd << 3);
492
13.8M
        }
493
3.01M
        pu4_horz_bs = pu4_ctb_horz_bs;
494
3.01M
    }
495
496
497
    /* Chroma Vertical Edge */
498
499
3.01M
    if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && 0 == i4_is_last_ctb_x)
500
2.97M
    {
501
502
        /* Top CTB's slice header */
503
2.97M
        slice_header_t *ps_slice_hdr_top;
504
2.97M
        {
505
2.97M
            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
506
2.97M
            if(i4_is_last_ctb_y)
507
7.42k
                cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
508
2.97M
            ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
509
2.97M
        }
510
511
2.97M
        pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
512
2.97M
        pu1_src += i4_is_last_ctb_y ? (ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size : 0;
513
514
        /** Deblocking is done on a shifted CTB -
515
         *  Vertical edge processing is done by shifting the CTB up by four pixels */
516
2.97M
        pu1_src -= 4 * src_strd;
517
518
9.80M
        for(col = 0; col < ctb_size / 16; col++)
519
6.82M
        {
520
521
            /* BS for the column - Last row is excluded and the top row is included*/
522
6.82M
            u4_bs = pu4_vert_bs[0] << 2;
523
524
6.82M
            if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
525
6.23M
            {
526
                /* Picking the last BS of the previous CTB corresponding to the same column */
527
6.23M
                UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
528
6.23M
                UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> ((1 << (log2_ctb_size - 1)) - 2);
529
6.23M
                u4_bs |= u4_top_bs & 3;
530
6.23M
            }
531
532
            /* Every alternate boundary strength value is used for chroma */
533
6.82M
            u4_bs &= 0x22222222;
534
535
23.3M
            for(row = 0; row < ctb_size / 8;)
536
16.5M
            {
537
16.5M
                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
538
539
16.5M
                bs_tz = CTZ(u4_bs) >> 2;
540
16.5M
                if(0 != bs_tz)
541
9.53M
                {
542
9.53M
                    if((row + bs_tz) >= (ctb_size / 8))
543
5.96M
                        pu1_src += 4 * (ctb_size / 8 - row) * src_strd;
544
3.56M
                    else
545
3.56M
                        pu1_src += 4 * bs_tz  * src_strd;
546
9.53M
                    row += bs_tz;
547
9.53M
                    u4_bs = u4_bs >> (bs_tz << 2);
548
9.53M
                    continue;
549
9.53M
                }
550
551
6.99M
                if(0 == row)
552
854k
                {
553
854k
                    i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
554
555
854k
                    if(0 == col)
556
264k
                    {
557
264k
                        qp_p = u4_qp_const_in_ctb[0] ?
558
255k
                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
559
264k
                                        pu1_qp[-qp_strd - 1];
560
264k
                    }
561
590k
                    else
562
590k
                    {
563
590k
                        qp_p = u4_qp_const_in_ctb[1] ?
564
569k
                                        pu1_qp[-ctb_size / 8 * qp_strd] :
565
590k
                                        pu1_qp[2 * col - 1 - qp_strd];
566
590k
                    }
567
568
854k
                    qp_q = u4_qp_const_in_ctb[1] ?
569
825k
                                    pu1_qp[-ctb_size / 8 * qp_strd] :
570
854k
                                    pu1_qp[2 * col - qp_strd];
571
854k
                }
572
6.13M
                else
573
6.13M
                {
574
6.13M
                    if(0 == col)
575
1.86M
                    {
576
1.86M
                        qp_p = u4_qp_const_in_ctb[2] ?
577
1.80M
                                        pu1_qp[-ctb_size / 8] :
578
1.86M
                                        pu1_qp[(row - 1) * qp_strd - 1];
579
1.86M
                    }
580
4.27M
                    else
581
4.27M
                    {
582
4.27M
                        qp_p = u4_qp_const_in_ctb[3] ?
583
4.17M
                                        pu1_qp[0] :
584
4.27M
                                        pu1_qp[(row - 1) * qp_strd + 2 * col - 1];
585
4.27M
                    }
586
587
6.13M
                    qp_q = u4_qp_const_in_ctb[3] ?
588
5.95M
                                    pu1_qp[0] :
589
6.13M
                                    pu1_qp[(row - 1) * qp_strd + 2 * col];
590
6.13M
                }
591
592
6.99M
                filter_p = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 1;
593
6.99M
                filter_q = (pu2_ctb_no_loop_filter_flag[row] >> (col << 1)) & 2;
594
                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
595
6.99M
                filter_p = !filter_p;
596
6.99M
                filter_q = !filter_q;
597
598
6.99M
                if(filter_p || filter_q)
599
7.00M
                {
600
7.00M
                    ASSERT(1 == ((u4_bs & 3) >> 1));
601
7.00M
                    DUMP_DEBLK_CHROMA_VERT(pu1_src, src_strd,
602
7.00M
                                           u4_bs & 3, qp_p, qp_q,
603
7.00M
                                           ps_pps->i1_pic_cb_qp_offset,
604
7.00M
                                           ps_pps->i1_pic_cr_qp_offset,
605
7.00M
                                           ps_slice_hdr->i1_tc_offset_div2,
606
7.00M
                                           filter_p, filter_q);
607
6.99M
                    if(chroma_yuv420sp_vu)
608
576k
                    {
609
576k
                        ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
610
576k
                                                                               src_strd,
611
576k
                                                                               qp_q,
612
576k
                                                                               qp_p,
613
576k
                                                                               ps_pps->i1_pic_cr_qp_offset,
614
576k
                                                                               ps_pps->i1_pic_cb_qp_offset,
615
576k
                                                                               i1_tc_offset_div2,
616
576k
                                                                               filter_q,
617
576k
                                                                               filter_p);
618
576k
                    }
619
6.42M
                    else
620
6.42M
                    {
621
6.42M
                        ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr(pu1_src,
622
6.42M
                                                                               src_strd,
623
6.42M
                                                                               qp_p,
624
6.42M
                                                                               qp_q,
625
6.42M
                                                                               ps_pps->i1_pic_cb_qp_offset,
626
6.42M
                                                                               ps_pps->i1_pic_cr_qp_offset,
627
6.42M
                                                                               i1_tc_offset_div2,
628
6.42M
                                                                               filter_p,
629
6.42M
                                                                               filter_q);
630
6.42M
                    }
631
6.99M
                }
632
633
6.99M
                pu1_src += 4 * src_strd;
634
6.99M
                u4_bs = u4_bs >> 4;
635
6.99M
                row++;
636
6.99M
            }
637
638
6.82M
            pu4_vert_bs += (64 == ctb_size) ? 2 : 1;
639
6.82M
            pu1_src -= ((src_strd / 2) << log2_ctb_size);
640
6.82M
            pu1_src += 16;
641
6.82M
        }
642
2.97M
    }
643
644
    /* Chroma Horizontal Edge */
645
646
3.01M
    if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && 0 == i4_is_last_ctb_y)
647
3.01M
    {
648
649
        /* Left CTB's slice header */
650
3.01M
        slice_header_t *ps_slice_hdr_left;
651
3.01M
        {
652
3.01M
            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
653
3.01M
            if(i4_is_last_ctb_x)
654
41.1k
                cur_ctb_indx += 1;
655
3.01M
            ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
656
3.01M
        }
657
658
3.01M
        pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd / 2) << log2_ctb_size);
659
3.01M
        pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
660
661
        /** Deblocking is done on a shifted CTB -
662
         * Horizontal edge processing is done by shifting the CTB left by four pixels (8 here because UV are interleaved) */
663
3.01M
        pu1_src -= 8;
664
9.95M
        for(row = 0; row < ctb_size / 16; row++)
665
6.95M
        {
666
            /* BS for the row - Last column is excluded and the left column is included*/
667
6.95M
            u4_bs = pu4_horz_bs[0] << 2;
668
669
6.95M
            if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
670
6.54M
            {
671
                /** Picking the last BS of the previous CTB corresponding to the same row
672
                * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
673
                */
674
6.54M
                UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
675
6.54M
                UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> ((1 << (log2_ctb_size - 1)) - 2);
676
6.54M
                u4_bs |= u4_left_bs & 3;
677
6.54M
            }
678
679
            /* Every alternate boundary strength value is used for chroma */
680
6.95M
            u4_bs &= 0x22222222;
681
682
23.5M
            for(col = 0; col < ctb_size / 8;)
683
16.5M
            {
684
16.5M
                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
685
686
16.5M
                bs_tz = CTZ(u4_bs) >> 2;
687
16.5M
                if(0 != bs_tz)
688
9.72M
                {
689
9.72M
                    u4_bs = u4_bs >> (bs_tz << 2);
690
691
9.72M
                    if((col + bs_tz) >= (ctb_size / 8))
692
6.07M
                        pu1_src += 8 * (ctb_size / 8 - col);
693
3.65M
                    else
694
3.65M
                        pu1_src += 8 * bs_tz;
695
696
9.72M
                    col += bs_tz;
697
9.72M
                    continue;
698
9.72M
                }
699
700
6.83M
                if(0 == col)
701
872k
                {
702
872k
                    i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
703
704
872k
                    if(0 == row)
705
260k
                    {
706
260k
                        qp_p = u4_qp_const_in_ctb[0] ?
707
252k
                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
708
260k
                                        pu1_qp[-qp_strd - 1];
709
260k
                    }
710
612k
                    else
711
612k
                    {
712
612k
                        qp_p = u4_qp_const_in_ctb[2] ?
713
589k
                                        pu1_qp[-ctb_size / 8] :
714
612k
                                        pu1_qp[(2 * row - 1) * qp_strd - 1];
715
612k
                    }
716
717
872k
                    qp_q = u4_qp_const_in_ctb[2] ?
718
843k
                                    pu1_qp[-ctb_size / 8] :
719
872k
                                    pu1_qp[(2 * row) * qp_strd - 1];
720
872k
                }
721
5.96M
                else
722
5.96M
                {
723
5.96M
                    if(0 == row)
724
1.73M
                    {
725
1.73M
                        qp_p = u4_qp_const_in_ctb[1] ?
726
1.68M
                                        pu1_qp[-ctb_size / 8 * qp_strd] :
727
1.73M
                                        pu1_qp[col - 1 - qp_strd];
728
1.73M
                    }
729
4.23M
                    else
730
4.23M
                    {
731
4.23M
                        qp_p = u4_qp_const_in_ctb[3] ?
732
4.11M
                                        pu1_qp[0] :
733
4.23M
                                        pu1_qp[(col - 1) +  (2 * row - 1) * qp_strd];
734
4.23M
                    }
735
736
5.96M
                    qp_q = u4_qp_const_in_ctb[3] ?
737
5.79M
                                    pu1_qp[0] :
738
5.96M
                                    pu1_qp[(col - 1) + 2 * row * qp_strd];
739
5.96M
                }
740
741
6.83M
                filter_p = (pu2_ctb_no_loop_filter_flag[row << 1] >> col) & 1;
742
6.83M
                filter_q = (pu2_ctb_no_loop_filter_flag[(row << 1) + 1] >> col) & 1;
743
                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
744
6.83M
                filter_p = !filter_p;
745
6.83M
                filter_q = !filter_q;
746
747
6.83M
                if(filter_p || filter_q)
748
6.84M
                {
749
6.84M
                    ASSERT(1 == ((u4_bs & 3) >> 1));
750
6.84M
                    DUMP_DEBLK_CHROMA_HORZ(pu1_src, src_strd,
751
6.84M
                                           u4_bs & 3, qp_p, qp_q,
752
6.84M
                                           ps_pps->i1_pic_cb_qp_offset,
753
6.84M
                                           ps_pps->i1_pic_cr_qp_offset,
754
6.84M
                                           ps_slice_hdr->i1_tc_offset_div2,
755
6.84M
                                           filter_p, filter_q);
756
6.83M
                    if(chroma_yuv420sp_vu)
757
557k
                    {
758
557k
                        ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
759
557k
                                                                               src_strd,
760
557k
                                                                               qp_q,
761
557k
                                                                               qp_p,
762
557k
                                                                               ps_pps->i1_pic_cr_qp_offset,
763
557k
                                                                               ps_pps->i1_pic_cb_qp_offset,
764
557k
                                                                               i1_tc_offset_div2,
765
557k
                                                                               filter_q,
766
557k
                                                                               filter_p);
767
557k
                    }
768
6.28M
                    else
769
6.28M
                    {
770
6.28M
                        ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr(pu1_src,
771
6.28M
                                                                               src_strd,
772
6.28M
                                                                               qp_p,
773
6.28M
                                                                               qp_q,
774
6.28M
                                                                               ps_pps->i1_pic_cb_qp_offset,
775
6.28M
                                                                               ps_pps->i1_pic_cr_qp_offset,
776
6.28M
                                                                               i1_tc_offset_div2,
777
6.28M
                                                                               filter_p,
778
6.28M
                                                                               filter_q);
779
6.28M
                    }
780
6.83M
                }
781
782
6.83M
                pu1_src += 8;
783
6.83M
                u4_bs = u4_bs >> 4;
784
6.83M
                col++;
785
6.83M
            }
786
787
6.94M
            pu4_horz_bs += (64 == ctb_size) ? 2 : 1;
788
6.94M
            pu1_src -= ctb_size;
789
6.94M
            pu1_src += 8 * src_strd;
790
791
6.94M
        }
792
3.01M
    }
793
3.01M
}