Coverage Report

Created: 2026-06-10 06:32

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/decoder/ihevcd_deblk.c
Line
Count
Source
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
*******************************************************************************
20
* @file
21
*  ihevcd_deblk.c
22
*
23
* @brief
24
*  Contains definition for the ctb level deblk function
25
*
26
* @author
27
*  Srinivas T
28
*
29
* @par List of Functions:
30
*   - ihevcd_deblk_ctb()
31
*
32
* @remarks
33
*  None
34
*
35
*******************************************************************************
36
*/
37
38
#include <stdio.h>
39
#include <stddef.h>
40
#include <stdlib.h>
41
#include <string.h>
42
#include <assert.h>
43
44
#include "ihevc_typedefs.h"
45
#include "iv.h"
46
#include "ivd.h"
47
#include "ihevcd_cxa.h"
48
#include "ithread.h"
49
50
#include "ihevc_defs.h"
51
#include "ihevc_debug.h"
52
#include "ihevc_defs.h"
53
#include "ihevc_structs.h"
54
#include "ihevc_macros.h"
55
#include "ihevc_platform_macros.h"
56
#include "ihevc_cabac_tables.h"
57
58
#include "ihevc_error.h"
59
#include "ihevc_common_tables.h"
60
61
#include "ihevcd_trace.h"
62
#include "ihevcd_defs.h"
63
#include "ihevcd_function_selector.h"
64
#include "ihevcd_structs.h"
65
#include "ihevcd_error.h"
66
#include "ihevcd_nal.h"
67
#include "ihevcd_bitstream.h"
68
#include "ihevcd_job_queue.h"
69
#include "ihevcd_utils.h"
70
#include "ihevcd_debug.h"
71
72
#include "ihevc_deblk.h"
73
#include "ihevc_deblk_tables.h"
74
#include "ihevcd_profile.h"
75
/**
76
*******************************************************************************
77
*
78
* @brief
79
*     Deblock CTB level function.
80
*
81
* @par Description:
82
*     For a given CTB, deblocking on both vertical and
83
*     horizontal edges is done. Both the luma and chroma
84
*     blocks are processed
85
*
86
* @param[in] ps_deblk
87
*  Pointer to the deblock context
88
*
89
* @returns
90
*
91
* @remarks
92
*  None
93
*
94
*******************************************************************************
95
*/
96
97
void ihevcd_deblk_ctb(deblk_ctxt_t *ps_deblk,
98
                      WORD32 i4_is_last_ctb_x,
99
                      WORD32 i4_is_last_ctb_y)
100
0
{
101
0
    WORD32 ctb_size;
102
0
    WORD32 log2_ctb_size;
103
0
    UWORD32 u4_bs;
104
0
    WORD32 bs_tz; /*Trailing zeros in boundary strength*/
105
0
    WORD32 qp_p, qp_q;
106
107
0
    WORD32 filter_p, filter_q;
108
109
0
    UWORD8 *pu1_src;
110
0
    WORD32 qp_strd;
111
0
    UWORD32 *pu4_vert_bs, *pu4_horz_bs;
112
0
    UWORD32 *pu4_ctb_vert_bs, *pu4_ctb_horz_bs;
113
0
    WORD32 bs_strd;
114
0
    WORD32 src_strd, chroma_strd;
115
0
    UWORD8 *pu1_qp;
116
0
    UWORD16 *pu2_ctb_no_loop_filter_flag;
117
0
    UWORD16 au2_ctb_no_loop_filter_flag[9];
118
119
0
    WORD32 col, row;
120
121
    /* Flag to indicate if QP is constant in CTB
122
     * 0 - top_left, 1 - top, 2 - left, 3 - current */
123
0
    UWORD32 u4_qp_const_in_ctb[4] = { 0, 0, 0, 0 };
124
0
    WORD32 ctb_indx;
125
0
    WORD32  chroma_yuv420sp_vu = ps_deblk->is_chroma_yuv420sp_vu;
126
0
    sps_t *ps_sps;
127
0
    pps_t *ps_pps;
128
0
    codec_t *ps_codec;
129
0
    slice_header_t *ps_slice_hdr;
130
131
0
    ihevc_deblk_chroma_horz_ft *pf_deblk_chroma_horz;
132
0
    ihevc_deblk_chroma_vert_ft *pf_deblk_chroma_vert;
133
134
0
    PROFILE_DISABLE_DEBLK();
135
136
0
    ps_sps = ps_deblk->ps_sps;
137
0
    ps_pps = ps_deblk->ps_pps;
138
0
    ps_codec = ps_deblk->ps_codec;
139
0
    ps_slice_hdr = ps_deblk->ps_slice_hdr;
140
141
0
    WORD32 is_yuv422 = ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422 ? 1 : 0;
142
0
    WORD32 is_yuv444 = ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444 ? 1 : 0;
143
0
    WORD32 h_samp_factor = (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 1 : 2;
144
0
    WORD32 v_samp_factor = (CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc) ? 2 : 1;
145
0
    WORD32 chroma_pixel_strd = 2;
146
147
0
    log2_ctb_size = ps_sps->i1_log2_ctb_size;
148
0
    ctb_size = (1 << ps_sps->i1_log2_ctb_size);
149
150
0
    if(is_yuv422)
151
0
    {
152
0
        pf_deblk_chroma_horz = ps_codec->s_func_selector.ihevc_deblk_422chroma_horz_fptr;
153
0
        pf_deblk_chroma_vert = ps_codec->s_func_selector.ihevc_deblk_422chroma_vert_fptr;
154
0
    }
155
0
    else
156
0
    {
157
0
        pf_deblk_chroma_horz = ps_codec->s_func_selector.ihevc_deblk_chroma_horz_fptr;
158
0
        pf_deblk_chroma_vert = ps_codec->s_func_selector.ihevc_deblk_chroma_vert_fptr;
159
0
    }
160
161
    /* strides are in units of number of bytes */
162
    /* ctb_size * ctb_size / 8 / 16 is the number of bytes needed per CTB */
163
0
    bs_strd = (ps_sps->i2_pic_wd_in_ctb + 1) << (2 * log2_ctb_size - 7);
164
165
0
    pu4_vert_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_vert_bs +
166
0
                    (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
167
0
                    ps_deblk->i4_ctb_y * bs_strd);
168
0
    pu4_ctb_vert_bs = pu4_vert_bs;
169
170
0
    pu4_horz_bs = (UWORD32 *)((UWORD8 *)ps_deblk->s_bs_ctxt.pu4_pic_horz_bs +
171
0
                    (ps_deblk->i4_ctb_x << (2 * log2_ctb_size - 7)) +
172
0
                    ps_deblk->i4_ctb_y * bs_strd);
173
0
    pu4_ctb_horz_bs = pu4_horz_bs;
174
175
0
    qp_strd = ps_sps->i2_pic_wd_in_ctb << (log2_ctb_size - 3);
176
0
    pu1_qp = ps_deblk->s_bs_ctxt.pu1_pic_qp + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * qp_strd) << (log2_ctb_size - 3));
177
178
0
    pu2_ctb_no_loop_filter_flag = ps_deblk->au2_ctb_no_loop_filter_flag;
179
180
0
    ctb_indx = ps_deblk->i4_ctb_x + ps_sps->i2_pic_wd_in_ctb * ps_deblk->i4_ctb_y;
181
0
    if(i4_is_last_ctb_y)
182
0
    {
183
0
        pu4_vert_bs = (UWORD32 *)((UWORD8 *)pu4_vert_bs + bs_strd);
184
0
        pu4_ctb_vert_bs = pu4_vert_bs;
185
        /* ctb_size/8 is the number of edges per CTB
186
         * ctb_size/4 is the number of BS values needed per edge
187
         * divided by 8 for the number of bytes
188
         * 2 is the number of bits needed for each BS value */
189
0
        memset(pu4_vert_bs, 0, 1 << (2 * log2_ctb_size - 7));
190
191
0
        pu1_qp += (qp_strd << (log2_ctb_size - 3));
192
0
        pu2_ctb_no_loop_filter_flag += (ctb_size >> 3);
193
0
        ctb_indx += ps_sps->i2_pic_wd_in_ctb;
194
0
    }
195
196
0
    if(i4_is_last_ctb_x)
197
0
    {
198
0
        pu4_horz_bs = (UWORD32 *)((UWORD8 *)pu4_horz_bs + (1 << (2 * log2_ctb_size - 7)));
199
0
        pu4_ctb_horz_bs = pu4_horz_bs;
200
0
        memset(pu4_horz_bs, 0, 1 << (2 * log2_ctb_size - 7));
201
202
0
        pu1_qp += (ctb_size >> 3);
203
204
0
        for(row = 0; row < (ctb_size >> 3) + 1; row++)
205
0
            au2_ctb_no_loop_filter_flag[row] = ps_deblk->au2_ctb_no_loop_filter_flag[row] >> (ctb_size >> 3);
206
0
        pu2_ctb_no_loop_filter_flag = au2_ctb_no_loop_filter_flag;
207
0
        ctb_indx += 1;
208
0
    }
209
210
0
    u4_qp_const_in_ctb[3] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx) >> 3] & (1 << (ctb_indx & 7));
211
212
0
    if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
213
0
    {
214
0
        u4_qp_const_in_ctb[2] = ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - 1) >> 3] & (1 << ((ctb_indx - 1) & 7));
215
0
    }
216
217
0
    if((ps_deblk->i4_ctb_x || i4_is_last_ctb_x) && (ps_deblk->i4_ctb_y || i4_is_last_ctb_y))
218
0
    {
219
0
        u4_qp_const_in_ctb[0] =
220
0
                        ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) >> 3] &
221
0
                        (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb - 1) & 7));
222
0
    }
223
224
225
226
0
    if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
227
0
    {
228
0
        u4_qp_const_in_ctb[1] =
229
0
                        ps_deblk->s_bs_ctxt.pu1_pic_qp_const_in_ctb[(ctb_indx - ps_sps->i2_pic_wd_in_ctb) >> 3] &
230
0
                        (1 << ((ctb_indx - ps_sps->i2_pic_wd_in_ctb) & 7));
231
0
    }
232
233
0
    src_strd = ps_codec->i4_strd;
234
0
    chroma_strd = src_strd * chroma_pixel_strd / h_samp_factor;
235
236
    /* Luma Vertical Edge */
237
238
0
    if(0 == i4_is_last_ctb_x)
239
0
    {
240
        /* Top CTB's slice header */
241
0
        slice_header_t *ps_slice_hdr_top;
242
0
        {
243
0
            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
244
0
            if(i4_is_last_ctb_y)
245
0
                cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
246
0
            ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
247
0
        }
248
249
0
        pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << (log2_ctb_size));
250
0
        pu1_src += i4_is_last_ctb_y ? ps_deblk->ps_codec->i4_strd << log2_ctb_size : 0;
251
252
        /** Deblocking is done on a shifted CTB -
253
         *  Vertical edge processing is done by shifting the CTB up by four pixels */
254
0
        pu1_src -= 4 * src_strd;
255
256
0
        for(col = 0; col < ctb_size / 8; col++)
257
0
        {
258
0
            WORD32 shift = 0;
259
260
            /*  downshift vert_bs by ctb_size/2 for each column
261
             *  shift = (col & ((MAX_CTB_SIZE >> log2_ctb_size) - 1)) << (log2_ctb_size - 1);
262
             *  which will reduce to the following assuming ctb size is one of 16, 32 and 64
263
             *  and deblocking is done on 8x8 grid
264
             */
265
0
            if(6 != log2_ctb_size)
266
0
                shift = (col & 1) << (log2_ctb_size - 1);
267
268
            /* BS for the column - Last row is excluded and the top row is included*/
269
0
            u4_bs = (pu4_vert_bs[0] >> shift) << 2;
270
271
0
            if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
272
0
            {
273
                /* Picking the last BS of the previous CTB corresponding to the same column */
274
0
                UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
275
0
                UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
276
0
                u4_bs |= u4_top_bs & 3;
277
0
            }
278
279
0
            for(row = 0; row < ctb_size / 4;)
280
0
            {
281
0
                WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
282
0
                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
283
284
                /* Trailing zeros are computed and the corresponding rows are not processed */
285
0
                bs_tz = CTZ(u4_bs) >> 1;
286
0
                if(0 != bs_tz)
287
0
                {
288
0
                    u4_bs = u4_bs >> (bs_tz << 1);
289
0
                    if((row + bs_tz) >= (ctb_size / 4))
290
0
                        pu1_src += 4 * (ctb_size / 4 - row) * src_strd;
291
0
                    else
292
0
                        pu1_src += 4 * bs_tz  * src_strd;
293
294
0
                    row += bs_tz;
295
0
                    continue;
296
0
                }
297
298
0
                if(0 == row)
299
0
                {
300
0
                    i1_beta_offset_div2 = ps_slice_hdr_top->i1_beta_offset_div2;
301
0
                    i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
302
303
0
                    if(0 == col)
304
0
                    {
305
0
                        qp_p = u4_qp_const_in_ctb[0] ?
306
0
                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
307
0
                                        pu1_qp[-qp_strd - 1];
308
0
                    }
309
0
                    else
310
0
                    {
311
0
                        qp_p = u4_qp_const_in_ctb[1] ?
312
0
                                        pu1_qp[-ctb_size / 8 * qp_strd] :
313
0
                                        pu1_qp[col - 1 - qp_strd];
314
0
                    }
315
316
0
                    qp_q = u4_qp_const_in_ctb[1] ?
317
0
                                    pu1_qp[-ctb_size / 8 * qp_strd] :
318
0
                                    pu1_qp[col - qp_strd];
319
0
                }
320
0
                else
321
0
                {
322
0
                    if(0 == col)
323
0
                    {
324
0
                        qp_p = u4_qp_const_in_ctb[2] ?
325
0
                                        pu1_qp[-ctb_size / 8] :
326
0
                                        pu1_qp[((row - 1) >> 1) * qp_strd - 1];
327
0
                    }
328
0
                    else
329
0
                    {
330
0
                        qp_p = u4_qp_const_in_ctb[3] ?
331
0
                                        pu1_qp[0] :
332
0
                                        pu1_qp[((row - 1) >> 1) * qp_strd + col - 1];
333
0
                    }
334
335
0
                    qp_q = u4_qp_const_in_ctb[3] ?
336
0
                                    pu1_qp[0] :
337
0
                                    pu1_qp[((row - 1) >> 1) * qp_strd + col];
338
0
                }
339
340
0
                filter_p = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 1;
341
0
                filter_q = (pu2_ctb_no_loop_filter_flag[(row + 1) >> 1] >> col) & 2;
342
                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
343
0
                filter_p = !filter_p;
344
0
                filter_q = !filter_q;
345
346
0
                if(filter_p || filter_q)
347
0
                {
348
0
                    DUMP_DEBLK_LUMA_VERT(pu1_src, src_strd,
349
0
                                         u4_bs & 3, qp_p, qp_q,
350
0
                                         ps_slice_hdr->i1_beta_offset_div2,
351
0
                                         ps_slice_hdr->i1_tc_offset_div2,
352
0
                                         filter_p, filter_q);
353
0
                    ps_codec->s_func_selector.ihevc_deblk_luma_vert_fptr(pu1_src, src_strd,
354
0
                                                                         u4_bs & 3, qp_p, qp_q,
355
0
                                                                         i1_beta_offset_div2,
356
0
                                                                         i1_tc_offset_div2,
357
0
                                                                         filter_p, filter_q);
358
0
                }
359
360
0
                pu1_src += 4 * src_strd;
361
0
                u4_bs = u4_bs >> 2;
362
0
                row++;
363
0
            }
364
365
0
            if((64 == ctb_size) ||
366
0
                            ((32 == ctb_size) && (col & 1)))
367
0
            {
368
0
                pu4_vert_bs++;
369
0
            }
370
0
            pu1_src -= (src_strd << log2_ctb_size);
371
0
            pu1_src += 8;
372
0
        }
373
0
        pu4_vert_bs = pu4_ctb_vert_bs;
374
0
    }
375
376
377
    /* Luma Horizontal Edge */
378
379
0
    if(0 == i4_is_last_ctb_y)
380
0
    {
381
382
        /* Left CTB's slice header */
383
0
        slice_header_t *ps_slice_hdr_left;
384
0
        {
385
0
            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
386
0
            if(i4_is_last_ctb_x)
387
0
                cur_ctb_indx += 1;
388
0
            ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
389
0
        }
390
0
        pu1_src = ps_deblk->pu1_cur_pic_luma + ((ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd) << log2_ctb_size);
391
0
        pu1_src += i4_is_last_ctb_x ? ctb_size : 0;
392
393
        /** Deblocking is done on a shifted CTB -
394
         *  Horizontal edge processing is done by shifting the CTB left by four pixels */
395
0
        pu1_src -= 4;
396
0
        for(row = 0; row < ctb_size / 8; row++)
397
0
        {
398
0
            WORD32 shift = 0;
399
400
            /* downshift horz_bs by ctb_size/2 for each row
401
             *  shift = (row & (MAX_CTB_SIZE / ctb_size - 1)) * ctb_size / 2;
402
             *  which will reduce to the following assuming ctb size is one of 16, 32 and 64
403
             *  and deblocking is done on 8x8 grid
404
             */
405
0
            if(6 != log2_ctb_size)
406
0
                shift = (row & 1) << (log2_ctb_size - 1);
407
408
            /* BS for the row - Last column is excluded and the left column is included*/
409
0
            u4_bs = (pu4_horz_bs[0] >> shift) << 2;
410
411
0
            if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
412
0
            {
413
                /** Picking the last BS of the previous CTB corresponding to the same row
414
                * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
415
                */
416
0
                UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
417
0
                UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
418
0
                u4_bs |= u4_left_bs & 3;
419
0
            }
420
421
0
            for(col = 0; col < ctb_size / 4;)
422
0
            {
423
0
                WORD8 i1_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
424
0
                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
425
426
0
                bs_tz = CTZ(u4_bs) >> 1;
427
0
                if(0 != bs_tz)
428
0
                {
429
0
                    u4_bs = u4_bs >> (bs_tz << 1);
430
431
0
                    if((col + bs_tz) >= (ctb_size / 4))
432
0
                        pu1_src += 4 * (ctb_size / 4 - col);
433
0
                    else
434
0
                        pu1_src += 4 * bs_tz;
435
436
0
                    col += bs_tz;
437
0
                    continue;
438
0
                }
439
440
0
                if(0 == col)
441
0
                {
442
0
                    i1_beta_offset_div2 = ps_slice_hdr_left->i1_beta_offset_div2;
443
0
                    i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
444
445
0
                    if(0 == row)
446
0
                    {
447
0
                        qp_p = u4_qp_const_in_ctb[0] ?
448
0
                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
449
0
                                        pu1_qp[-qp_strd - 1];
450
0
                    }
451
0
                    else
452
0
                    {
453
0
                        qp_p = u4_qp_const_in_ctb[2] ?
454
0
                                        pu1_qp[-ctb_size / 8] :
455
0
                                        pu1_qp[(row - 1) * qp_strd - 1];
456
0
                    }
457
458
0
                    qp_q = u4_qp_const_in_ctb[2] ?
459
0
                                    pu1_qp[-ctb_size / 8] :
460
0
                                    pu1_qp[row * qp_strd - 1];
461
0
                }
462
0
                else
463
0
                {
464
0
                    if(0 == row)
465
0
                    {
466
0
                        qp_p = u4_qp_const_in_ctb[1] ?
467
0
                                        pu1_qp[-ctb_size / 8 * qp_strd] :
468
0
                                        pu1_qp[((col - 1) >> 1) - qp_strd];
469
0
                    }
470
0
                    else
471
0
                    {
472
0
                        qp_p = u4_qp_const_in_ctb[3] ?
473
0
                                        pu1_qp[0] :
474
0
                                        pu1_qp[((col - 1) >> 1) + (row - 1) * qp_strd];
475
0
                    }
476
477
0
                    qp_q = u4_qp_const_in_ctb[3] ?
478
0
                                    pu1_qp[0] :
479
0
                                    pu1_qp[((col - 1) >> 1) + row * qp_strd];
480
0
                }
481
482
0
                filter_p = (pu2_ctb_no_loop_filter_flag[row] >> ((col + 1) >> 1)) & 1;
483
0
                filter_q = (pu2_ctb_no_loop_filter_flag[row + 1] >> ((col + 1) >> 1)) & 1;
484
                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
485
0
                filter_p = !filter_p;
486
0
                filter_q = !filter_q;
487
488
0
                if(filter_p || filter_q)
489
0
                {
490
0
                    DUMP_DEBLK_LUMA_HORZ(pu1_src, src_strd,
491
0
                                         u4_bs & 3, qp_p, qp_q,
492
0
                                         ps_slice_hdr->i1_beta_offset_div2,
493
0
                                         ps_slice_hdr->i1_tc_offset_div2,
494
0
                                         filter_p, filter_q);
495
0
                    ps_codec->s_func_selector.ihevc_deblk_luma_horz_fptr(pu1_src, src_strd,
496
0
                                                                         u4_bs & 3, qp_p, qp_q,
497
0
                                                                         i1_beta_offset_div2,
498
0
                                                                         i1_tc_offset_div2, filter_p, filter_q);
499
0
                }
500
501
0
                pu1_src += 4;
502
0
                u4_bs = u4_bs >> 2;
503
0
                col++;
504
0
            }
505
506
0
            if((64 == ctb_size) ||
507
0
                            ((32 == ctb_size) && (row & 1)))
508
0
            {
509
0
                pu4_horz_bs++;
510
0
            }
511
0
            pu1_src -= ctb_size;
512
0
            pu1_src += (src_strd << 3);
513
0
        }
514
0
        pu4_horz_bs = pu4_ctb_horz_bs;
515
0
    }
516
517
518
    /* Chroma Vertical Edge */
519
520
0
    if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && 0 == i4_is_last_ctb_x)
521
0
    {
522
523
        /* Top CTB's slice header */
524
0
        slice_header_t *ps_slice_hdr_top;
525
0
        {
526
0
            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
527
0
            if(i4_is_last_ctb_y)
528
0
                cur_ctb_indx += ps_sps->i2_pic_wd_in_ctb;
529
0
            ps_slice_hdr_top = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - ps_sps->i2_pic_wd_in_ctb];
530
0
        }
531
532
0
        pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x * chroma_pixel_strd / h_samp_factor + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd * chroma_pixel_strd / (h_samp_factor * v_samp_factor)) << log2_ctb_size);
533
0
        pu1_src += i4_is_last_ctb_y ? ((ps_codec->i4_strd * chroma_pixel_strd) / (h_samp_factor * v_samp_factor)) << log2_ctb_size : 0;
534
535
        /** Deblocking is done on a shifted CTB -
536
         *  Vertical edge processing is done by shifting the CTB up by four pixels */
537
0
        pu1_src -= 4 * chroma_strd;
538
539
0
        for(col = 0; col < ctb_size / (8 * h_samp_factor); col++)
540
0
        {
541
0
            WORD32 shift = 0;
542
543
0
            if(is_yuv444 && 6 != log2_ctb_size)
544
0
                shift = (col & 1) << (log2_ctb_size - 1);
545
546
            /* BS for the column - Last row is excluded and the top row is included*/
547
0
            u4_bs = (pu4_vert_bs[0] >> shift) << 2;
548
549
0
            if(ps_deblk->i4_ctb_y || i4_is_last_ctb_y)
550
0
            {
551
                /* Picking the last BS of the previous CTB corresponding to the same column */
552
0
                UWORD32 *pu4_vert_bs_top = (UWORD32 *)((UWORD8 *)pu4_vert_bs - bs_strd);
553
0
                UWORD32 u4_top_bs = (*pu4_vert_bs_top) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
554
0
                u4_bs |= u4_top_bs & 3;
555
0
            }
556
557
0
            if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420) {
558
                /* Every alternate boundary strength value is used for chroma */
559
0
                u4_bs &= 0x22222222;
560
0
            } else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444 || ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) {
561
                /* Every boundary strength value is used for chroma */
562
0
                u4_bs &= 0xAAAAAAAA;
563
0
            }
564
565
0
            for(row = 0; row < ctb_size / (4 * v_samp_factor);)
566
0
            {
567
0
                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
568
569
0
                bs_tz = CTZ(u4_bs) >> v_samp_factor;
570
0
                if(0 != bs_tz)
571
0
                {
572
0
                    if((row + bs_tz) >= (ctb_size / (4 * v_samp_factor)))
573
0
                        pu1_src += 4 * (ctb_size / (4 * v_samp_factor) - row) * chroma_strd;
574
0
                    else
575
0
                        pu1_src += 4 * bs_tz  * chroma_strd;
576
0
                    row += bs_tz;
577
0
                    u4_bs = u4_bs >> (bs_tz << v_samp_factor);
578
0
                    continue;
579
0
                }
580
581
0
                if(0 == row)
582
0
                {
583
0
                    i1_tc_offset_div2 = ps_slice_hdr_top->i1_tc_offset_div2;
584
585
0
                    if(0 == col)
586
0
                    {
587
0
                        qp_p = u4_qp_const_in_ctb[0] ?
588
0
                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
589
0
                                        pu1_qp[-qp_strd - 1];
590
0
                    }
591
0
                    else
592
0
                    {
593
0
                        qp_p = u4_qp_const_in_ctb[1] ?
594
0
                                        pu1_qp[-ctb_size / 8 * qp_strd] :
595
0
                                        pu1_qp[h_samp_factor * col - 1 - qp_strd];
596
0
                    }
597
598
0
                    qp_q = u4_qp_const_in_ctb[1] ?
599
0
                                    pu1_qp[-ctb_size / 8 * qp_strd] :
600
0
                                    pu1_qp[h_samp_factor * col - qp_strd];
601
0
                }
602
0
                else
603
0
                {
604
0
                    if(0 == col)
605
0
                    {
606
0
                        qp_p = u4_qp_const_in_ctb[2] ?
607
0
                                        pu1_qp[-ctb_size / 8] :
608
0
                                        pu1_qp[((row - 1) >> (2 - v_samp_factor)) * qp_strd - 1];
609
0
                    }
610
0
                    else
611
0
                    {
612
0
                        qp_p = u4_qp_const_in_ctb[3] ?
613
0
                                        pu1_qp[0] :
614
0
                                        pu1_qp[((row - 1) >> (2 - v_samp_factor)) * qp_strd + h_samp_factor * col - 1];
615
0
                    }
616
617
0
                    qp_q = u4_qp_const_in_ctb[3] ?
618
0
                                    pu1_qp[0] :
619
0
                                    pu1_qp[((row - 1) >> (2 - v_samp_factor)) * qp_strd + h_samp_factor * col];
620
0
                }
621
622
0
                filter_p = (pu2_ctb_no_loop_filter_flag[(row + (2 - v_samp_factor)) >> (2 - v_samp_factor)] >> (col << (h_samp_factor - 1))) & 1;
623
0
                filter_q = (pu2_ctb_no_loop_filter_flag[(row + (2 - v_samp_factor)) >> (2 - v_samp_factor)] >> (col << (h_samp_factor - 1))) & 2;
624
                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
625
0
                filter_p = !filter_p;
626
0
                filter_q = !filter_q;
627
628
0
                if(filter_p || filter_q)
629
0
                {
630
0
                    if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420) {
631
0
                        ASSERT(1 == ((u4_bs & 3) >> 1));
632
0
                    }
633
0
                    DUMP_DEBLK_CHROMA_VERT(pu1_src, chroma_strd,
634
0
                                           u4_bs & 3, qp_p, qp_q,
635
0
                                           ps_pps->i1_pic_cb_qp_offset,
636
0
                                           ps_pps->i1_pic_cr_qp_offset,
637
0
                                           ps_slice_hdr->i1_tc_offset_div2,
638
0
                                           filter_p, filter_q);
639
0
                    if(chroma_yuv420sp_vu)
640
0
                    {
641
0
                        pf_deblk_chroma_vert(pu1_src,
642
0
                                             chroma_strd,
643
0
                                             qp_q,
644
0
                                             qp_p,
645
0
                                             ps_pps->i1_pic_cr_qp_offset,
646
0
                                             ps_pps->i1_pic_cb_qp_offset,
647
0
                                             i1_tc_offset_div2,
648
0
                                             filter_q,
649
0
                                             filter_p,
650
0
                                             ps_sps->i1_chroma_format_idc);
651
0
                    }
652
0
                    else
653
0
                    {
654
0
                        pf_deblk_chroma_vert(pu1_src,
655
0
                                             chroma_strd,
656
0
                                             qp_p,
657
0
                                             qp_q,
658
0
                                             ps_pps->i1_pic_cb_qp_offset,
659
0
                                             ps_pps->i1_pic_cr_qp_offset,
660
0
                                             i1_tc_offset_div2,
661
0
                                             filter_p,
662
0
                                             filter_q,
663
0
                                             ps_sps->i1_chroma_format_idc);
664
0
                    }
665
0
                }
666
667
0
                pu1_src += 4 * chroma_strd;
668
0
                u4_bs = u4_bs >> (2 * v_samp_factor);
669
0
                row++;
670
0
            }
671
672
0
            pu1_src -= (((src_strd * chroma_pixel_strd) / (h_samp_factor * v_samp_factor)) << log2_ctb_size);
673
0
            if(is_yuv444)
674
0
            {
675
0
                if((64 ==ctb_size) || ((32 == ctb_size) && (col & 1))) pu4_vert_bs++;
676
0
            }
677
0
            else
678
0
            {
679
0
                pu4_vert_bs += (64 == ctb_size) ? 2 : 1;
680
0
            }
681
0
            pu1_src += 16;
682
0
        }
683
0
    }
684
685
    /* Chroma Horizontal Edge */
686
687
0
    if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && 0 == i4_is_last_ctb_y)
688
0
    {
689
690
        /* Left CTB's slice header */
691
0
        slice_header_t *ps_slice_hdr_left;
692
0
        {
693
0
            WORD32 cur_ctb_indx = ps_deblk->i4_ctb_x + ps_deblk->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
694
0
            if(i4_is_last_ctb_x)
695
0
                cur_ctb_indx += 1;
696
0
            ps_slice_hdr_left = ps_codec->ps_slice_hdr_base + ps_deblk->pu1_slice_idx[cur_ctb_indx - 1];
697
0
        }
698
699
0
        pu1_src = ps_deblk->pu1_cur_pic_chroma + ((ps_deblk->i4_ctb_x * chroma_pixel_strd / h_samp_factor + ps_deblk->i4_ctb_y * ps_deblk->ps_codec->i4_strd * chroma_pixel_strd / (v_samp_factor * h_samp_factor)) << log2_ctb_size);
700
0
        pu1_src += i4_is_last_ctb_x ? ctb_size * chroma_pixel_strd / h_samp_factor : 0;
701
702
        /** Deblocking is done on a shifted CTB -
703
         * Horizontal edge processing is done by shifting the CTB left by four pixels (8 here because UV are interleaved) */
704
0
        pu1_src -= 8;
705
0
        for(row = 0; row < ctb_size / (8 * v_samp_factor); row++)
706
0
        {
707
0
            WORD32 shift = 0;
708
709
0
            if((is_yuv444 || is_yuv422) && 6 != log2_ctb_size)
710
0
                shift = (row & 1) << (log2_ctb_size - 1);
711
712
            /* BS for the row - Last column is excluded and the left column is included*/
713
0
            u4_bs = (pu4_horz_bs[0] >> shift) << 2;
714
715
0
            if(ps_deblk->i4_ctb_x || i4_is_last_ctb_x)
716
0
            {
717
                /** Picking the last BS of the previous CTB corresponding to the same row
718
                * UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (ctb_size / 8) * (ctb_size / 4) / 8 * 2);
719
                */
720
0
                UWORD32 *pu4_horz_bs_left = (UWORD32 *)((UWORD8 *)pu4_horz_bs - (1 << (2 * log2_ctb_size - 7)));
721
0
                UWORD32 u4_left_bs = (*pu4_horz_bs_left) >> (shift + (1 << (log2_ctb_size - 1)) - 2);
722
0
                u4_bs |= u4_left_bs & 3;
723
0
            }
724
725
0
            if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420 || ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422) {
726
                /* Every alternate boundary strength value is used for chroma */
727
0
                u4_bs &= 0x22222222;
728
0
            } else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444) {
729
                /* Every boundary strength value is used for chroma */
730
0
                u4_bs &= 0xAAAAAAAA;
731
0
            }
732
733
0
            for(col = 0; col < ctb_size / (4 * h_samp_factor);)
734
0
            {
735
0
                WORD8 i1_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
736
737
0
                bs_tz = CTZ(u4_bs) >> h_samp_factor;
738
0
                if(0 != bs_tz)
739
0
                {
740
0
                    u4_bs = u4_bs >> (bs_tz << h_samp_factor);
741
742
0
                    if((col + bs_tz) >= (ctb_size / (4 * h_samp_factor)))
743
0
                        pu1_src += 8 * (ctb_size / (4 * h_samp_factor) - col);
744
0
                    else
745
0
                        pu1_src += 8 * bs_tz;
746
747
0
                    col += bs_tz;
748
0
                    continue;
749
0
                }
750
751
0
                if(0 == col)
752
0
                {
753
0
                    i1_tc_offset_div2 = ps_slice_hdr_left->i1_tc_offset_div2;
754
755
0
                    if(0 == row)
756
0
                    {
757
0
                        qp_p = u4_qp_const_in_ctb[0] ?
758
0
                                        pu1_qp[-ctb_size / 8 * qp_strd - ctb_size / 8] :
759
0
                                        pu1_qp[-qp_strd - 1];
760
0
                    }
761
0
                    else
762
0
                    {
763
0
                        qp_p = u4_qp_const_in_ctb[2] ?
764
0
                                        pu1_qp[-ctb_size / 8] :
765
0
                                        pu1_qp[(v_samp_factor * row - 1) * qp_strd - 1];
766
0
                    }
767
768
0
                    qp_q = u4_qp_const_in_ctb[2] ?
769
0
                                    pu1_qp[-ctb_size / 8] :
770
0
                                    pu1_qp[(v_samp_factor * row) * qp_strd - 1];
771
0
                }
772
0
                else
773
0
                {
774
0
                    if(0 == row)
775
0
                    {
776
0
                        qp_p = u4_qp_const_in_ctb[1] ?
777
0
                                        pu1_qp[-ctb_size / 8 * qp_strd] :
778
0
                                        pu1_qp[((col - 1) >> (2 - h_samp_factor)) - qp_strd];
779
0
                    }
780
0
                    else
781
0
                    {
782
0
                        qp_p = u4_qp_const_in_ctb[3] ?
783
0
                                        pu1_qp[0] :
784
0
                                        pu1_qp[((col - 1) >> (2 - h_samp_factor)) +  (v_samp_factor * row - 1) * qp_strd];
785
0
                    }
786
787
0
                    qp_q = u4_qp_const_in_ctb[3] ?
788
0
                                    pu1_qp[0] :
789
0
                                    pu1_qp[((col - 1) >> (2 - h_samp_factor)) + v_samp_factor * row * qp_strd];
790
0
                }
791
0
                filter_p = (pu2_ctb_no_loop_filter_flag[row * v_samp_factor] >> ((col + 2 - h_samp_factor) >> (2 - h_samp_factor))) & 1;
792
0
                filter_q = (pu2_ctb_no_loop_filter_flag[(row * v_samp_factor) + 1] >> ((col + 2 - h_samp_factor) >> (2 - h_samp_factor))) & 1;
793
                /* filter_p and filter_q are inverted as they are calculated using no_loop_filter_flags */
794
0
                filter_p = !filter_p;
795
0
                filter_q = !filter_q;
796
797
0
                if(filter_p || filter_q)
798
0
                {
799
0
                    if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420) {
800
0
                        ASSERT(1 == ((u4_bs & 3) >> 1));
801
0
                    }
802
0
                    DUMP_DEBLK_CHROMA_HORZ(pu1_src, chroma_strd,
803
0
                                           u4_bs & 3, qp_p, qp_q,
804
0
                                           ps_pps->i1_pic_cb_qp_offset,
805
0
                                           ps_pps->i1_pic_cr_qp_offset,
806
0
                                           ps_slice_hdr->i1_tc_offset_div2,
807
0
                                           filter_p, filter_q);
808
0
                    if(chroma_yuv420sp_vu)
809
0
                    {
810
0
                        pf_deblk_chroma_horz(pu1_src,
811
0
                                             chroma_strd,
812
0
                                             qp_q,
813
0
                                             qp_p,
814
0
                                             ps_pps->i1_pic_cr_qp_offset,
815
0
                                             ps_pps->i1_pic_cb_qp_offset,
816
0
                                             i1_tc_offset_div2,
817
0
                                             filter_q,
818
0
                                             filter_p,
819
0
                                             ps_sps->i1_chroma_format_idc);
820
0
                    }
821
0
                    else
822
0
                    {
823
0
                        pf_deblk_chroma_horz(pu1_src,
824
0
                                             chroma_strd,
825
0
                                             qp_p,
826
0
                                             qp_q,
827
0
                                             ps_pps->i1_pic_cb_qp_offset,
828
0
                                             ps_pps->i1_pic_cr_qp_offset,
829
0
                                             i1_tc_offset_div2,
830
0
                                             filter_p,
831
0
                                             filter_q,
832
0
                                             ps_sps->i1_chroma_format_idc);
833
0
                    }
834
0
                }
835
836
0
                pu1_src += 8;
837
0
                u4_bs = u4_bs >> (2 * h_samp_factor);
838
0
                col++;
839
0
            }
840
0
            if(is_yuv444 || is_yuv422)
841
0
            {
842
0
                if((64 == ctb_size) || ((32 == ctb_size) && (row & 1))) pu4_horz_bs++;
843
0
            }
844
0
            else
845
0
            {
846
0
                pu4_horz_bs += (64 == ctb_size) ? 2 : 1;
847
0
            }
848
0
            pu1_src -= ctb_size * (chroma_pixel_strd / h_samp_factor);
849
0
            pu1_src += 8 * chroma_strd;
850
851
0
        }
852
0
    }
853
0
}