Coverage Report

Created: 2026-03-20 07:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/encoder/ihevce_deblk.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/**
21
*******************************************************************************
22
* @file
23
*  ihevce_deblk.c
24
*
25
* @brief
26
*  Contains definition for the ctb level deblk function
27
*
28
* @author
29
*  ittiam
30
*
31
* @List of Functions:
32
*  ihevce_deblk_populate_qp_map()
33
*  ihevce_deblk_ctb()
34
*  ihevce_hbd_deblk_ctb()
35
*
36
* @remarks
37
*  None
38
*
39
*******************************************************************************
40
*/
41
42
/*****************************************************************************/
43
/* File Includes                                                             */
44
/*****************************************************************************/
45
/* System include files */
46
#include <stdio.h>
47
#include <string.h>
48
#include <stdlib.h>
49
#include <assert.h>
50
#include <stdarg.h>
51
#include <math.h>
52
53
/* User include files */
54
#include "ihevc_typedefs.h"
55
#include "itt_video_api.h"
56
#include "ihevce_api.h"
57
58
#include "rc_cntrl_param.h"
59
#include "rc_frame_info_collector.h"
60
#include "rc_look_ahead_params.h"
61
62
#include "ihevc_defs.h"
63
#include "ihevc_debug.h"
64
#include "ihevc_structs.h"
65
#include "ihevc_platform_macros.h"
66
#include "ihevc_deblk.h"
67
#include "ihevc_deblk_tables.h"
68
#include "ihevc_common_tables.h"
69
#include "ihevc_itrans_recon.h"
70
#include "ihevc_chroma_itrans_recon.h"
71
#include "ihevc_chroma_intra_pred.h"
72
#include "ihevc_intra_pred.h"
73
#include "ihevc_inter_pred.h"
74
#include "ihevc_mem_fns.h"
75
#include "ihevc_padding.h"
76
#include "ihevc_weighted_pred.h"
77
#include "ihevc_sao.h"
78
#include "ihevc_resi_trans.h"
79
#include "ihevc_quant_iquant_ssd.h"
80
#include "ihevc_cabac_tables.h"
81
82
#include "ihevce_defs.h"
83
#include "ihevce_hle_interface.h"
84
#include "ihevce_lap_enc_structs.h"
85
#include "ihevce_multi_thrd_structs.h"
86
#include "ihevce_me_common_defs.h"
87
#include "ihevce_had_satd.h"
88
#include "ihevce_error_codes.h"
89
#include "ihevce_bitstream.h"
90
#include "ihevce_cabac.h"
91
#include "ihevce_rdoq_macros.h"
92
#include "ihevce_function_selector.h"
93
#include "ihevce_enc_structs.h"
94
#include "ihevce_entropy_structs.h"
95
#include "ihevce_cmn_utils_instr_set_router.h"
96
#include "ihevce_enc_loop_structs.h"
97
#include "ihevce_common_utils.h"
98
#include "ihevce_global_tables.h"
99
#include "ihevce_deblk.h"
100
#include "ihevce_tile_interface.h"
101
102
/*****************************************************************************/
103
/* Function Definitions                                                      */
104
/*****************************************************************************/
105
106
/*!
107
******************************************************************************
108
* \if Function name : ihevce_deblk_populate_qp_map \endif
109
*
110
* \brief
111
*
112
*
113
*****************************************************************************
114
*/
115
void ihevce_deblk_populate_qp_map(
116
    ihevce_enc_loop_ctxt_t *ps_ctxt,
117
    deblk_ctbrow_prms_t *ps_deblk_ctb_row_params,
118
    ctb_enc_loop_out_t *ps_ctb_out_dblk,
119
    WORD32 vert_ctr,
120
    frm_ctb_ctxt_t *ps_frm_ctb_prms,
121
    ihevce_tile_params_t *ps_col_tile_params)
122
3.37k
{
123
3.37k
    ctb_enc_loop_out_t *ps_ctb_out;
124
3.37k
    WORD32 ctb_ctr, ctb_start, ctb_end;
125
3.37k
    WORD32 tile_qp_offset, tile_qp_size, i4_offset_for_last_cu_qp;
126
    /* Create the Qp map for the entire current CTB-row for deblocking purpose(only)*/
127
    /* Do this iff cur pic is referred or recon dump is enabled or psnr calc is on*/
128
    /*Qp of the last CU of previous CTB row*/
129
3.37k
    WORD8 i1_last_cu_qp;
130
    /*A pointer pointing to the top 4x4 block's Qp for all CTb rows*/
131
3.37k
    WORD8 *pi1_qp_top_4x4_ctb_row =
132
3.37k
        ps_deblk_ctb_row_params->api1_qp_top_4x4_ctb_row[ps_ctxt->i4_enc_frm_id] +
133
3.37k
        (ps_deblk_ctb_row_params->u4_qp_top_4x4_buf_size * ps_ctxt->i4_bitrate_instance_num);
134
135
3.37k
    UWORD32 u4_qp_top_4x4_buf_strd = ps_deblk_ctb_row_params->u4_qp_top_4x4_buf_strd;
136
137
    /*The Qp map which has to be populated*/
138
3.37k
    UWORD32 u4_qp_buffer_stride = ps_deblk_ctb_row_params->u4_qp_buffer_stride;
139
3.37k
    WORD8 *pi1_ctb_tile_qp = ps_deblk_ctb_row_params->pi1_ctb_row_qp;
140
141
    /*Temporary pointers to Qp map at CTB level*/
142
3.37k
    WORD8 *pi1_ctb_qp_map_tile;
143
144
3.37k
    i4_offset_for_last_cu_qp = ps_ctxt->pi4_offset_for_last_cu_qp[ps_ctxt->i4_tile_col_idx];
145
    /* total QPs to be copied for current row is : */
146
3.37k
    tile_qp_size = i4_offset_for_last_cu_qp + 1;
147
    /*Pointing to the first CTB of current CTB row*/
148
3.37k
    ps_ctb_out = ps_ctb_out_dblk;
149
    /* Offset req. for the row QP to the tile start */
150
3.37k
    tile_qp_offset = ps_col_tile_params->i4_first_ctb_x * (ps_frm_ctb_prms->i4_ctb_size / 4);
151
152
3.37k
    ctb_start = ps_col_tile_params->i4_first_ctb_x;
153
3.37k
    ctb_end =
154
3.37k
        (ps_col_tile_params->i4_first_ctb_x + ps_col_tile_params->i4_curr_tile_wd_in_ctb_unit);
155
156
3.37k
    if(vert_ctr) /*Not first CTB row of frame*/
157
1.65k
    {
158
        /*copy from top4x4_array data stored by upper CTB-row to qp-map*/
159
1.65k
        memcpy(
160
1.65k
            pi1_ctb_tile_qp,
161
1.65k
            (pi1_qp_top_4x4_ctb_row + (vert_ctr - 1) * u4_qp_top_4x4_buf_strd + tile_qp_offset),
162
1.65k
            tile_qp_size);
163
1.65k
    }
164
165
    /*pu1_ctb_row_qp points to top4x4 row in Qp-map.
166
    Now pointing pu1_ctb_qp_map to cur 4x4 row*/
167
3.37k
    pi1_ctb_qp_map_tile = pi1_ctb_tile_qp + u4_qp_buffer_stride;
168
169
    /* This i1_last_cu_qp will be conditionally overwritten later */
170
3.37k
    i1_last_cu_qp = ps_ctxt->i4_frame_qp;
171
172
    /* -- Loop over all the CTBs in a CTB-row for populating the Qp-map ----- */
173
14.0k
    for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
174
10.7k
    {
175
10.7k
        WORD32 cu_ctr;
176
10.7k
        cu_enc_loop_out_t *ps_curr_cu;
177
178
        /* Update i1_last_cu_qp based on CTB's position in tile */
179
10.7k
        update_last_coded_cu_qp(
180
10.7k
            (ps_deblk_ctb_row_params->pi1_ctb_row_qp + i4_offset_for_last_cu_qp),
181
10.7k
            ps_ctxt->i1_entropy_coding_sync_enabled_flag,
182
10.7k
            ps_frm_ctb_prms,
183
10.7k
            ps_ctxt->i4_frame_qp,
184
10.7k
            vert_ctr,
185
10.7k
            ctb_ctr,
186
10.7k
            &i1_last_cu_qp);
187
188
        /* store the pointer of first cu of current ctb */
189
10.7k
        ps_curr_cu = ps_ctb_out->ps_enc_cu;
190
191
        /* --------- loop over all the CUs in the CTB --------------- */
192
219k
        for(cu_ctr = 0; cu_ctr < ps_ctb_out->u1_num_cus_in_ctb; cu_ctr++)
193
208k
        {
194
208k
            UWORD8 u1_vert_4x4, u1_horz_4x4;  //for_loop counters
195
208k
            WORD8 *pi1_cu_qp_map;
196
197
208k
            WORD8 i1_qp, i1_qp_left, i1_qp_top;
198
199
208k
            pi1_cu_qp_map = pi1_ctb_qp_map_tile +
200
208k
                            (ps_curr_cu->b3_cu_pos_y * 2) * u4_qp_buffer_stride +
201
208k
                            (ps_curr_cu->b3_cu_pos_x * 2);
202
203
            /*If the current CU is coded in skip_mode/zero_CBF then
204
            for deblocking, Qp of the previously coded CU will be used*/
205
208k
            if(ps_curr_cu->b1_skip_flag || ps_curr_cu->b1_no_residual_syntax_flag)
206
50.2k
            {
207
50.2k
                if(0 == ps_curr_cu->b3_cu_pos_x)
208
12.4k
                    i1_qp_left = i1_last_cu_qp;
209
37.7k
                else
210
37.7k
                    i1_qp_left = *(pi1_cu_qp_map - 1);
211
212
50.2k
                if(0 == ps_curr_cu->b3_cu_pos_y)
213
9.99k
                    i1_qp_top = i1_last_cu_qp;
214
40.2k
                else
215
40.2k
                    i1_qp_top = *(pi1_cu_qp_map - u4_qp_buffer_stride);
216
217
50.2k
                i1_qp = (i1_qp_left + i1_qp_top + 1) / 2;
218
219
50.2k
                if(0 == ps_curr_cu->b1_first_cu_in_qg)
220
0
                {
221
0
                    i1_qp = i1_last_cu_qp;
222
0
                }
223
50.2k
            }
224
158k
            else
225
158k
            {
226
158k
                i1_qp = ps_curr_cu->i1_cu_qp;
227
158k
            }
228
229
208k
            i1_last_cu_qp = i1_qp;
230
231
            /*---- Loop for populating Qp map for the current CU -------*/
232
785k
            for(u1_vert_4x4 = 0; u1_vert_4x4 < (ps_curr_cu->b4_cu_size * 2); u1_vert_4x4++)
233
576k
            {
234
2.88M
                for(u1_horz_4x4 = 0; u1_horz_4x4 < (ps_curr_cu->b4_cu_size * 2); u1_horz_4x4++)
235
2.30M
                {
236
2.30M
                    pi1_cu_qp_map[u1_horz_4x4] = i1_qp;
237
2.30M
                }
238
576k
                pi1_cu_qp_map += u4_qp_buffer_stride;
239
576k
            }
240
            /*Update Qp-map ptr. Qp map is at 4x4 level but b4_cu_size is at 8x8 level*/
241
208k
            ps_curr_cu++;
242
208k
        }
243
10.7k
        pi1_ctb_qp_map_tile += (ps_frm_ctb_prms->i4_ctb_size / 4);  //one qp per 4x4 block.
244
10.7k
        ps_ctb_out++;
245
246
10.7k
    }  //for(ctb_ctr = 0; ctb_ctr < num_ctbs_horz; ctb_ctr++)
247
248
    /*fill into the top4x4_array Qp for the lower CTB-row from bottom part of cur CTB row*/
249
3.37k
    memcpy(
250
3.37k
        (pi1_qp_top_4x4_ctb_row + vert_ctr * u4_qp_top_4x4_buf_strd + tile_qp_offset),
251
3.37k
        (pi1_ctb_tile_qp + (ps_frm_ctb_prms->i4_ctb_size / 4) * u4_qp_buffer_stride),
252
3.37k
        tile_qp_size);
253
3.37k
}
254
255
/**
256
*******************************************************************************
257
*
258
* @brief
259
*   Deblock CTB level function.
260
*
261
* @par Description:
262
*   For a given CTB, deblocking on both vertical and
263
*   horizontal edges is done. Both the luma and chroma
264
*   blocks are processed
265
*
266
* @param[in]
267
*   ps_deblk:   Pointer to the deblock context
268
*   last_col:   if the CTB is the last CTB of current CTB-row value is 1 else 0
269
*   ps_deblk_ctb_row_params: deblk ctb row params
270
*
271
* @returns
272
*
273
* @remarks
274
*  None
275
*
276
*******************************************************************************
277
*/
278
void ihevce_deblk_ctb(
279
    deblk_ctb_params_t *ps_deblk, WORD32 last_col, deblk_ctbrow_prms_t *ps_deblk_ctb_row_params)
280
10.7k
{
281
10.7k
    WORD32 ctb_size;
282
10.7k
    UWORD32 u4_bs;
283
10.7k
    WORD32 bs_lz; /*Leading zeros in boundary strength*/
284
10.7k
    WORD32 qp_p, qp_q;
285
10.7k
    UWORD8 *pu1_src;
286
10.7k
    UWORD8 *pu1_src_uv;
287
10.7k
    UWORD8 *pu1_curr_src;
288
10.7k
    WORD32 col_size;
289
10.7k
    WORD32 col, row, i4_edge_count;
290
10.7k
    WORD32 num_columns_for_vert_filt;
291
10.7k
    WORD32 num_blks_for_vert_filt;
292
10.7k
    WORD32 num_rows_for_horz_filt;
293
294
10.7k
    ihevc_deblk_chroma_horz_ft *pf_deblk_chroma_horz;
295
10.7k
    ihevc_deblk_chroma_horz_ft *pf_deblk_chroma_vert;
296
297
    /* Filter flags are packed along with the qp info.
298
    6 out of the 8 bits correspond to qp and 1 to filter flag. */
299
    /* filter_p and filter_q are initialized to 1.
300
    They are to be extracted along with the qp info. */
301
10.7k
    WORD32 filter_p, filter_q;
302
10.7k
    WORD8 *pi1_ctb_row_qp_p, *pi1_ctb_row_qp_temp;
303
10.7k
    WORD8 *pi1_ctb_row_qp_q;
304
305
10.7k
    func_selector_t *ps_func_slector = ps_deblk->ps_func_selector;
306
307
10.7k
    WORD32 left_luma_edge_filter_flag = ps_deblk->i4_deblock_left_ctb_edge;
308
10.7k
    WORD32 top_luma_edge_filter_flag = ps_deblk->i4_deblock_top_ctb_edge;
309
10.7k
    WORD32 left_chroma_edge_filter_flag = ps_deblk->i4_deblock_left_ctb_edge;
310
10.7k
    WORD32 top_chroma_edge_filter_flag = ps_deblk->i4_deblock_top_ctb_edge;
311
10.7k
    UWORD32 *bs_vert = ps_deblk_ctb_row_params->pu4_ctb_row_bs_vert;
312
10.7k
    UWORD32 *bs_horz = ps_deblk_ctb_row_params->pu4_ctb_row_bs_horz;
313
10.7k
    UWORD32 *bs_vert_uv = bs_vert;
314
10.7k
    UWORD32 *bs_horz_uv = bs_horz;
315
10.7k
    UWORD32 u4_qp_buffer_stride = ps_deblk_ctb_row_params->u4_qp_buffer_stride;
316
10.7k
    UWORD8 u1_is_422 = (ps_deblk->u1_chroma_array_type == 2);
317
318
10.7k
    if(u1_is_422)
319
0
    {
320
0
        pf_deblk_chroma_horz = ps_func_slector->ihevc_deblk_422chroma_horz_fptr;
321
0
        pf_deblk_chroma_vert = ps_func_slector->ihevc_deblk_422chroma_vert_fptr;
322
0
    }
323
10.7k
    else
324
10.7k
    {
325
10.7k
        pf_deblk_chroma_horz = ps_func_slector->ihevc_deblk_chroma_horz_fptr;
326
10.7k
        pf_deblk_chroma_vert = ps_func_slector->ihevc_deblk_chroma_vert_fptr;
327
10.7k
    }
328
329
10.7k
    ctb_size = ps_deblk->i4_ctb_size;
330
331
    /* The PCM filter flag and bypass trans flag are always set to 1 in encoder profile */
332
    /* Can be removed during optimization */
333
10.7k
    filter_q = 1;
334
10.7k
    filter_p = 1;
335
336
    //////////////////////////////////////////////////////////////////////////////
337
    /* Luma Veritcal Edge */
338
10.7k
    pu1_src = ps_deblk->pu1_ctb_y;
339
10.7k
    pi1_ctb_row_qp_temp = ps_deblk_ctb_row_params->pi1_ctb_row_qp + u4_qp_buffer_stride;
340
10.7k
    num_columns_for_vert_filt = ctb_size / 8;
341
10.7k
    num_blks_for_vert_filt = ctb_size / 4;
342
343
96.5k
    for(i4_edge_count = 0; i4_edge_count < num_columns_for_vert_filt; i4_edge_count++)
344
85.7k
    {
345
85.7k
        u4_bs = *bs_vert;
346
        /* get the current 4x4 vertical pointer */
347
85.7k
        pu1_curr_src = pu1_src;
348
85.7k
        pi1_ctb_row_qp_q = pi1_ctb_row_qp_temp + (i4_edge_count << 1);
349
350
        /* If the current edge is not the 1st edge of frame or slice */
351
85.7k
        if(1 == left_luma_edge_filter_flag)
352
82.4k
        {
353
693k
            for(row = 0; row < num_blks_for_vert_filt;)
354
611k
            {
355
611k
                bs_lz = CLZ(u4_bs) >> 1;
356
                /* If BS = 0, skip the egde filtering */
357
611k
                if(0 != bs_lz)
358
102k
                {
359
102k
                    u4_bs = u4_bs << (bs_lz << 1);
360
102k
                    pu1_curr_src += ((bs_lz << 2) * ps_deblk->i4_luma_pic_stride);
361
102k
                    pi1_ctb_row_qp_q += (bs_lz * u4_qp_buffer_stride);
362
102k
                    row += bs_lz;
363
102k
                    continue;
364
102k
                }
365
509k
                qp_p = *(pi1_ctb_row_qp_q - 1);
366
509k
                qp_q = *pi1_ctb_row_qp_q;
367
368
509k
                ps_func_slector->ihevc_deblk_luma_vert_fptr(
369
509k
                    pu1_curr_src,
370
509k
                    ps_deblk->i4_luma_pic_stride,
371
509k
                    (u4_bs >> 30), /* bits 31 and 30 are extracted */
372
509k
                    qp_p,
373
509k
                    qp_q,
374
509k
                    ps_deblk->i4_beta_offset_div2,
375
509k
                    ps_deblk->i4_tc_offset_div2,
376
509k
                    filter_p,
377
509k
                    filter_q);
378
379
509k
                u4_bs = u4_bs << 2;
380
509k
                pu1_curr_src += (ps_deblk->i4_luma_pic_stride << 2);
381
509k
                pi1_ctb_row_qp_q += u4_qp_buffer_stride;
382
509k
                row++;
383
509k
            }
384
82.4k
        }
385
386
        /* Increment the boundary strength and src pointer for the next column */
387
85.7k
        bs_vert += 1;
388
85.7k
        pu1_src += 8;
389
390
        /* Enable for the next edges of ctb*/
391
85.7k
        left_luma_edge_filter_flag = 1;
392
85.7k
    }
393
394
    //////////////////////////////////////////////////////////////////////////////
395
    /* Chroma Veritcal Edge */
396
10.7k
    pu1_src_uv = ps_deblk->pu1_ctb_uv;
397
10.7k
    pi1_ctb_row_qp_temp = ps_deblk_ctb_row_params->pi1_ctb_row_qp + u4_qp_buffer_stride;
398
399
    /* Column spacing is 4 for each chroma component */
400
    /* and hence 8 when they are interleaved. */
401
    /* But, only those columns with a x co-ordinate */
402
    /* that is divisiblee by 8 are filtered */
403
    /* Hence, denominator is 16 */
404
10.7k
    num_columns_for_vert_filt = ctb_size / 16;
405
    /* blk_size is 4 and chroma_ctb_height is ctb_size/2 */
406
10.7k
    num_blks_for_vert_filt = (0 == u1_is_422) ? (ctb_size / 2) / 4 : (ctb_size) / 4;
407
408
53.6k
    for(i4_edge_count = 0; i4_edge_count < num_columns_for_vert_filt; i4_edge_count++)
409
42.8k
    {
410
        /* Every alternate boundary strength value is used for 420 chroma */
411
42.8k
        u4_bs = *(bs_vert_uv) & ((0 == u1_is_422) ? 0x88888888 : 0xaaaaaaaa);
412
42.8k
        pu1_curr_src = pu1_src_uv;
413
42.8k
        pi1_ctb_row_qp_q = pi1_ctb_row_qp_temp + (i4_edge_count << 2);
414
415
        /* If the current edge is not the 1st edge of frame or slice */
416
42.8k
        if(1 == left_chroma_edge_filter_flag)
417
39.5k
        {
418
            /* Each 'bs' is 2 bits long */
419
            /* The divby4 in 420 is */
420
            /* necessitated by the fact that */
421
            /* chroma ctb_ht is half that of luma */
422
39.5k
            WORD32 i4_log2_num_bits_per_bs = ((0 == u1_is_422) + 1);
423
            /* i4_sub_heightC = 2 for 420 */
424
            /* i4_sub_heightC = 1 for 422 */
425
39.5k
            WORD32 i4_sub_heightC = i4_log2_num_bits_per_bs;
426
427
231k
            for(row = 0; row < num_blks_for_vert_filt;)
428
191k
            {
429
191k
                bs_lz = CLZ(u4_bs) >> i4_log2_num_bits_per_bs;
430
431
                /* If BS = 0, skip the egde filtering */
432
191k
                if(0 != bs_lz)
433
41.1k
                {
434
41.1k
                    row += bs_lz;
435
41.1k
                    u4_bs = u4_bs << (bs_lz << i4_log2_num_bits_per_bs);
436
                    /* '<<2' because of blk_size being 4x4 */
437
41.1k
                    pu1_curr_src += ((bs_lz << 2) * ps_deblk->i4_chroma_pic_stride);
438
439
                    /* In 420, every alternate QP row is skipped, because chroma height */
440
                    /* In 422, no row is skipped */
441
41.1k
                    pi1_ctb_row_qp_q += ((u4_qp_buffer_stride << (i4_sub_heightC - 1)) * bs_lz);
442
443
41.1k
                    continue;
444
41.1k
                }
445
446
150k
                qp_p = *(pi1_ctb_row_qp_q - i4_sub_heightC);
447
150k
                qp_q = *pi1_ctb_row_qp_q;
448
449
150k
                pf_deblk_chroma_vert(
450
150k
                    pu1_curr_src,
451
150k
                    ps_deblk->i4_chroma_pic_stride,
452
150k
                    qp_p,
453
150k
                    qp_q,
454
150k
                    ps_deblk->i4_cb_qp_indx_offset,
455
150k
                    ps_deblk->i4_cr_qp_indx_offset,
456
150k
                    ps_deblk->i4_tc_offset_div2,
457
150k
                    filter_p,
458
150k
                    filter_q);
459
460
150k
                u4_bs = u4_bs << (1 << i4_log2_num_bits_per_bs);
461
150k
                pu1_curr_src += (ps_deblk->i4_chroma_pic_stride << 2);
462
150k
                pi1_ctb_row_qp_q += (u4_qp_buffer_stride << (i4_sub_heightC - 1));
463
150k
                row++;
464
150k
            }
465
39.5k
        }
466
        /* Increment the boundary strength by 2 and src pointer for the next column */
467
        /* As the edge filtering happens for alternate column */
468
42.8k
        bs_vert_uv += 2;
469
42.8k
        pu1_src_uv += 16;
470
42.8k
        left_chroma_edge_filter_flag = 1;
471
42.8k
    }
472
473
    //////////////////////////////////////////////////////////////////////////////
474
475
    /* Luma Horizontal Edge */
476
10.7k
    pu1_src = ps_deblk->pu1_ctb_y;
477
10.7k
    col_size = ctb_size / 4;
478
479
    /* If the ctb is the 1st ctb of row,                     */
480
    /* Decrement the loop count to exclude filtering of last 4 pixels */
481
    /* else shift the src pointer by 4 pixels to do filtering for shifted ctb */
482
10.7k
    if(ps_deblk->i4_deblock_left_ctb_edge == 1)
483
7.35k
    {
484
7.35k
        pu1_src -= 4;
485
        /*If the ctb is at the horizonatl end of PIC*/
486
        /* Increase the column size to filter last 4 pixels */
487
7.35k
        col_size += last_col;
488
7.35k
    }
489
3.37k
    else if(!last_col)
490
1.96k
    {
491
1.96k
        col_size -= 1;
492
1.96k
    }
493
10.7k
    {
494
10.7k
        UWORD8 *pu1_src_temp = pu1_src;
495
        //pu1_ctb_row_qp_p and pu1_ctb_row_qp_q point to alternate rows
496
10.7k
        pi1_ctb_row_qp_p = ps_deblk_ctb_row_params->pi1_ctb_row_qp;
497
498
10.7k
        num_rows_for_horz_filt = ctb_size / 8;
499
500
96.5k
        for(i4_edge_count = 0; i4_edge_count < num_rows_for_horz_filt; i4_edge_count++)
501
85.7k
        {
502
85.7k
            WORD32 col_size_temp = col_size;
503
85.7k
            pi1_ctb_row_qp_q = pi1_ctb_row_qp_p + u4_qp_buffer_stride;
504
85.7k
            pu1_src = pu1_src_temp + (i4_edge_count * 8 * ps_deblk->i4_luma_pic_stride);
505
506
85.7k
            if(1 == top_luma_edge_filter_flag)
507
81.0k
            {
508
                //Deblock the last vertical_4x4_column of previous CTB
509
81.0k
                if(ps_deblk->i4_deblock_left_ctb_edge == 1)
510
55.8k
                {
511
55.8k
                    u4_bs = ps_deblk->au1_prev_bs[i4_edge_count] & 0x3;
512
55.8k
                    if(u4_bs != 0)
513
25.3k
                    {
514
25.3k
                        qp_p = *(pi1_ctb_row_qp_p - 1);
515
25.3k
                        qp_q = *(pi1_ctb_row_qp_q - 1);
516
517
25.3k
                        ps_func_slector->ihevc_deblk_luma_horz_fptr(
518
25.3k
                            pu1_src,
519
25.3k
                            ps_deblk->i4_luma_pic_stride,
520
25.3k
                            u4_bs,
521
25.3k
                            qp_p,
522
25.3k
                            qp_q,
523
25.3k
                            ps_deblk->i4_beta_offset_div2,
524
25.3k
                            ps_deblk->i4_tc_offset_div2,
525
25.3k
                            1,
526
25.3k
                            1);
527
25.3k
                    }
528
529
55.8k
                    pu1_src += 4;
530
55.8k
                    col_size_temp--;
531
55.8k
                }
532
                //Start deblocking current CTB
533
81.0k
                u4_bs = *(bs_horz);
534
535
654k
                for(col = 0; col < col_size_temp;)
536
573k
                {
537
573k
                    bs_lz = CLZ(u4_bs) >> 1;
538
573k
                    if(0 != bs_lz)
539
68.6k
                    {
540
68.6k
                        u4_bs = u4_bs << (bs_lz << 1);
541
68.6k
                        pu1_src += 4 * bs_lz;
542
68.6k
                        col += bs_lz;
543
68.6k
                        continue;
544
68.6k
                    }
545
504k
                    qp_p = *(pi1_ctb_row_qp_p + col);
546
504k
                    qp_q = *(pi1_ctb_row_qp_q + col);
547
548
504k
                    ps_func_slector->ihevc_deblk_luma_horz_fptr(
549
504k
                        pu1_src,
550
504k
                        ps_deblk->i4_luma_pic_stride,
551
504k
                        u4_bs >> (sizeof(u4_bs) * 8 - 2),
552
504k
                        qp_p,
553
504k
                        qp_q,
554
504k
                        ps_deblk->i4_beta_offset_div2,
555
504k
                        ps_deblk->i4_tc_offset_div2,
556
504k
                        filter_p,
557
504k
                        filter_q);
558
559
504k
                    pu1_src += 4;
560
504k
                    u4_bs = u4_bs << 2;
561
504k
                    col++;
562
504k
                }
563
                //Store the last vertical_4x4 column of CTB's info for next CTB deblocking
564
81.0k
                u4_bs = *bs_horz;
565
81.0k
                ps_deblk->au1_prev_bs[i4_edge_count] =
566
81.0k
                    (UWORD8)(((u4_bs << ((ctb_size >> 1) - 2))) >> 30);
567
81.0k
            }
568
85.7k
            bs_horz += 1;
569
85.7k
            pi1_ctb_row_qp_p += (u4_qp_buffer_stride << 1);
570
85.7k
            top_luma_edge_filter_flag = 1;
571
85.7k
        }
572
10.7k
    }
573
574
    //////////////////////////////////////////////////////////////////////////////
575
    /* Chroma Horizontal Edge */
576
10.7k
    pu1_src_uv = ps_deblk->pu1_ctb_uv;
577
10.7k
    col_size = ctb_size / 8;
578
579
    /* If the ctb is the 1st ctb of row,                     */
580
    /* Decrement the loop count to exclude filtering of last 4 pixels */
581
    /* else shift the src pointer by 8 (uv) pixels to do filtering for shifted ctb */
582
10.7k
    if(ps_deblk->i4_deblock_left_ctb_edge == 1)
583
7.35k
    {
584
7.35k
        pu1_src_uv -= 8;
585
586
        /*If the ctb is at the horizonatl end of PIC*/
587
        /* Increase the column size to filter last 8 (uv) pixels */
588
7.35k
        col_size += last_col;
589
7.35k
    }
590
3.37k
    else if(!last_col)
591
1.96k
    {
592
1.96k
        col_size--;
593
1.96k
    }
594
595
10.7k
    {
596
10.7k
        UWORD8 *pu1_src_temp = pu1_src_uv;
597
598
        //pu1_ctb_row_qp_p and pu1_ctb_row_qp_q point to alternate rows
599
10.7k
        pi1_ctb_row_qp_p = ps_deblk_ctb_row_params->pi1_ctb_row_qp;
600
10.7k
        num_rows_for_horz_filt = ctb_size / ((0 == u1_is_422) ? 16 : 8);
601
602
53.6k
        for(i4_edge_count = 0; i4_edge_count < num_rows_for_horz_filt; i4_edge_count++)
603
42.8k
        {
604
42.8k
            WORD32 col_size_temp = col_size;
605
606
42.8k
            pi1_ctb_row_qp_q = pi1_ctb_row_qp_p + u4_qp_buffer_stride;
607
42.8k
            pu1_src_uv = pu1_src_temp + (i4_edge_count * 8 * ps_deblk->i4_chroma_pic_stride);
608
609
42.8k
            if(1 == top_chroma_edge_filter_flag)
610
38.1k
            {
611
                //Deblock the last vertical _4x4_column of previous CTB
612
38.1k
                if(ps_deblk->i4_deblock_left_ctb_edge == 1)
613
26.4k
                {
614
26.4k
                    u4_bs = ps_deblk->au1_prev_bs_uv[i4_edge_count] & 0x2;
615
616
26.4k
                    if(u4_bs == 2)
617
14.6k
                    {
618
14.6k
                        qp_p = *(pi1_ctb_row_qp_p - 1);
619
14.6k
                        qp_q = *(pi1_ctb_row_qp_q - 1);
620
621
14.6k
                        pf_deblk_chroma_horz(
622
14.6k
                            pu1_src_uv,
623
14.6k
                            ps_deblk->i4_chroma_pic_stride,
624
14.6k
                            qp_p,
625
14.6k
                            qp_q,
626
14.6k
                            ps_deblk->i4_cb_qp_indx_offset,
627
14.6k
                            ps_deblk->i4_cr_qp_indx_offset,
628
14.6k
                            ps_deblk->i4_tc_offset_div2,
629
14.6k
                            1,
630
14.6k
                            1);
631
14.6k
                    }
632
633
26.4k
                    pu1_src_uv += 8;
634
26.4k
                    col_size_temp--;
635
26.4k
                }
636
637
                //Start deblocking current CTB
638
38.1k
                u4_bs = *(bs_horz_uv)&0x88888888;
639
640
200k
                for(col = 0; col < col_size_temp;)
641
162k
                {
642
162k
                    bs_lz = CLZ(u4_bs) >> 2;
643
644
162k
                    if(0 != bs_lz)
645
28.5k
                    {
646
28.5k
                        u4_bs = u4_bs << (bs_lz << 2);
647
28.5k
                        pu1_src_uv += (8 * bs_lz);
648
649
28.5k
                        col += bs_lz;
650
28.5k
                        continue;
651
28.5k
                    }
652
653
134k
                    qp_p = *(pi1_ctb_row_qp_p + (col << 1));
654
134k
                    qp_q = *(pi1_ctb_row_qp_q + (col << 1));
655
656
134k
                    pf_deblk_chroma_horz(
657
134k
                        pu1_src_uv,
658
134k
                        ps_deblk->i4_chroma_pic_stride,
659
134k
                        qp_p,
660
134k
                        qp_q,
661
134k
                        ps_deblk->i4_cb_qp_indx_offset,
662
134k
                        ps_deblk->i4_cr_qp_indx_offset,
663
134k
                        ps_deblk->i4_tc_offset_div2,
664
134k
                        filter_p,
665
134k
                        filter_q);
666
667
134k
                    pu1_src_uv += 8;
668
134k
                    u4_bs = u4_bs << 4;
669
134k
                    col++;
670
134k
                }
671
672
                //Store the last vertical_4x4 column of CTB's info for next CTB deblocking
673
38.1k
                u4_bs = *bs_horz_uv;
674
38.1k
                ps_deblk->au1_prev_bs_uv[i4_edge_count] =
675
38.1k
                    (UWORD8)(((u4_bs << ((ctb_size >> 1) - 4))) >> 30);
676
38.1k
            }
677
678
42.8k
            bs_horz_uv += ((0 == u1_is_422) + 1);
679
42.8k
            pi1_ctb_row_qp_p += (u4_qp_buffer_stride << ((0 == u1_is_422) + 1));
680
42.8k
            top_chroma_edge_filter_flag = 1;
681
42.8k
        }
682
10.7k
    }
683
684
10.7k
    return;
685
10.7k
}