Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/cdef_process.c
Line
Count
Source
1
/*
2
* Copyright(c) 2019 Intel Corporation
3
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
4
*
5
* This source code is subject to the terms of the BSD 2 Clause License and
6
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
7
* was not distributed with this source code in the LICENSE file, you can
8
* obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
9
* Media Patent License 1.0 was not distributed with this source code in the
10
* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
11
*/
12
13
#include <stdlib.h>
14
#include "aom_dsp_rtcd.h"
15
#include "definitions.h"
16
#include "enc_handle.h"
17
#include "cdef_process.h"
18
#include "enc_dec_results.h"
19
#include "svt_threads.h"
20
#include "reference_object.h"
21
#include "enc_cdef.h"
22
#include "enc_dec_process.h"
23
#include "pic_buffer_desc.h"
24
#include "sequence_control_set.h"
25
#include "utility.h"
26
#include "pcs.h"
27
#include "resize.h"
28
#include "super_res.h"
29
30
0
static void set_unscaled_input_16bit(PictureControlSet* pcs) {
31
0
    EbPictureBufferDesc* input_pic  = pcs->ppcs->enhanced_unscaled_pic;
32
0
    EbPictureBufferDesc* output_pic = pcs->input_frame16bit;
33
0
    uint16_t             ss_x       = pcs->ppcs->scs->subsampling_x;
34
0
    uint16_t             ss_y       = pcs->ppcs->scs->subsampling_y;
35
0
    svt_aom_copy_buffer_info(input_pic, pcs->input_frame16bit);
36
0
    if (input_pic->bit_depth == EB_EIGHT_BIT) {
37
0
        svt_aom_convert_pic_8bit_to_16bit(input_pic, output_pic, ss_x, ss_y);
38
0
    } else {
39
0
        uint16_t* planes[3] = {
40
0
            (uint16_t*)output_pic->y_buffer, (uint16_t*)output_pic->u_buffer, (uint16_t*)output_pic->v_buffer};
41
0
        svt_aom_pack_2d_pic(input_pic, planes);
42
0
    }
43
0
}
44
45
static EbErrorType copy_recon_enc(SequenceControlSet* scs, EbPictureBufferDesc* recon_picture_src,
46
0
                                  EbPictureBufferDesc* recon_picture_dst, int num_planes, int skip_copy) {
47
0
    recon_picture_dst->border       = recon_picture_src->border;
48
0
    recon_picture_dst->width        = recon_picture_src->width;
49
0
    recon_picture_dst->height       = recon_picture_src->height;
50
0
    recon_picture_dst->max_width    = recon_picture_src->max_width;
51
0
    recon_picture_dst->max_height   = recon_picture_src->max_height;
52
0
    recon_picture_dst->bit_depth    = recon_picture_src->bit_depth;
53
0
    recon_picture_dst->color_format = recon_picture_src->color_format;
54
55
0
    recon_picture_dst->y_stride = recon_picture_src->y_stride;
56
0
    recon_picture_dst->u_stride = recon_picture_src->u_stride;
57
0
    recon_picture_dst->v_stride = recon_picture_src->v_stride;
58
59
0
    recon_picture_dst->luma_size   = recon_picture_src->luma_size;
60
0
    recon_picture_dst->chroma_size = recon_picture_src->chroma_size;
61
0
    recon_picture_dst->packed_flag = recon_picture_src->packed_flag;
62
63
0
    recon_picture_dst->y_stride_bit_inc = recon_picture_src->y_stride_bit_inc;
64
0
    recon_picture_dst->u_stride_bit_inc = recon_picture_src->u_stride_bit_inc;
65
0
    recon_picture_dst->v_stride_bit_inc = recon_picture_src->v_stride_bit_inc;
66
67
0
    recon_picture_dst->buffer_enable_mask = scs->seq_header.color_config.mono_chrome ? PICTURE_BUFFER_DESC_LUMA_MASK
68
0
                                                                                     : PICTURE_BUFFER_DESC_FULL_MASK;
69
70
0
    int      ss_x            = scs->subsampling_x;
71
0
    int      ss_y            = scs->subsampling_y;
72
0
    uint32_t bytes_per_pixel = scs->is_16bit_pipeline ? 2 : 1;
73
74
    // Get frame size to alloc
75
0
    uint32_t alloc_sz       = 0;
76
0
    uint32_t buffer_size[3] = {0};
77
0
    if (recon_picture_dst->buffer_enable_mask & PICTURE_BUFFER_DESC_Y_FLAG) {
78
0
        alloc_sz += buffer_size[0] = recon_picture_dst->luma_size * bytes_per_pixel;
79
0
    }
80
81
0
    if (recon_picture_dst->buffer_enable_mask & PICTURE_BUFFER_DESC_Cb_FLAG) {
82
0
        alloc_sz += buffer_size[1] = recon_picture_dst->chroma_size * bytes_per_pixel;
83
0
    }
84
85
0
    if (recon_picture_dst->buffer_enable_mask & PICTURE_BUFFER_DESC_Cr_FLAG) {
86
0
        alloc_sz += buffer_size[2] = recon_picture_dst->chroma_size * bytes_per_pixel;
87
0
    }
88
89
    // Allocate the Picture Buffers (luma & chroma)
90
0
    EB_CALLOC_ALIGNED_ARRAY(recon_picture_dst->buffer_alloc, alloc_sz);
91
0
    recon_picture_dst->buffer_alloc_sz = alloc_sz;
92
0
    uint32_t assigned_space            = 0;
93
0
    if (recon_picture_dst->buffer_enable_mask & PICTURE_BUFFER_DESC_Y_FLAG) {
94
0
        recon_picture_dst->y_buffer = recon_picture_dst->buffer_alloc +
95
0
            (recon_picture_dst->border + (recon_picture_dst->y_stride * recon_picture_dst->border)) * bytes_per_pixel;
96
0
        assigned_space += buffer_size[0];
97
0
    } else {
98
0
        recon_picture_dst->y_buffer = NULL;
99
0
    }
100
101
0
    if (recon_picture_dst->buffer_enable_mask & PICTURE_BUFFER_DESC_Cb_FLAG) {
102
0
        recon_picture_dst->u_buffer = recon_picture_dst->buffer_alloc + assigned_space +
103
0
            ((recon_picture_dst->border >> ss_x) +
104
0
             (recon_picture_dst->u_stride * (recon_picture_dst->border >> ss_y))) *
105
0
                bytes_per_pixel;
106
0
        assigned_space += buffer_size[1];
107
0
    } else {
108
0
        recon_picture_dst->u_buffer = NULL;
109
0
    }
110
111
0
    if (recon_picture_dst->buffer_enable_mask & PICTURE_BUFFER_DESC_Cr_FLAG) {
112
0
        recon_picture_dst->v_buffer = recon_picture_dst->buffer_alloc + assigned_space +
113
0
            ((recon_picture_dst->border >> ss_x) +
114
0
             (recon_picture_dst->v_stride * (recon_picture_dst->border >> ss_y))) *
115
0
                bytes_per_pixel;
116
0
        assigned_space += buffer_size[2];
117
0
    } else {
118
0
        recon_picture_dst->v_buffer = NULL;
119
0
    }
120
0
    assert(assigned_space == alloc_sz);
121
122
0
    int use_highbd = scs->is_16bit_pipeline;
123
124
0
    if (!skip_copy) {
125
0
        assert(num_planes <= MAX_PLANES);
126
0
        for (int plane = 0; plane < num_planes; ++plane) {
127
0
            uint8_t *src_buf, *dst_buf;
128
0
            int32_t  src_stride, dst_stride;
129
130
0
            int sub_x = plane ? scs->subsampling_x : 0;
131
0
            int sub_y = plane ? scs->subsampling_y : 0;
132
133
0
            src_buf    = recon_picture_src->buffer[plane];
134
0
            src_stride = recon_picture_src->stride[plane];
135
0
            dst_buf    = recon_picture_dst->buffer[plane];
136
0
            dst_stride = recon_picture_dst->stride[plane];
137
138
0
            int height = ((recon_picture_src->height + sub_y) >> sub_y);
139
0
            for (int row = 0; row < height; ++row) {
140
0
                svt_memcpy(
141
0
                    dst_buf, src_buf, ((recon_picture_src->width + sub_x) >> sub_x) * sizeof(*src_buf) << use_highbd);
142
0
                src_buf += src_stride << use_highbd;
143
0
                dst_buf += dst_stride << use_highbd;
144
0
            }
145
0
        }
146
0
    }
147
148
0
    return EB_ErrorNone;
149
0
}
150
151
0
static void svt_av1_superres_upscale_frame(struct Av1Common* cm, PictureControlSet* pcs, SequenceControlSet* scs) {
152
    // Set these parameters for testing since they are not correctly populated yet
153
0
    EbPictureBufferDesc* recon_ptr;
154
155
0
    bool is_16bit = scs->is_16bit_pipeline;
156
157
0
    svt_aom_get_recon_pic(pcs, &recon_ptr, is_16bit);
158
159
0
    uint16_t  ss_x       = scs->subsampling_x;
160
0
    uint16_t  ss_y       = scs->subsampling_y;
161
0
    const int num_planes = scs->seq_header.color_config.mono_chrome ? 1 : MAX_PLANES;
162
163
0
    EbPictureBufferDesc  recon_pic_temp;
164
0
    EbPictureBufferDesc* ps_recon_pic_temp;
165
0
    ps_recon_pic_temp = &recon_pic_temp;
166
167
0
    EbErrorType return_error = copy_recon_enc(scs, recon_ptr, ps_recon_pic_temp, num_planes, 0);
168
169
0
    if (return_error != EB_ErrorNone) {
170
0
        ps_recon_pic_temp = NULL;
171
0
        assert(0);
172
0
    }
173
174
0
    EbPictureBufferDesc* src = ps_recon_pic_temp;
175
0
    EbPictureBufferDesc* dst = recon_ptr;
176
177
    // get the bit-depth from the encoder config instead of from the recon ptr
178
0
    int bit_depth = scs->static_config.encoder_bit_depth;
179
180
0
    assert(num_planes <= MAX_PLANES);
181
0
    for (int plane = 0; plane < num_planes; ++plane) {
182
0
        uint8_t *src_buf, *dst_buf;
183
0
        int32_t  src_stride, dst_stride;
184
185
0
        int sub_x  = plane ? ss_x : 0;
186
0
        int sub_y  = plane ? ss_y : 0;
187
0
        src_buf    = src->buffer[plane];
188
0
        src_stride = src->stride[plane];
189
0
        dst_buf    = dst->buffer[plane];
190
0
        dst_stride = dst->stride[plane];
191
192
0
        svt_av1_upscale_normative_rows(cm,
193
0
                                       (const uint8_t*)src_buf,
194
0
                                       src_stride,
195
0
                                       dst_buf,
196
0
                                       dst_stride,
197
0
                                       (src->height + sub_y) >> sub_y,
198
0
                                       sub_x,
199
0
                                       bit_depth,
200
0
                                       is_16bit);
201
0
    }
202
203
    // free the memory
204
0
    EB_FREE_ALIGNED_ARRAY(ps_recon_pic_temp->buffer_alloc);
205
0
}
206
207
/**************************************
208
 * Cdef Context
209
 **************************************/
210
typedef struct CdefContext {
211
    EbFifo* cdef_input_fifo_ptr;
212
    EbFifo* cdef_output_fifo_ptr;
213
} CdefContext;
214
215
474
static void cdef_context_dctor(EbPtr p) {
216
474
    EbThreadContext* thread_ctx = (EbThreadContext*)p;
217
474
    CdefContext*     obj        = (CdefContext*)thread_ctx->priv;
218
474
    EB_FREE_ARRAY(obj);
219
474
}
220
221
/******************************************************
222
 * Cdef Context Constructor
223
 ******************************************************/
224
474
EbErrorType svt_aom_cdef_context_ctor(EbThreadContext* thread_ctx, const EbEncHandle* enc_handle_ptr, int index) {
225
474
    CdefContext* cdef_ctx;
226
474
    EB_CALLOC_ARRAY(cdef_ctx, 1);
227
474
    thread_ctx->priv  = cdef_ctx;
228
474
    thread_ctx->dctor = cdef_context_dctor;
229
230
    // Input/Output System Resource Manager FIFOs
231
474
    cdef_ctx->cdef_input_fifo_ptr  = svt_system_resource_get_consumer_fifo(enc_handle_ptr->dlf_results_resource_ptr,
232
474
                                                                          index);
233
474
    cdef_ctx->cdef_output_fifo_ptr = svt_system_resource_get_producer_fifo(enc_handle_ptr->cdef_results_resource_ptr,
234
474
                                                                           index);
235
236
474
    return EB_ErrorNone;
237
474
}
238
239
0
#define default_mse_uv 1040400
240
241
static uint64_t compute_cdef_dist(const EbByte dst, int32_t doffset, int32_t dstride, const uint8_t* src,
242
                                  const CdefList* dlist, int32_t cdef_count, BlockSize bsize, int32_t coeff_shift,
243
0
                                  uint8_t subsampling_factor, bool is_16bit) {
244
0
    uint64_t curr_mse = 0;
245
0
    if (is_16bit) {
246
0
        curr_mse = svt_compute_cdef_dist_16bit(((uint16_t*)dst) + doffset,
247
0
                                               dstride,
248
0
                                               (uint16_t*)src,
249
0
                                               dlist,
250
0
                                               cdef_count,
251
0
                                               bsize,
252
0
                                               coeff_shift,
253
0
                                               subsampling_factor);
254
255
0
    } else {
256
0
        curr_mse = svt_compute_cdef_dist_8bit(
257
0
            dst + doffset, dstride, src, dlist, cdef_count, bsize, coeff_shift, subsampling_factor);
258
0
    }
259
0
    return curr_mse;
260
0
}
261
262
/* Search for the best filter strength pair for each 64x64 filter block.
263
 *
264
 * For each 64x64 filter block and each plane, search the allowable filter strength pairs.
265
 * Call cdef_filter_fb() to perform filtering, then compute the MSE for each pair.
266
*/
267
0
static void cdef_seg_search(PictureControlSet* pcs, SequenceControlSet* scs, uint32_t segment_index) {
268
0
    PictureParentControlSet* ppcs     = pcs->ppcs;
269
0
    FrameHeader*             frm_hdr  = &ppcs->frm_hdr;
270
0
    Av1Common*               cm       = ppcs->av1_cm;
271
0
    const bool               is_16bit = scs->is_16bit_pipeline;
272
0
    uint32_t                 x_seg_idx;
273
0
    uint32_t                 y_seg_idx;
274
0
    const uint32_t           b64_pic_width  = (ppcs->aligned_width + 64 - 1) / 64;
275
0
    const uint32_t           b64_pic_height = (ppcs->aligned_height + 64 - 1) / 64;
276
0
    SEGMENT_CONVERT_IDX_TO_XY(segment_index, x_seg_idx, y_seg_idx, pcs->cdef_segments_column_count);
277
0
    const uint32_t x_b64_start_idx = SEGMENT_START_IDX(x_seg_idx, b64_pic_width, pcs->cdef_segments_column_count);
278
0
    const uint32_t x_b64_end_idx   = SEGMENT_END_IDX(x_seg_idx, b64_pic_width, pcs->cdef_segments_column_count);
279
0
    const uint32_t y_b64_start_idx = SEGMENT_START_IDX(y_seg_idx, b64_pic_height, pcs->cdef_segments_row_count);
280
0
    const uint32_t y_b64_end_idx   = SEGMENT_END_IDX(y_seg_idx, b64_pic_height, pcs->cdef_segments_row_count);
281
282
0
    const int32_t       mi_rows                    = cm->mi_rows;
283
0
    const int32_t       mi_cols                    = cm->mi_cols;
284
0
    CdefSearchControls* cdef_ctrls                 = &ppcs->cdef_search_ctrls;
285
0
    const int           first_pass_fs_num          = cdef_ctrls->first_pass_fs_num;
286
0
    const int           default_second_pass_fs_num = cdef_ctrls->default_second_pass_fs_num;
287
0
    EbByte              src[3];
288
0
    EbByte              ref[3];
289
0
    int32_t             stride_src[3];
290
0
    int32_t             stride_ref[3];
291
0
    int32_t             plane_bsize[3];
292
0
    int32_t             mi_wide_l2[3];
293
0
    int32_t             mi_high_l2[3];
294
0
    int32_t             xdec[3];
295
0
    int32_t             ydec[3];
296
0
    int32_t             cdef_count;
297
0
    const int32_t       coeff_shift = AOMMAX(scs->static_config.encoder_bit_depth - 8, 0);
298
0
    const int32_t       nvfb        = (mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
299
0
    const int32_t       nhfb        = (mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
300
0
    const int32_t       pri_damping = 3 + (frm_hdr->quantization_params.base_q_idx >> 6);
301
0
    const int32_t       sec_damping = pri_damping;
302
0
    const int32_t       num_planes  = 3;
303
0
    CdefList            dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
304
305
0
    int32_t toff_prev  = CDEF_VBORDER;
306
0
    int32_t loff_prev  = CDEF_HBORDER;
307
0
    int32_t ysize_prev = (1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_VBORDER;
308
0
    int32_t xsize_prev = (1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_HBORDER;
309
0
    DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
310
0
    uint16_t* in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
311
    // tmp_dst is uint16_t to accommodate high bit depth content; 8bit will treat it as a uint8_t
312
    // buffer and will not use half of the buffer
313
0
    DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
314
315
0
    EbPictureBufferDesc* input_pic = is_16bit ? pcs->input_frame16bit : ppcs->enhanced_pic;
316
0
    EbPictureBufferDesc* recon_pic;
317
0
    svt_aom_get_recon_pic(pcs, &recon_pic, is_16bit);
318
319
0
    for (int pli = 0; pli < num_planes; pli++) {
320
0
        const int subsampling_x = (pli == 0) ? 0 : 1;
321
0
        const int subsampling_y = (pli == 0) ? 0 : 1;
322
0
        xdec[pli]               = subsampling_x;
323
0
        ydec[pli]               = subsampling_y;
324
        // The checks are stubs for 4:2:2 and 4:4:4 support
325
        // cppcheck-suppress knownConditionTrueFalse
326
0
        plane_bsize[pli] = subsampling_y ? (subsampling_x ? BLOCK_4X4 : BLOCK_8X4)
327
0
                                         : (subsampling_x ? BLOCK_4X8 : BLOCK_8X8);
328
0
        mi_wide_l2[pli]  = MI_SIZE_LOG2 - subsampling_x;
329
0
        mi_high_l2[pli]  = MI_SIZE_LOG2 - subsampling_y;
330
0
        src[pli]         = pcs->cdef_input_recon[pli];
331
0
        ref[pli]         = pcs->cdef_input_source[pli];
332
0
        stride_src[pli]  = pli == 0 ? recon_pic->y_stride : (pli == 1 ? recon_pic->u_stride : recon_pic->v_stride);
333
0
        stride_ref[pli]  = pli == 0 ? input_pic->y_stride : (pli == 1 ? input_pic->u_stride : input_pic->v_stride);
334
0
    }
335
336
    // Loop over all filter blocks (64x64)
337
0
    for (uint32_t fbr = y_b64_start_idx; fbr < y_b64_end_idx; ++fbr) {
338
0
        for (uint32_t fbc = x_b64_start_idx; fbc < x_b64_end_idx; ++fbc) {
339
0
            int32_t           dirinit = 0;
340
0
            const uint32_t    lc      = MI_SIZE_64X64 * fbc;
341
0
            const uint32_t    lr      = MI_SIZE_64X64 * fbr;
342
0
            int               nhb     = AOMMIN(MI_SIZE_64X64, mi_cols - lc);
343
0
            int               nvb     = AOMMIN(MI_SIZE_64X64, mi_rows - lr);
344
0
            int               hb_step = 1; //these should be all time with 64x64 SBs
345
0
            int               vb_step = 1;
346
0
            BlockSize         bs      = BLOCK_64X64;
347
0
            const MbModeInfo* mbmi    = pcs->mi_grid_base[lr * cm->mi_stride + lc];
348
0
            const BlockSize   bsize   = mbmi->bsize;
349
0
            if (((fbc & 1) && (bsize == BLOCK_128X128 || bsize == BLOCK_128X64)) ||
350
0
                ((fbr & 1) && (bsize == BLOCK_128X128 || bsize == BLOCK_64X128))) {
351
0
                continue;
352
0
            }
353
0
            if (bsize == BLOCK_128X128 || bsize == BLOCK_128X64 || bsize == BLOCK_64X128) {
354
0
                bs = bsize;
355
0
            }
356
357
0
            if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
358
0
                nhb     = AOMMIN(MI_SIZE_128X128, cm->mi_cols - lc);
359
0
                hb_step = 2;
360
0
            }
361
0
            if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
362
0
                nvb     = AOMMIN(MI_SIZE_128X128, cm->mi_rows - lr);
363
0
                vb_step = 2;
364
0
            }
365
0
            const uint32_t fb_idx = fbr * nhfb + fbc;
366
            // No filtering if the entire filter block is skipped
367
0
            cdef_count = svt_sb_compute_cdef_list(pcs, cm, lr, lc, dlist, bs);
368
0
            if (cdef_count == 0) {
369
0
                pcs->skip_cdef_seg[fb_idx] = 1;
370
0
                continue;
371
0
            }
372
0
            pcs->skip_cdef_seg[fb_idx] = 0;
373
374
0
            int32_t toff = CDEF_VBORDER * (fbr != 0);
375
0
            int32_t loff = CDEF_HBORDER * (fbc != 0);
376
0
            int32_t boff = CDEF_VBORDER * ((int32_t)fbr + vb_step < nvfb);
377
0
            int32_t roff = CDEF_HBORDER * ((int32_t)fbc + hb_step < nhfb);
378
379
0
            uint8_t (*dir)[CDEF_NBLOCKS][CDEF_NBLOCKS] = &pcs->cdef_dir_data[fb_idx].dir;
380
0
            int32_t (*var)[CDEF_NBLOCKS][CDEF_NBLOCKS] = &pcs->cdef_dir_data[fb_idx].var;
381
0
            for (int pli = 0; pli < num_planes; pli++) {
382
0
                int32_t ysize = (nvb << mi_high_l2[pli]) + boff + toff;
383
0
                int32_t xsize = (nhb << mi_wide_l2[pli]) + roff + loff;
384
                /* We avoid filtering the pixels for which some of the pixels to
385
                   average are outside the frame. We could change the filter instead,
386
                   but it would add special cases for any future vectorization.
387
                   Avoid memset'ting when dirty rect is inside the new one.
388
                   TODO: this could be further optimized - fill out only borders, separate buffers for Y & UV */
389
0
                bool need_to_reset = toff_prev > toff || loff_prev > loff || ysize < ysize_prev || xsize < xsize_prev;
390
0
                if (need_to_reset) {
391
0
                    uint16_t* p = &in[(-toff_prev * CDEF_BSTRIDE - loff_prev)];
392
0
                    for (int r = 0; r < ysize_prev; r++) {
393
0
                        svt_memset(p, (uint8_t)CDEF_VERY_LARGE, sizeof(p[0]) * xsize_prev);
394
0
                        p += CDEF_BSTRIDE;
395
0
                    }
396
0
                }
397
0
                toff_prev  = toff;
398
0
                loff_prev  = loff;
399
0
                ysize_prev = ysize;
400
0
                xsize_prev = xsize;
401
402
0
                svt_aom_copy_sb8_16(&in[(-toff * CDEF_BSTRIDE - loff)],
403
0
                                    CDEF_BSTRIDE,
404
0
                                    src[pli],
405
0
                                    (lr << mi_high_l2[pli]) - toff,
406
0
                                    (lc << mi_wide_l2[pli]) - loff,
407
0
                                    stride_src[pli],
408
0
                                    ysize,
409
0
                                    xsize,
410
0
                                    is_16bit);
411
412
0
                uint8_t subsampling_factor = cdef_ctrls->subsampling_factor;
413
                /*
414
                Cap the subsampling for certain block sizes.
415
416
                The intrinsics process several lines simultaneously, so blocks can only be subsampled
417
                a finite amount before there is no more speed gain.  If the space between processed lines
418
                is too large, the intrinsics will begin accessing memory outside the block.
419
                */
420
0
                switch (plane_bsize[pli]) {
421
0
                case BLOCK_8X8:
422
0
                    subsampling_factor = MIN(subsampling_factor, 4);
423
0
                    break;
424
0
                case BLOCK_8X4:
425
0
                case BLOCK_4X8:
426
0
                    subsampling_factor = MIN(subsampling_factor, 2);
427
0
                    break;
428
0
                case BLOCK_4X4:
429
0
                    subsampling_factor = MIN(subsampling_factor, 1);
430
0
                    break;
431
0
                }
432
433
                /* first cdef stage
434
                 * Perform the pri_filter strength search for the current sub_block
435
                 */
436
0
                for (int gi = 0; gi < first_pass_fs_num; gi++) {
437
                    // Check if chroma filter is set to be tested
438
0
                    if (pli && (cdef_ctrls->default_first_pass_fs_uv[gi] == -1)) {
439
0
                        pcs->mse_seg[1][fb_idx][gi] = default_mse_uv * 64;
440
0
                        continue;
441
0
                    }
442
443
0
                    int32_t pri_strength = cdef_ctrls->default_first_pass_fs[gi] / CDEF_SEC_STRENGTHS;
444
0
                    int32_t sec_strength = cdef_ctrls->default_first_pass_fs[gi] % CDEF_SEC_STRENGTHS;
445
446
0
                    svt_cdef_filter_fb(is_16bit ? NULL : (uint8_t*)tmp_dst,
447
0
                                       is_16bit ? tmp_dst : NULL,
448
0
                                       0,
449
0
                                       in,
450
0
                                       xdec[pli],
451
0
                                       ydec[pli],
452
0
                                       *dir,
453
0
                                       &dirinit,
454
0
                                       *var,
455
0
                                       pli,
456
0
                                       dlist,
457
0
                                       cdef_count,
458
0
                                       pri_strength,
459
0
                                       sec_strength + (sec_strength == 3),
460
0
                                       pri_damping,
461
0
                                       sec_damping,
462
0
                                       coeff_shift,
463
0
                                       subsampling_factor);
464
0
                    uint64_t curr_mse = compute_cdef_dist(
465
0
                        ref[pli],
466
0
                        (lr << mi_high_l2[pli]) * stride_ref[pli] + (lc << mi_wide_l2[pli]),
467
0
                        stride_ref[pli],
468
0
                        (uint8_t*)tmp_dst,
469
0
                        dlist,
470
0
                        cdef_count,
471
0
                        (BlockSize)plane_bsize[pli],
472
0
                        coeff_shift,
473
0
                        subsampling_factor,
474
0
                        is_16bit);
475
476
0
                    if (pli < 2) {
477
0
                        pcs->mse_seg[pli][fb_idx][gi] = curr_mse * subsampling_factor;
478
0
                    } else {
479
0
                        pcs->mse_seg[1][fb_idx][gi] += (curr_mse * subsampling_factor);
480
0
                    }
481
0
                }
482
483
                /* second cdef stage
484
                 * Perform the sec_filter strength search for the current sub_block
485
                 */
486
0
                for (int gi = first_pass_fs_num; gi < first_pass_fs_num + default_second_pass_fs_num; gi++) {
487
                    // Check if chroma filter is set to be tested
488
0
                    if (pli && (cdef_ctrls->default_second_pass_fs_uv[gi - first_pass_fs_num] == -1)) {
489
0
                        pcs->mse_seg[1][fb_idx][gi] = default_mse_uv * 64;
490
0
                        continue;
491
0
                    }
492
493
0
                    int32_t pri_strength = cdef_ctrls->default_second_pass_fs[gi - first_pass_fs_num] /
494
0
                        CDEF_SEC_STRENGTHS;
495
0
                    int32_t sec_strength = cdef_ctrls->default_second_pass_fs[gi - first_pass_fs_num] %
496
0
                        CDEF_SEC_STRENGTHS;
497
498
0
                    svt_cdef_filter_fb(is_16bit ? NULL : (uint8_t*)tmp_dst,
499
0
                                       is_16bit ? tmp_dst : NULL,
500
0
                                       0,
501
0
                                       in,
502
0
                                       xdec[pli],
503
0
                                       ydec[pli],
504
0
                                       *dir,
505
0
                                       &dirinit,
506
0
                                       *var,
507
0
                                       pli,
508
0
                                       dlist,
509
0
                                       cdef_count,
510
0
                                       pri_strength,
511
0
                                       sec_strength + (sec_strength == 3),
512
0
                                       pri_damping,
513
0
                                       sec_damping,
514
0
                                       coeff_shift,
515
0
                                       subsampling_factor);
516
0
                    uint64_t curr_mse = compute_cdef_dist(
517
0
                        ref[pli],
518
0
                        (lr << mi_high_l2[pli]) * stride_ref[pli] + (lc << mi_wide_l2[pli]),
519
0
                        stride_ref[pli],
520
0
                        (uint8_t*)tmp_dst,
521
0
                        dlist,
522
0
                        cdef_count,
523
0
                        (BlockSize)plane_bsize[pli],
524
0
                        coeff_shift,
525
0
                        subsampling_factor,
526
0
                        is_16bit);
527
528
0
                    if (pli < 2) {
529
0
                        pcs->mse_seg[pli][fb_idx][gi] = curr_mse * subsampling_factor;
530
0
                    } else {
531
0
                        pcs->mse_seg[1][fb_idx][gi] += (curr_mse * subsampling_factor);
532
0
                    }
533
0
                }
534
0
            }
535
0
        }
536
0
    }
537
0
}
538
539
/******************************************************
540
 * CDEF Kernel
541
 ******************************************************/
542
948
EbErrorType svt_aom_cdef_kernel_iter(void* context) {
543
    // Context & SCS & PCS
544
948
    CdefContext*        context_ptr = (CdefContext*)context;
545
948
    PictureControlSet*  pcs;
546
948
    SequenceControlSet* scs;
547
548
    //// Input
549
948
    EbObjectWrapper* dlf_results_wrapper;
550
948
    DlfResults*      dlf_results;
551
552
    //// Output
553
948
    EbObjectWrapper* cdef_results_wrapper;
554
555
948
    FrameHeader* frm_hdr;
556
557
    // Get DLF Results
558
948
    EB_GET_FULL_OBJECT(context_ptr->cdef_input_fifo_ptr, &dlf_results_wrapper);
559
560
474
    dlf_results                   = (DlfResults*)dlf_results_wrapper->object_ptr;
561
474
    pcs                           = (PictureControlSet*)dlf_results->pcs_wrapper->object_ptr;
562
474
    PictureParentControlSet* ppcs = pcs->ppcs;
563
474
    scs                           = pcs->scs;
564
565
474
    bool       is_16bit                   = scs->is_16bit_pipeline;
566
474
    Av1Common* cm                         = pcs->ppcs->av1_cm;
567
474
    frm_hdr                               = &pcs->ppcs->frm_hdr;
568
474
    CdefSearchControls* cdef_search_ctrls = &pcs->ppcs->cdef_search_ctrls;
569
474
    if (!cdef_search_ctrls->use_reference_cdef_fs && !cdef_search_ctrls->use_qp_strength) {
570
0
        if (scs->seq_header.cdef_level && pcs->ppcs->cdef_level) {
571
0
            cdef_seg_search(pcs, scs, dlf_results->segment_index);
572
0
        }
573
0
    }
574
    //all seg based search is done. update total processed segments. if all done, finish the search and perfrom application.
575
474
    svt_block_on_mutex(pcs->cdef_search_mutex);
576
577
474
    pcs->tot_seg_searched_cdef++;
578
474
    if (pcs->tot_seg_searched_cdef == pcs->cdef_segments_total_count) {
579
474
        pcs->cdef_dist_dev = -1;
580
474
        if (scs->seq_header.cdef_level && pcs->ppcs->cdef_level) {
581
274
            finish_cdef_search(pcs);
582
274
            if (ppcs->enable_restoration || pcs->ppcs->is_ref || scs->static_config.recon_enabled) {
583
                // Do application iff there are non-zero filters
584
0
                if (frm_hdr->cdef_params.cdef_y_strength[0] != 0 || frm_hdr->cdef_params.cdef_uv_strength[0] != 0 ||
585
0
                    pcs->ppcs->nb_cdef_strengths != 1) {
586
0
                    svt_av1_cdef_frame(scs, pcs);
587
0
                }
588
0
            }
589
274
        } else {
590
200
            frm_hdr->cdef_params.cdef_bits           = 0;
591
200
            frm_hdr->cdef_params.cdef_y_strength[0]  = 0;
592
200
            pcs->ppcs->nb_cdef_strengths             = 1;
593
200
            frm_hdr->cdef_params.cdef_uv_strength[0] = 0;
594
200
        }
595
596
474
        if (pcs->ppcs->nb_cdef_strengths == 1 && frm_hdr->cdef_params.cdef_y_strength[0] == 0 &&
597
233
            frm_hdr->cdef_params.cdef_uv_strength[0] == 0) {
598
224
            pcs->cdef_dist_dev = 0;
599
224
        }
600
601
        //restoration prep
602
474
        bool is_lr = ppcs->enable_restoration && frm_hdr->allow_intrabc == 0;
603
474
        if (is_lr) {
604
0
            svt_av1_loop_restoration_save_boundary_lines(cm->frame_to_show, cm, 1);
605
0
            if (is_16bit) {
606
0
                set_unscaled_input_16bit(pcs);
607
0
            }
608
0
        }
609
610
        // ------- start: Normative upscaling - super-resolution tool
611
474
        if (frm_hdr->allow_intrabc == 0 && pcs->ppcs->frame_superres_enabled) {
612
0
            svt_av1_superres_upscale_frame(cm, pcs, scs);
613
0
        }
614
474
        if (scs->static_config.resize_mode != RESIZE_NONE) {
615
0
            EbPictureBufferDesc* recon = NULL;
616
0
            svt_aom_get_recon_pic(pcs, &recon, is_16bit);
617
0
            recon->width  = pcs->ppcs->render_width;
618
0
            recon->height = pcs->ppcs->render_height;
619
0
            if (is_lr) {
620
0
                EbPictureBufferDesc* input_pic = is_16bit ? pcs->input_frame16bit : pcs->ppcs->enhanced_unscaled_pic;
621
622
0
                svt_aom_assert_err(pcs->scaled_input_pic == NULL, "pcs_ptr->scaled_input_pic is not desctoried!");
623
0
                EbPictureBufferDesc* scaled_input_pic = NULL;
624
                // downscale input picture if recon is resized
625
0
                bool is_resized = recon->width != input_pic->width || recon->height != input_pic->height;
626
0
                if (is_resized) {
627
0
                    superres_params_type spr_params = {recon->width, recon->height, 0};
628
0
                    svt_aom_downscaled_source_buffer_desc_ctor(&scaled_input_pic, input_pic, spr_params);
629
0
                    svt_aom_resize_frame(input_pic,
630
0
                                         scaled_input_pic,
631
0
                                         scs->static_config.encoder_bit_depth,
632
0
                                         av1_num_planes(&scs->seq_header.color_config),
633
0
                                         scs->subsampling_x,
634
0
                                         scs->subsampling_y,
635
0
                                         input_pic->packed_flag,
636
0
                                         PICTURE_BUFFER_DESC_FULL_MASK,
637
0
                                         0); // is_2bcompress
638
0
                    pcs->scaled_input_pic = scaled_input_pic;
639
0
                }
640
0
            }
641
0
        }
642
        // ------- end: Normative upscaling - super-resolution tool
643
644
474
        pcs->rest_segments_column_count = scs->rest_segment_column_count;
645
474
        pcs->rest_segments_row_count    = scs->rest_segment_row_count;
646
474
        pcs->rest_segments_total_count  = (uint16_t)(pcs->rest_segments_column_count * pcs->rest_segments_row_count);
647
474
        pcs->tot_seg_searched_rest      = 0;
648
474
        pcs->ppcs->av1_cm->use_boundaries_in_rest_search = scs->use_boundaries_in_rest_search;
649
474
        pcs->rest_extend_flag[0]                         = false;
650
474
        pcs->rest_extend_flag[1]                         = false;
651
474
        pcs->rest_extend_flag[2]                         = false;
652
653
474
        uint32_t segment_index;
654
948
        for (segment_index = 0; segment_index < pcs->rest_segments_total_count; ++segment_index) {
655
            // Get Empty Cdef Results to Rest
656
474
            svt_get_empty_object(context_ptr->cdef_output_fifo_ptr, &cdef_results_wrapper);
657
474
            CdefResults* cdef_results   = (struct CdefResults*)cdef_results_wrapper->object_ptr;
658
474
            cdef_results->pcs_wrapper   = dlf_results->pcs_wrapper;
659
474
            cdef_results->segment_index = segment_index;
660
            // Post Cdef Results
661
474
            svt_post_full_object(cdef_results_wrapper);
662
474
        }
663
474
    }
664
474
    svt_release_mutex(pcs->cdef_search_mutex);
665
666
    // Release Dlf Results
667
474
    svt_release_object(dlf_results_wrapper);
668
669
474
    return EB_ErrorNone;
670
948
}
671
672
474
void* svt_aom_cdef_kernel(void* input_ptr) {
673
474
    EbThreadContext* thread_ctx = (EbThreadContext*)input_ptr;
674
948
    for (;;) {
675
948
        EbErrorType err = svt_aom_cdef_kernel_iter(thread_ctx->priv);
676
948
        if (err == EB_NoErrorFifoShutdown) {
677
474
            return NULL;
678
474
        }
679
948
    }
680
0
    return NULL;
681
474
}