Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/cdef_process.c
Line
Count
Source
1
/*
2
* Copyright(c) 2019 Intel Corporation
3
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
4
*
5
* This source code is subject to the terms of the BSD 2 Clause License and
6
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
7
* was not distributed with this source code in the LICENSE file, you can
8
* obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
9
* Media Patent License 1.0 was not distributed with this source code in the
10
* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
11
*/
12
13
#include <stdlib.h>
14
#include "aom_dsp_rtcd.h"
15
#include "definitions.h"
16
#include "enc_handle.h"
17
#include "cdef_process.h"
18
#include "enc_dec_results.h"
19
#include "svt_threads.h"
20
#include "reference_object.h"
21
#include "enc_cdef.h"
22
#include "enc_dec_process.h"
23
#include "pic_buffer_desc.h"
24
#include "sequence_control_set.h"
25
#include "utility.h"
26
#include "pcs.h"
27
#include "resize.h"
28
#include "super_res.h"
29
30
0
/*
 * Populates pcs->input_frame16bit from the unscaled enhanced input picture.
 *
 * The buffer description of the unscaled picture is first copied onto the
 * 16-bit frame. An 8-bit input is then converted with
 * svt_aom_convert_pic_8bit_to_16bit(); any other bit depth is handed to
 * svt_aom_pack_2d_pic() together with the three destination plane pointers.
 */
static void set_unscaled_input_16bit(PictureControlSet* pcs) {
    EbPictureBufferDesc* unscaled_src = pcs->ppcs->enhanced_unscaled_pic;
    EbPictureBufferDesc* frame16      = pcs->input_frame16bit;
    const uint16_t       chroma_ss_x  = pcs->ppcs->scs->subsampling_x;
    const uint16_t       chroma_ss_y  = pcs->ppcs->scs->subsampling_y;

    svt_aom_copy_buffer_info(unscaled_src, pcs->input_frame16bit);

    if (unscaled_src->bit_depth != EB_EIGHT_BIT) {
        // Input is not 8-bit: pack it into the 16-bit plane buffers.
        uint16_t* dst_planes[3];
        dst_planes[0] = (uint16_t*)frame16->y_buffer;
        dst_planes[1] = (uint16_t*)frame16->u_buffer;
        dst_planes[2] = (uint16_t*)frame16->v_buffer;
        svt_aom_pack_2d_pic(unscaled_src, dst_planes);
        return;
    }

    // 8-bit input: widen every sample into the 16-bit frame.
    svt_aom_convert_pic_8bit_to_16bit(unscaled_src, frame16, chroma_ss_x, chroma_ss_y);
}
44
45
/*
 * Initializes recon_picture_dst as a stand-alone duplicate of recon_picture_src.
 *
 * The descriptor fields (dimensions, strides, sizes, bit depth, ...) are copied
 * from the source, a single zero-initialized buffer is allocated for the
 * enabled planes, and the plane pointers are placed inside it past the border.
 * Unless skip_copy is set, the visible area of the first num_planes planes is
 * then copied row by row.
 *
 * scs               : provides mono_chrome, chroma subsampling and is_16bit_pipeline.
 * recon_picture_src : picture to duplicate.
 * recon_picture_dst : descriptor to fill; its buffer_alloc is allocated here and
 *                     must be released by the caller.
 * num_planes        : number of planes to copy (asserted <= MAX_PLANES).
 * skip_copy         : non-zero to allocate and lay out the planes without copying pixels.
 *
 * Returns EB_ErrorNone on the normal path. NOTE(review): EB_CALLOC_ALIGNED_ARRAY
 * presumably returns an error code from this function on allocation failure - confirm.
 */
static EbErrorType copy_recon_enc(SequenceControlSet* scs, EbPictureBufferDesc* recon_picture_src,
                                  EbPictureBufferDesc* recon_picture_dst, int num_planes, int skip_copy) {
    // Geometry and format
    recon_picture_dst->border       = recon_picture_src->border;
    recon_picture_dst->width        = recon_picture_src->width;
    recon_picture_dst->height       = recon_picture_src->height;
    recon_picture_dst->max_width    = recon_picture_src->max_width;
    recon_picture_dst->max_height   = recon_picture_src->max_height;
    recon_picture_dst->bit_depth    = recon_picture_src->bit_depth;
    recon_picture_dst->color_format = recon_picture_src->color_format;

    // Strides
    recon_picture_dst->y_stride         = recon_picture_src->y_stride;
    recon_picture_dst->u_stride         = recon_picture_src->u_stride;
    recon_picture_dst->v_stride         = recon_picture_src->v_stride;
    recon_picture_dst->y_stride_bit_inc = recon_picture_src->y_stride_bit_inc;
    recon_picture_dst->u_stride_bit_inc = recon_picture_src->u_stride_bit_inc;
    recon_picture_dst->v_stride_bit_inc = recon_picture_src->v_stride_bit_inc;

    // Plane sizes and packing
    recon_picture_dst->luma_size   = recon_picture_src->luma_size;
    recon_picture_dst->chroma_size = recon_picture_src->chroma_size;
    recon_picture_dst->packed_flag = recon_picture_src->packed_flag;

    // Only luma is enabled for mono-chrome sequences.
    if (scs->seq_header.color_config.mono_chrome) {
        recon_picture_dst->buffer_enable_mask = PICTURE_BUFFER_DESC_LUMA_MASK;
    } else {
        recon_picture_dst->buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK;
    }
    const bool has_y  = (recon_picture_dst->buffer_enable_mask & PICTURE_BUFFER_DESC_Y_FLAG) != 0;
    const bool has_cb = (recon_picture_dst->buffer_enable_mask & PICTURE_BUFFER_DESC_Cb_FLAG) != 0;
    const bool has_cr = (recon_picture_dst->buffer_enable_mask & PICTURE_BUFFER_DESC_Cr_FLAG) != 0;

    const int      ss_x            = scs->subsampling_x;
    const int      ss_y            = scs->subsampling_y;
    const int      use_highbd      = scs->is_16bit_pipeline;
    const uint32_t bytes_per_pixel = scs->is_16bit_pipeline ? 2 : 1;

    // Bytes needed per plane; disabled planes take no space.
    uint32_t plane_bytes[3] = {0};
    if (has_y) {
        plane_bytes[0] = recon_picture_dst->luma_size * bytes_per_pixel;
    }
    if (has_cb) {
        plane_bytes[1] = recon_picture_dst->chroma_size * bytes_per_pixel;
    }
    if (has_cr) {
        plane_bytes[2] = recon_picture_dst->chroma_size * bytes_per_pixel;
    }
    const uint32_t alloc_sz = plane_bytes[0] + plane_bytes[1] + plane_bytes[2];

    // One allocation holds all enabled planes back to back.
    EB_CALLOC_ALIGNED_ARRAY(recon_picture_dst->buffer_alloc, alloc_sz);
    recon_picture_dst->buffer_alloc_sz = alloc_sz;

    // Plane pointers address the first visible sample, i.e. they skip the
    // top border rows and the left border columns of their plane.
    uint32_t assigned_space     = 0;
    recon_picture_dst->y_buffer = NULL;
    recon_picture_dst->u_buffer = NULL;
    recon_picture_dst->v_buffer = NULL;
    if (has_y) {
        recon_picture_dst->y_buffer = recon_picture_dst->buffer_alloc +
            (recon_picture_dst->border + (recon_picture_dst->y_stride * recon_picture_dst->border)) * bytes_per_pixel;
        assigned_space += plane_bytes[0];
    }
    if (has_cb) {
        recon_picture_dst->u_buffer = recon_picture_dst->buffer_alloc + assigned_space +
            ((recon_picture_dst->border >> ss_x) +
             (recon_picture_dst->u_stride * (recon_picture_dst->border >> ss_y))) *
                bytes_per_pixel;
        assigned_space += plane_bytes[1];
    }
    if (has_cr) {
        recon_picture_dst->v_buffer = recon_picture_dst->buffer_alloc + assigned_space +
            ((recon_picture_dst->border >> ss_x) +
             (recon_picture_dst->v_stride * (recon_picture_dst->border >> ss_y))) *
                bytes_per_pixel;
        assigned_space += plane_bytes[2];
    }
    assert(assigned_space == alloc_sz);

    if (skip_copy) {
        return EB_ErrorNone;
    }

    // Copy the visible area of each requested plane, one row at a time.
    assert(num_planes <= MAX_PLANES);
    for (int plane = 0; plane < num_planes; ++plane) {
        const int sub_x = plane ? scs->subsampling_x : 0;
        const int sub_y = plane ? scs->subsampling_y : 0;

        uint8_t*      src_row    = recon_picture_src->buffer[plane];
        uint8_t*      dst_row    = recon_picture_dst->buffer[plane];
        const int32_t src_stride = recon_picture_src->stride[plane];
        const int32_t dst_stride = recon_picture_dst->stride[plane];

        const int    rows      = ((recon_picture_src->height + sub_y) >> sub_y);
        const size_t row_bytes = ((recon_picture_src->width + sub_x) >> sub_x) * sizeof(*src_row) << use_highbd;

        for (int row = 0; row < rows; ++row) {
            svt_memcpy(dst_row, src_row, row_bytes);
            // Strides are in samples; scale to bytes for the 16-bit pipeline.
            src_row += src_stride << use_highbd;
            dst_row += dst_stride << use_highbd;
        }
    }

    return EB_ErrorNone;
}
150
151
0
/*
 * Normative super-resolution upscaling of the reconstructed picture.
 *
 * The recon picture selected by svt_aom_get_recon_pic() is duplicated into a
 * temporary picture (copy_recon_enc). Each plane is then upscaled from that
 * duplicate back into the recon picture with svt_av1_upscale_normative_rows(),
 * and the temporary buffer is released.
 *
 * cm  : common AV1 state forwarded to the upscaler.
 * pcs : picture control set owning the recon picture.
 * scs : sequence control set (pipeline bit width, chroma subsampling,
 *       mono-chrome flag, encoder bit depth).
 *
 * If the temporary duplicate cannot be created the function asserts (debug
 * builds) and returns without upscaling. Previously the code continued with a
 * NULL source picture once assert() was compiled out by NDEBUG.
 */
static void svt_av1_superres_upscale_frame(struct Av1Common* cm, PictureControlSet* pcs, SequenceControlSet* scs) {
    const bool is_16bit = scs->is_16bit_pipeline;

    EbPictureBufferDesc* recon_ptr;
    svt_aom_get_recon_pic(pcs, &recon_ptr, is_16bit);

    const uint16_t ss_x       = scs->subsampling_x;
    const uint16_t ss_y       = scs->subsampling_y;
    const int      num_planes = scs->seq_header.color_config.mono_chrome ? 1 : MAX_PLANES;

    // Duplicate of the not-yet-upscaled recon; it is the upscaler's input.
    EbPictureBufferDesc recon_pic_temp;
    const EbErrorType   return_error = copy_recon_enc(scs, recon_ptr, &recon_pic_temp, num_planes, 0);
    if (return_error != EB_ErrorNone) {
        // NOTE(review): presumably only the allocation inside copy_recon_enc can
        // fail, in which case there is nothing to free here - confirm.
        assert(0);
        return;
    }

    EbPictureBufferDesc* src = &recon_pic_temp;
    EbPictureBufferDesc* dst = recon_ptr;

    // get the bit-depth from the encoder config instead of from the recon ptr
    const int bit_depth = scs->static_config.encoder_bit_depth;

    assert(num_planes <= MAX_PLANES);
    for (int plane = 0; plane < num_planes; ++plane) {
        // Chroma planes use the sequence subsampling; luma is never subsampled.
        const int sub_x = plane ? ss_x : 0;
        const int sub_y = plane ? ss_y : 0;

        uint8_t*      src_buf    = src->buffer[plane];
        const int32_t src_stride = src->stride[plane];
        uint8_t*      dst_buf    = dst->buffer[plane];
        const int32_t dst_stride = dst->stride[plane];

        svt_av1_upscale_normative_rows(cm,
                                       (const uint8_t*)src_buf,
                                       src_stride,
                                       dst_buf,
                                       dst_stride,
                                       (src->height + sub_y) >> sub_y,
                                       sub_x,
                                       bit_depth,
                                       is_16bit);
    }

    // free the memory
    EB_FREE_ALIGNED_ARRAY(recon_pic_temp.buffer_alloc);
}
206
207
/**************************************
208
 * Cdef Context
209
 **************************************/
210
// Private state of one CDEF kernel thread; created by svt_aom_cdef_context_ctor()
// and released by cdef_context_dctor().
typedef struct CdefContext {
211
    EbFifo* cdef_input_fifo_ptr; // consumer FIFO taken from dlf_results_resource_ptr
212
    EbFifo* cdef_output_fifo_ptr; // producer FIFO taken from cdef_results_resource_ptr
213
} CdefContext;
214
215
431
/*
 * Destructor registered on the thread context by svt_aom_cdef_context_ctor():
 * frees the CdefContext stored in the thread context's priv field.
 *
 * p : the EbThreadContext whose private CDEF context is released.
 */
static void cdef_context_dctor(EbPtr p) {
    // Local copy of the pointer so the free macro operates on a plain variable.
    CdefContext* cdef_ctx = (CdefContext*)((EbThreadContext*)p)->priv;
    EB_FREE_ARRAY(cdef_ctx);
}
220
221
/******************************************************
222
 * Cdef Context Constructor
223
 ******************************************************/
224
431
EbErrorType svt_aom_cdef_context_ctor(EbThreadContext* thread_ctx, const EbEncHandle* enc_handle_ptr, int index) {
225
431
    CdefContext* cdef_ctx;
226
431
    EB_CALLOC_ARRAY(cdef_ctx, 1);
227
431
    thread_ctx->priv  = cdef_ctx;
228
431
    thread_ctx->dctor = cdef_context_dctor;
229
230
    // Input/Output System Resource Manager FIFOs
231
431
    cdef_ctx->cdef_input_fifo_ptr  = svt_system_resource_get_consumer_fifo(enc_handle_ptr->dlf_results_resource_ptr,
232
431
                                                                          index);
233
431
    cdef_ctx->cdef_output_fifo_ptr = svt_system_resource_get_producer_fifo(enc_handle_ptr->cdef_results_resource_ptr,
234
431
                                                                           index);
235
236
431
    return EB_ErrorNone;
237
431
}
238
239
0
// Placeholder chroma MSE: cdef_seg_search() stores default_mse_uv * 64 for
// chroma filter strengths that are flagged as not tested (-1).
#define default_mse_uv 1040400
240
241
static uint64_t compute_cdef_dist(const EbByte dst, int32_t doffset, int32_t dstride, const uint8_t* src,
242
                                  const CdefList* dlist, int32_t cdef_count, BlockSize bsize, int32_t coeff_shift,
243
0
                                  uint8_t subsampling_factor, bool is_16bit) {
244
0
    uint64_t curr_mse = 0;
245
0
    if (is_16bit) {
246
0
        curr_mse = svt_compute_cdef_dist_16bit(((uint16_t*)dst) + doffset,
247
0
                                               dstride,
248
0
                                               (uint16_t*)src,
249
0
                                               dlist,
250
0
                                               cdef_count,
251
0
                                               bsize,
252
0
                                               coeff_shift,
253
0
                                               subsampling_factor);
254
255
0
    } else {
256
0
        curr_mse = svt_compute_cdef_dist_8bit(
257
0
            dst + doffset, dstride, src, dlist, cdef_count, bsize, coeff_shift, subsampling_factor);
258
0
    }
259
0
    return curr_mse;
260
0
}
261
262
/* Search for the best filter strength pair for each 64x64 filter block.
263
 *
264
 * For each 64x64 filter block and each plane, search the allowable filter strength pairs.
265
 * Call cdef_filter_fb() to perform filtering, then compute the MSE for each pair.
266
*/
267
0
static void cdef_seg_search(PictureControlSet* pcs, SequenceControlSet* scs, uint32_t segment_index) {
268
0
    PictureParentControlSet* ppcs     = pcs->ppcs;
269
0
    FrameHeader*             frm_hdr  = &ppcs->frm_hdr;
270
0
    Av1Common*               cm       = ppcs->av1_cm;
271
0
    const bool               is_16bit = scs->is_16bit_pipeline;
272
0
    uint32_t                 x_seg_idx;
273
0
    uint32_t                 y_seg_idx;
274
0
    const uint32_t           b64_pic_width  = (ppcs->aligned_width + 64 - 1) / 64;
275
0
    const uint32_t           b64_pic_height = (ppcs->aligned_height + 64 - 1) / 64;
276
0
    SEGMENT_CONVERT_IDX_TO_XY(segment_index, x_seg_idx, y_seg_idx, pcs->cdef_segments_column_count);
277
0
    const uint32_t x_b64_start_idx = SEGMENT_START_IDX(x_seg_idx, b64_pic_width, pcs->cdef_segments_column_count);
278
0
    const uint32_t x_b64_end_idx   = SEGMENT_END_IDX(x_seg_idx, b64_pic_width, pcs->cdef_segments_column_count);
279
0
    const uint32_t y_b64_start_idx = SEGMENT_START_IDX(y_seg_idx, b64_pic_height, pcs->cdef_segments_row_count);
280
0
    const uint32_t y_b64_end_idx   = SEGMENT_END_IDX(y_seg_idx, b64_pic_height, pcs->cdef_segments_row_count);
281
282
0
    const int32_t       mi_rows                    = cm->mi_rows;
283
0
    const int32_t       mi_cols                    = cm->mi_cols;
284
0
    CdefSearchControls* cdef_ctrls                 = &ppcs->cdef_search_ctrls;
285
0
    const int           first_pass_fs_num          = cdef_ctrls->first_pass_fs_num;
286
0
    const int           default_second_pass_fs_num = cdef_ctrls->default_second_pass_fs_num;
287
0
    EbByte              src[3];
288
0
    EbByte              ref[3];
289
0
    int32_t             stride_src[3];
290
0
    int32_t             stride_ref[3];
291
0
    int32_t             plane_bsize[3];
292
0
    int32_t             mi_wide_l2[3];
293
0
    int32_t             mi_high_l2[3];
294
0
    int32_t             xdec[3];
295
0
    int32_t             ydec[3];
296
0
    int32_t             cdef_count;
297
0
    const int32_t       coeff_shift = AOMMAX(scs->static_config.encoder_bit_depth - 8, 0);
298
0
    const int32_t       nvfb        = (mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
299
0
    const int32_t       nhfb        = (mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
300
0
    const int32_t       pri_damping = 3 + (frm_hdr->quantization_params.base_q_idx >> 6);
301
0
    const int32_t       sec_damping = pri_damping;
302
0
    const int32_t       num_planes  = 3;
303
0
    CdefList            dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
304
305
0
    int32_t toff_prev  = CDEF_VBORDER;
306
0
    int32_t loff_prev  = CDEF_HBORDER;
307
0
    int32_t ysize_prev = (1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_VBORDER;
308
0
    int32_t xsize_prev = (1 << MAX_SB_SIZE_LOG2) + 2 * CDEF_HBORDER;
309
0
    DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
310
0
    uint16_t* in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
311
    // tmp_dst is uint16_t to accommodate high bit depth content; 8bit will treat it as a uint8_t
312
    // buffer and will not use half of the buffer
313
0
    DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
314
315
0
    EbPictureBufferDesc* input_pic = is_16bit ? pcs->input_frame16bit : ppcs->enhanced_pic;
316
0
    EbPictureBufferDesc* recon_pic;
317
0
    svt_aom_get_recon_pic(pcs, &recon_pic, is_16bit);
318
319
0
    for (int pli = 0; pli < num_planes; pli++) {
320
0
        const int subsampling_x = (pli == 0) ? 0 : 1;
321
0
        const int subsampling_y = (pli == 0) ? 0 : 1;
322
0
        xdec[pli]               = subsampling_x;
323
0
        ydec[pli]               = subsampling_y;
324
        // The checks are stubs for 4:2:2 and 4:4:4 support
325
        // cppcheck-suppress knownConditionTrueFalse
326
0
        plane_bsize[pli] = subsampling_y ? (subsampling_x ? BLOCK_4X4 : BLOCK_8X4)
327
0
                                         : (subsampling_x ? BLOCK_4X8 : BLOCK_8X8);
328
0
        mi_wide_l2[pli]  = MI_SIZE_LOG2 - subsampling_x;
329
0
        mi_high_l2[pli]  = MI_SIZE_LOG2 - subsampling_y;
330
0
        src[pli]         = pcs->cdef_input_recon[pli];
331
0
        ref[pli]         = pcs->cdef_input_source[pli];
332
0
        stride_src[pli]  = pli == 0 ? recon_pic->y_stride : (pli == 1 ? recon_pic->u_stride : recon_pic->v_stride);
333
0
        stride_ref[pli]  = pli == 0 ? input_pic->y_stride : (pli == 1 ? input_pic->u_stride : input_pic->v_stride);
334
0
    }
335
336
    // Loop over all filter blocks (64x64)
337
0
    for (uint32_t fbr = y_b64_start_idx; fbr < y_b64_end_idx; ++fbr) {
338
0
        for (uint32_t fbc = x_b64_start_idx; fbc < x_b64_end_idx; ++fbc) {
339
0
            int32_t           dirinit = 0;
340
0
            const uint32_t    lc      = MI_SIZE_64X64 * fbc;
341
0
            const uint32_t    lr      = MI_SIZE_64X64 * fbr;
342
0
            int               nhb     = AOMMIN(MI_SIZE_64X64, mi_cols - lc);
343
0
            int               nvb     = AOMMIN(MI_SIZE_64X64, mi_rows - lr);
344
0
            int               hb_step = 1; //these should be all time with 64x64 SBs
345
0
            int               vb_step = 1;
346
0
            BlockSize         bs      = BLOCK_64X64;
347
0
            const MbModeInfo* mbmi    = pcs->mi_grid_base[lr * cm->mi_stride + lc];
348
0
            const BlockSize   bsize   = mbmi->bsize;
349
0
            if (((fbc & 1) && (bsize == BLOCK_128X128 || bsize == BLOCK_128X64)) ||
350
0
                ((fbr & 1) && (bsize == BLOCK_128X128 || bsize == BLOCK_64X128))) {
351
0
                continue;
352
0
            }
353
0
            if (bsize == BLOCK_128X128 || bsize == BLOCK_128X64 || bsize == BLOCK_64X128) {
354
0
                bs = bsize;
355
0
            }
356
357
0
            if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
358
0
                nhb     = AOMMIN(MI_SIZE_128X128, cm->mi_cols - lc);
359
0
                hb_step = 2;
360
0
            }
361
0
            if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
362
0
                nvb     = AOMMIN(MI_SIZE_128X128, cm->mi_rows - lr);
363
0
                vb_step = 2;
364
0
            }
365
0
            const uint32_t fb_idx = fbr * nhfb + fbc;
366
            // No filtering if the entire filter block is skipped
367
0
            cdef_count = svt_sb_compute_cdef_list(pcs, cm, lr, lc, dlist, bs);
368
0
            if (cdef_count == 0) {
369
0
                pcs->skip_cdef_seg[fb_idx] = 1;
370
0
                continue;
371
0
            }
372
0
            pcs->skip_cdef_seg[fb_idx] = 0;
373
374
0
            int32_t toff = CDEF_VBORDER * (fbr != 0);
375
0
            int32_t loff = CDEF_HBORDER * (fbc != 0);
376
0
            int32_t boff = CDEF_VBORDER * ((int32_t)fbr + vb_step < nvfb);
377
0
            int32_t roff = CDEF_HBORDER * ((int32_t)fbc + hb_step < nhfb);
378
379
0
            uint8_t (*dir)[CDEF_NBLOCKS][CDEF_NBLOCKS] = &pcs->cdef_dir_data[fb_idx].dir;
380
0
            int32_t (*var)[CDEF_NBLOCKS][CDEF_NBLOCKS] = &pcs->cdef_dir_data[fb_idx].var;
381
0
            for (int pli = 0; pli < num_planes; pli++) {
382
0
                int32_t ysize = (nvb << mi_high_l2[pli]) + boff + toff;
383
0
                int32_t xsize = (nhb << mi_wide_l2[pli]) + roff + loff;
384
                /* We avoid filtering the pixels for which some of the pixels to
385
                   average are outside the frame. We could change the filter instead,
386
                   but it would add special cases for any future vectorization.
387
                   Avoid memset'ting when dirty rect is inside the new one.
388
                   TODO: this could be further optimized - fill out only borders, separate buffers for Y & UV */
389
0
                bool need_to_reset = toff_prev > toff || loff_prev > loff || ysize < ysize_prev || xsize < xsize_prev;
390
0
                if (need_to_reset) {
391
0
                    uint16_t* p = &in[(-toff_prev * CDEF_BSTRIDE - loff_prev)];
392
0
                    for (int r = 0; r < ysize_prev; r++) {
393
0
                        svt_memset(p, (uint8_t)CDEF_VERY_LARGE, sizeof(p[0]) * xsize_prev);
394
0
                        p += CDEF_BSTRIDE;
395
0
                    }
396
0
                }
397
0
                toff_prev  = toff;
398
0
                loff_prev  = loff;
399
0
                ysize_prev = ysize;
400
0
                xsize_prev = xsize;
401
402
0
                svt_aom_copy_sb8_16(&in[(-toff * CDEF_BSTRIDE - loff)],
403
0
                                    CDEF_BSTRIDE,
404
0
                                    src[pli],
405
0
                                    (lr << mi_high_l2[pli]) - toff,
406
0
                                    (lc << mi_wide_l2[pli]) - loff,
407
0
                                    stride_src[pli],
408
0
                                    ysize,
409
0
                                    xsize,
410
0
                                    is_16bit);
411
412
0
                uint8_t subsampling_factor = cdef_ctrls->subsampling_factor;
413
                /*
414
                Cap the subsampling for certain block sizes.
415
416
                The intrinsics process several lines simultaneously, so blocks can only be subsampled
417
                a finite amount before there is no more speed gain.  If the space between processed lines
418
                is too large, the intrinsics will begin accessing memory outside the block.
419
                */
420
0
                switch (plane_bsize[pli]) {
421
0
                case BLOCK_8X8:
422
0
                    subsampling_factor = MIN(subsampling_factor, 4);
423
0
                    break;
424
0
                case BLOCK_8X4:
425
0
                case BLOCK_4X8:
426
0
                    subsampling_factor = MIN(subsampling_factor, 2);
427
0
                    break;
428
0
                case BLOCK_4X4:
429
0
                    subsampling_factor = MIN(subsampling_factor, 1);
430
0
                    break;
431
0
                }
432
433
                /* first cdef stage
434
                 * Perform the pri_filter strength search for the current sub_block
435
                 */
436
0
                for (int gi = 0; gi < first_pass_fs_num; gi++) {
437
                    // Check if chroma filter is set to be tested
438
0
                    if (pli && (cdef_ctrls->default_first_pass_fs_uv[gi] == -1)) {
439
0
                        pcs->mse_seg[1][fb_idx][gi] = default_mse_uv * 64;
440
0
                        continue;
441
0
                    }
442
443
0
                    int32_t pri_strength = cdef_ctrls->default_first_pass_fs[gi] / CDEF_SEC_STRENGTHS;
444
0
                    int32_t sec_strength = cdef_ctrls->default_first_pass_fs[gi] % CDEF_SEC_STRENGTHS;
445
446
0
                    svt_cdef_filter_fb(is_16bit ? NULL : (uint8_t*)tmp_dst,
447
0
                                       is_16bit ? tmp_dst : NULL,
448
0
                                       0,
449
0
                                       in,
450
0
                                       xdec[pli],
451
0
                                       ydec[pli],
452
0
                                       *dir,
453
0
                                       &dirinit,
454
0
                                       *var,
455
0
                                       pli,
456
0
                                       dlist,
457
0
                                       cdef_count,
458
0
                                       pri_strength,
459
0
                                       sec_strength + (sec_strength == 3),
460
0
                                       pri_damping,
461
0
                                       sec_damping,
462
0
                                       coeff_shift,
463
0
                                       subsampling_factor);
464
0
                    uint64_t curr_mse = compute_cdef_dist(
465
0
                        ref[pli],
466
0
                        (lr << mi_high_l2[pli]) * stride_ref[pli] + (lc << mi_wide_l2[pli]),
467
0
                        stride_ref[pli],
468
0
                        (uint8_t*)tmp_dst,
469
0
                        dlist,
470
0
                        cdef_count,
471
0
                        (BlockSize)plane_bsize[pli],
472
0
                        coeff_shift,
473
0
                        subsampling_factor,
474
0
                        is_16bit);
475
476
0
                    if (pli < 2) {
477
0
                        pcs->mse_seg[pli][fb_idx][gi] = curr_mse * subsampling_factor;
478
0
                    } else {
479
0
                        pcs->mse_seg[1][fb_idx][gi] += (curr_mse * subsampling_factor);
480
0
                    }
481
0
                }
482
483
                /* second cdef stage
484
                 * Perform the sec_filter strength search for the current sub_block
485
                 */
486
0
                for (int gi = first_pass_fs_num; gi < first_pass_fs_num + default_second_pass_fs_num; gi++) {
487
                    // Check if chroma filter is set to be tested
488
0
                    if (pli && (cdef_ctrls->default_second_pass_fs_uv[gi - first_pass_fs_num] == -1)) {
489
0
                        pcs->mse_seg[1][fb_idx][gi] = default_mse_uv * 64;
490
0
                        continue;
491
0
                    }
492
493
0
                    int32_t pri_strength = cdef_ctrls->default_second_pass_fs[gi - first_pass_fs_num] /
494
0
                        CDEF_SEC_STRENGTHS;
495
0
                    int32_t sec_strength = cdef_ctrls->default_second_pass_fs[gi - first_pass_fs_num] %
496
0
                        CDEF_SEC_STRENGTHS;
497
498
0
                    svt_cdef_filter_fb(is_16bit ? NULL : (uint8_t*)tmp_dst,
499
0
                                       is_16bit ? tmp_dst : NULL,
500
0
                                       0,
501
0
                                       in,
502
0
                                       xdec[pli],
503
0
                                       ydec[pli],
504
0
                                       *dir,
505
0
                                       &dirinit,
506
0
                                       *var,
507
0
                                       pli,
508
0
                                       dlist,
509
0
                                       cdef_count,
510
0
                                       pri_strength,
511
0
                                       sec_strength + (sec_strength == 3),
512
0
                                       pri_damping,
513
0
                                       sec_damping,
514
0
                                       coeff_shift,
515
0
                                       subsampling_factor);
516
0
                    uint64_t curr_mse = compute_cdef_dist(
517
0
                        ref[pli],
518
0
                        (lr << mi_high_l2[pli]) * stride_ref[pli] + (lc << mi_wide_l2[pli]),
519
0
                        stride_ref[pli],
520
0
                        (uint8_t*)tmp_dst,
521
0
                        dlist,
522
0
                        cdef_count,
523
0
                        (BlockSize)plane_bsize[pli],
524
0
                        coeff_shift,
525
0
                        subsampling_factor,
526
0
                        is_16bit);
527
528
0
                    if (pli < 2) {
529
0
                        pcs->mse_seg[pli][fb_idx][gi] = curr_mse * subsampling_factor;
530
0
                    } else {
531
0
                        pcs->mse_seg[1][fb_idx][gi] += (curr_mse * subsampling_factor);
532
0
                    }
533
0
                }
534
0
            }
535
0
        }
536
0
    }
537
0
}
538
539
/******************************************************
540
 * CDEF Kernel
541
 ******************************************************/
542
862
EbErrorType svt_aom_cdef_kernel_iter(void* context) {
543
    // Context & SCS & PCS
544
862
    CdefContext*        context_ptr = (CdefContext*)context;
545
862
    PictureControlSet*  pcs;
546
862
    SequenceControlSet* scs;
547
548
    //// Input
549
862
    EbObjectWrapper* dlf_results_wrapper;
550
862
    DlfResults*      dlf_results;
551
552
    //// Output
553
862
    EbObjectWrapper* cdef_results_wrapper;
554
555
862
    FrameHeader* frm_hdr;
556
557
    // Get DLF Results
558
862
    EB_GET_FULL_OBJECT(context_ptr->cdef_input_fifo_ptr, &dlf_results_wrapper);
559
560
431
    dlf_results                   = (DlfResults*)dlf_results_wrapper->object_ptr;
561
431
    pcs                           = (PictureControlSet*)dlf_results->pcs_wrapper->object_ptr;
562
431
    PictureParentControlSet* ppcs = pcs->ppcs;
563
431
    scs                           = pcs->scs;
564
565
431
    bool       is_16bit                   = scs->is_16bit_pipeline;
566
431
    Av1Common* cm                         = pcs->ppcs->av1_cm;
567
431
    frm_hdr                               = &pcs->ppcs->frm_hdr;
568
431
    CdefSearchControls* cdef_search_ctrls = &pcs->ppcs->cdef_search_ctrls;
569
431
    if (!cdef_search_ctrls->use_reference_cdef_fs && !cdef_search_ctrls->use_qp_strength) {
570
0
        if (scs->seq_header.cdef_level && pcs->ppcs->cdef_level) {
571
0
            cdef_seg_search(pcs, scs, dlf_results->segment_index);
572
0
        }
573
0
    }
574
    // All segment-based searches are done: update the total number of processed segments and, if every segment is done, finish the search and perform the application.
575
431
    svt_block_on_mutex(pcs->cdef_search_mutex);
576
577
431
    pcs->tot_seg_searched_cdef++;
578
431
    if (pcs->tot_seg_searched_cdef == pcs->cdef_segments_total_count) {
579
431
        pcs->cdef_dist_dev = -1;
580
431
        if (scs->seq_header.cdef_level && pcs->ppcs->cdef_level) {
581
257
            finish_cdef_search(pcs);
582
257
            if (ppcs->enable_restoration || pcs->ppcs->is_ref || scs->static_config.recon_enabled) {
583
                // Do application iff there are non-zero filters
584
0
                if (frm_hdr->cdef_params.cdef_y_strength[0] != 0 || frm_hdr->cdef_params.cdef_uv_strength[0] != 0 ||
585
0
                    pcs->ppcs->nb_cdef_strengths != 1) {
586
0
                    svt_av1_cdef_frame(scs, pcs);
587
0
                }
588
0
            }
589
257
        } else {
590
174
            frm_hdr->cdef_params.cdef_bits           = 0;
591
174
            frm_hdr->cdef_params.cdef_y_strength[0]  = 0;
592
174
            pcs->ppcs->nb_cdef_strengths             = 1;
593
174
            frm_hdr->cdef_params.cdef_uv_strength[0] = 0;
594
174
        }
595
596
431
        if (pcs->ppcs->nb_cdef_strengths == 1 && frm_hdr->cdef_params.cdef_y_strength[0] == 0 &&
597
203
            frm_hdr->cdef_params.cdef_uv_strength[0] == 0) {
598
198
            pcs->cdef_dist_dev = 0;
599
198
        }
600
601
        //restoration prep
602
431
        bool is_lr = ppcs->enable_restoration && frm_hdr->allow_intrabc == 0;
603
431
        if (is_lr) {
604
0
            svt_av1_loop_restoration_save_boundary_lines(cm->frame_to_show, cm, 1);
605
0
            if (is_16bit) {
606
0
                set_unscaled_input_16bit(pcs);
607
0
            }
608
0
        }
609
610
        // ------- start: Normative upscaling - super-resolution tool
611
431
        if (frm_hdr->allow_intrabc == 0 && pcs->ppcs->frame_superres_enabled) {
612
0
            svt_av1_superres_upscale_frame(cm, pcs, scs);
613
0
        }
614
431
        if (scs->static_config.resize_mode != RESIZE_NONE) {
615
0
            EbPictureBufferDesc* recon = NULL;
616
0
            svt_aom_get_recon_pic(pcs, &recon, is_16bit);
617
0
            recon->width  = pcs->ppcs->render_width;
618
0
            recon->height = pcs->ppcs->render_height;
619
0
            if (is_lr) {
620
0
                EbPictureBufferDesc* input_pic = is_16bit ? pcs->input_frame16bit : pcs->ppcs->enhanced_unscaled_pic;
621
622
0
                svt_aom_assert_err(pcs->scaled_input_pic == NULL, "pcs_ptr->scaled_input_pic is not desctoried!");
623
0
                EbPictureBufferDesc* scaled_input_pic = NULL;
624
                // downscale input picture if recon is resized
625
0
                bool is_resized = recon->width != input_pic->width || recon->height != input_pic->height;
626
0
                if (is_resized) {
627
0
                    superres_params_type spr_params = {recon->width, recon->height, 0};
628
0
                    svt_aom_downscaled_source_buffer_desc_ctor(&scaled_input_pic, input_pic, spr_params);
629
0
                    svt_aom_resize_frame(input_pic,
630
0
                                         scaled_input_pic,
631
0
                                         scs->static_config.encoder_bit_depth,
632
0
                                         av1_num_planes(&scs->seq_header.color_config),
633
0
                                         scs->subsampling_x,
634
0
                                         scs->subsampling_y,
635
0
                                         input_pic->packed_flag,
636
0
                                         PICTURE_BUFFER_DESC_FULL_MASK,
637
0
                                         0); // is_2bcompress
638
0
                    pcs->scaled_input_pic = scaled_input_pic;
639
0
                }
640
0
            }
641
0
        }
642
        // ------- end: Normative upscaling - super-resolution tool
643
644
431
        pcs->rest_segments_column_count = scs->rest_segment_column_count;
645
431
        pcs->rest_segments_row_count    = scs->rest_segment_row_count;
646
431
        pcs->rest_segments_total_count  = (uint16_t)(pcs->rest_segments_column_count * pcs->rest_segments_row_count);
647
431
        pcs->tot_seg_searched_rest      = 0;
648
431
        pcs->ppcs->av1_cm->use_boundaries_in_rest_search = scs->use_boundaries_in_rest_search;
649
431
        pcs->rest_extend_flag[0]                         = false;
650
431
        pcs->rest_extend_flag[1]                         = false;
651
431
        pcs->rest_extend_flag[2]                         = false;
652
653
431
        uint32_t segment_index;
654
862
        for (segment_index = 0; segment_index < pcs->rest_segments_total_count; ++segment_index) {
655
            // Get Empty Cdef Results to Rest
656
431
            svt_get_empty_object(context_ptr->cdef_output_fifo_ptr, &cdef_results_wrapper);
657
431
            CdefResults* cdef_results   = (struct CdefResults*)cdef_results_wrapper->object_ptr;
658
431
            cdef_results->pcs_wrapper   = dlf_results->pcs_wrapper;
659
431
            cdef_results->segment_index = segment_index;
660
            // Post Cdef Results
661
431
            svt_post_full_object(cdef_results_wrapper);
662
431
        }
663
431
    }
664
431
    svt_release_mutex(pcs->cdef_search_mutex);
665
666
    // Release Dlf Results
667
431
    svt_release_object(dlf_results_wrapper);
668
669
431
    return EB_ErrorNone;
670
862
}
671
672
431
void* svt_aom_cdef_kernel(void* input_ptr) {
673
431
    EbThreadContext* thread_ctx = (EbThreadContext*)input_ptr;
674
862
    for (;;) {
675
862
        EbErrorType err = svt_aom_cdef_kernel_iter(thread_ctx->priv);
676
862
        if (err == EB_NoErrorFifoShutdown) {
677
431
            return NULL;
678
431
        }
679
862
    }
680
0
    return NULL;
681
431
}