Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/enc_cdef.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 3-Clause Clear License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at https://www.aomedia.org/license. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10
 */
11
#include <stdio.h>
12
#include <stdlib.h>
13
#include <math.h>
14
#include <string.h>
15
16
#include "enc_cdef.h"
17
#include <stdint.h>
18
#include "aom_dsp_rtcd.h"
19
#include "svt_log.h"
20
#include "rd_cost.h"
21
#include "rc_process.h"
22
23
/*
 * Sum of squared differences between an 8-pixel-wide block of 16-bit samples
 * in `dst` and the corresponding block in `src`.
 *
 * src                : block stored with a fixed row pitch of 8 samples.
 * dst                : block stored with a row pitch of `dstride` samples.
 * height             : number of rows in the block.
 * subsampling_factor : row step; only rows 0, f, 2f, ... below `height` are
 *                      visited. Must be non-zero, otherwise the row loop never
 *                      advances.
 *
 * Returns the (unnormalised) sum of squared differences over the visited rows.
 */
static inline uint64_t mse_8xn_16bit_c(const uint16_t* src, const uint16_t* dst, const int32_t dstride,
                                       const int32_t height, uint8_t subsampling_factor) {
    uint64_t sum = 0;
    for (int32_t i = 0; i < height; i += subsampling_factor) {
        for (int32_t j = 0; j < 8; j++) {
            // The difference of two 16-bit samples can reach +/-65535, and its
            // square does not fit in a 32-bit signed int, so square in 64-bit.
            const int64_t e = (int64_t)dst[i * dstride + j] - (int64_t)src[8 * i + j];
            sum += (uint64_t)(e * e);
        }
    }
    return sum;
}
35
36
/*
 * Sum of squared differences between a 4-pixel-wide block of 16-bit samples
 * in `dst` and the corresponding block in `src`.
 *
 * src                : block stored with a fixed row pitch of 4 samples.
 * dst                : block stored with a row pitch of `dstride` samples.
 * height             : number of rows in the block.
 * subsampling_factor : row step; only rows 0, f, 2f, ... below `height` are
 *                      visited. Must be non-zero, otherwise the row loop never
 *                      advances.
 *
 * Returns the (unnormalised) sum of squared differences over the visited rows.
 */
static inline uint64_t mse_4xn_16bit_c(const uint16_t* src, const uint16_t* dst, const int32_t dstride,
                                       const int32_t height, uint8_t subsampling_factor) {
    uint64_t sum = 0;
    for (int32_t i = 0; i < height; i += subsampling_factor) {
        for (int32_t j = 0; j < 4; j++) {
            // The difference of two 16-bit samples can reach +/-65535, and its
            // square does not fit in a 32-bit signed int, so square in 64-bit.
            const int64_t e = (int64_t)dst[i * dstride + j] - (int64_t)src[4 * i + j];
            sum += (uint64_t)(e * e);
        }
    }
    return sum;
}
48
49
/*
 * Sum of squared differences between an 8-pixel-wide block of 8-bit samples
 * in `dst` (row pitch `dstride`) and the matching block in `src` (row pitch 8).
 * Rows are visited in steps of `subsampling_factor`, starting at row 0 and
 * stopping before `height`.
 */
static inline uint64_t mse_8xn_8bit_c(const uint8_t* src, const uint8_t* dst, const int32_t dstride,
                                      const int32_t height, uint8_t subsampling_factor) {
    uint64_t acc = 0;
    int32_t  row = 0;
    while (row < height) {
        const uint8_t* dst_row = &dst[row * dstride];
        const uint8_t* src_row = &src[8 * row];
        for (int32_t col = 0; col < 8; col++) {
            const int32_t diff = dst_row[col] - src_row[col];
            acc += diff * diff;
        }
        row += subsampling_factor;
    }
    return acc;
}
61
62
/*
 * Sum of squared differences between a 4-pixel-wide block of 8-bit samples
 * in `dst` (row pitch `dstride`) and the matching block in `src` (row pitch 4).
 * Rows are visited in steps of `subsampling_factor`, starting at row 0 and
 * stopping before `height`.
 */
static inline uint64_t mse_4xn_8bit_c(const uint8_t* src, const uint8_t* dst, const int32_t dstride,
                                      const int32_t height, uint8_t subsampling_factor) {
    uint64_t acc = 0;
    int32_t  row = 0;
    while (row < height) {
        const uint8_t* dst_row = &dst[row * dstride];
        const uint8_t* src_row = &src[4 * row];
        for (int32_t col = 0; col < 4; col++) {
            const int32_t diff = dst_row[col] - src_row[col];
            acc += diff * diff;
        }
        row += subsampling_factor;
    }
    return acc;
}
74
75
/* Compute MSE only on the blocks we filtered. */
76
uint64_t svt_aom_compute_cdef_dist_16bit_c(const uint16_t* dst, int32_t dstride, const uint16_t* src,
77
                                           const CdefList* dlist, int32_t cdef_count, BlockSize bsize,
78
0
                                           int32_t coeff_shift, uint8_t subsampling_factor) {
79
0
    uint64_t sum = 0;
80
0
    int32_t  bi, bx, by;
81
0
    if (bsize == BLOCK_8X8) {
82
0
        for (bi = 0; bi < cdef_count; bi++) {
83
0
            by = dlist[bi].by;
84
0
            bx = dlist[bi].bx;
85
0
            sum += mse_8xn_16bit_c(
86
0
                &src[bi << (3 + 3)], &dst[(by << 3) * dstride + (bx << 3)], dstride, 8, subsampling_factor);
87
0
        }
88
0
    } else if (bsize == BLOCK_4X8) {
89
0
        for (bi = 0; bi < cdef_count; bi++) {
90
0
            by = dlist[bi].by;
91
0
            bx = dlist[bi].bx;
92
0
            sum += mse_4xn_16bit_c(
93
0
                &src[bi << (3 + 2)], &dst[(by << 3) * dstride + (bx << 2)], dstride, 8, subsampling_factor);
94
0
        }
95
0
    } else if (bsize == BLOCK_8X4) {
96
0
        for (bi = 0; bi < cdef_count; bi++) {
97
0
            by = dlist[bi].by;
98
0
            bx = dlist[bi].bx;
99
0
            sum += mse_8xn_16bit_c(
100
0
                &src[bi << (2 + 3)], &dst[(by << 2) * dstride + (bx << 3)], dstride, 4, subsampling_factor);
101
0
        }
102
0
    } else {
103
0
        assert(bsize == BLOCK_4X4);
104
0
        for (bi = 0; bi < cdef_count; bi++) {
105
0
            by = dlist[bi].by;
106
0
            bx = dlist[bi].bx;
107
0
            sum += mse_4xn_16bit_c(
108
0
                &src[bi << (2 + 2)], &dst[(by << 2) * dstride + (bx << 2)], dstride, 4, subsampling_factor);
109
0
        }
110
0
    }
111
0
    return sum >> 2 * coeff_shift;
112
0
}
113
114
uint64_t svt_aom_compute_cdef_dist_8bit_c(const uint8_t* dst8, int32_t dstride, const uint8_t* src8,
115
                                          const CdefList* dlist, int32_t cdef_count, BlockSize bsize,
116
0
                                          int32_t coeff_shift, uint8_t subsampling_factor) {
117
0
    uint64_t sum = 0;
118
0
    int32_t  bi, bx, by;
119
0
    if (bsize == BLOCK_8X8) {
120
0
        for (bi = 0; bi < cdef_count; bi++) {
121
0
            by = dlist[bi].by;
122
0
            bx = dlist[bi].bx;
123
0
            sum += mse_8xn_8bit_c(
124
0
                &src8[bi << (3 + 3)], &dst8[(by << 3) * dstride + (bx << 3)], dstride, 8, subsampling_factor);
125
0
        }
126
0
    } else if (bsize == BLOCK_4X8) {
127
0
        for (bi = 0; bi < cdef_count; bi++) {
128
0
            by = dlist[bi].by;
129
0
            bx = dlist[bi].bx;
130
0
            sum += mse_4xn_8bit_c(
131
0
                &src8[bi << (3 + 2)], &dst8[(by << 3) * dstride + (bx << 2)], dstride, 8, subsampling_factor);
132
0
        }
133
0
    } else if (bsize == BLOCK_8X4) {
134
0
        for (bi = 0; bi < cdef_count; bi++) {
135
0
            by = dlist[bi].by;
136
0
            bx = dlist[bi].bx;
137
0
            sum += mse_8xn_8bit_c(
138
0
                &src8[bi << (2 + 3)], &dst8[(by << 2) * dstride + (bx << 3)], dstride, 4, subsampling_factor);
139
0
        }
140
0
    } else {
141
0
        assert(bsize == BLOCK_4X4);
142
0
        for (bi = 0; bi < cdef_count; bi++) {
143
0
            by = dlist[bi].by;
144
0
            bx = dlist[bi].bx;
145
0
            sum += mse_4xn_8bit_c(
146
0
                &src8[bi << (2 + 2)], &dst8[(by << 2) * dstride + (bx << 2)], dstride, 4, subsampling_factor);
147
0
        }
148
0
    }
149
0
    return sum >> 2 * coeff_shift;
150
0
}
151
152
3.55k
/*
 * Returns 1 when every mode-info entry of the 64x64 area whose top-left corner
 * is (mi_row, mi_col) has its skip flag set, 0 as soon as one entry does not.
 * The scanned area is clipped to the frame size given by cm->mi_rows /
 * cm->mi_cols.
 */
static int32_t svt_sb_all_skip(PictureControlSet* pcs, const Av1Common* const cm, int32_t mi_row, int32_t mi_col) {
    int32_t rows_left = cm->mi_rows - mi_row;
    int32_t cols_left = cm->mi_cols - mi_col;

    // Never look beyond one 64x64 unit.
    rows_left = AOMMIN(rows_left, MI_SIZE_64X64);
    cols_left = AOMMIN(cols_left, MI_SIZE_64X64);

    for (int32_t r = 0; r < rows_left; r++) {
        for (int32_t c = 0; c < cols_left; c++) {
            const MbModeInfo* mi = pcs->mi_grid_base[(mi_row + r) * pcs->mi_stride + mi_col + c];
            if (!mi->block_mi.skip) {
                return 0;
            }
        }
    }
    return 1;
}
169
170
int32_t svt_sb_compute_cdef_list(PictureControlSet* pcs, const Av1Common* const cm, int32_t mi_row, int32_t mi_col,
171
0
                                 CdefList* dlist, BlockSize bs) {
172
0
    MbModeInfo** grid      = pcs->mi_grid_base;
173
0
    int32_t      mi_stride = pcs->mi_stride;
174
175
0
    int32_t maxc = cm->mi_cols - mi_col;
176
0
    int32_t maxr = cm->mi_rows - mi_row;
177
178
0
    if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
179
0
        maxc = AOMMIN(maxc, MI_SIZE_128X128);
180
0
    } else {
181
0
        maxc = AOMMIN(maxc, MI_SIZE_64X64);
182
0
    }
183
0
    if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
184
0
        maxr = AOMMIN(maxr, MI_SIZE_128X128);
185
0
    } else {
186
0
        maxr = AOMMIN(maxr, MI_SIZE_64X64);
187
0
    }
188
189
0
    const int32_t r_step  = mi_size_high[BLOCK_8X8];
190
0
    const int32_t c_step  = mi_size_wide[BLOCK_8X8];
191
0
    const int32_t r_shift = (r_step == 2);
192
0
    const int32_t c_shift = (c_step == 2);
193
194
0
    assert(r_step == 1 || r_step == 2);
195
0
    assert(c_step == 1 || c_step == 2);
196
197
0
    int32_t count = 0;
198
0
    for (int32_t r = 0; r < maxr; r += r_step) {
199
0
        for (int32_t c = 0; c < maxc; c += c_step) {
200
0
            if (!grid[(mi_row + r) * mi_stride + (mi_col + c)]->block_mi.skip ||
201
0
                !grid[(mi_row + r) * mi_stride + (mi_col + c + 1)]->block_mi.skip ||
202
0
                !grid[(mi_row + r + 1) * mi_stride + (mi_col + c)]->block_mi.skip ||
203
0
                !grid[(mi_row + r + 1) * mi_stride + (mi_col + c + 1)]->block_mi.skip) {
204
0
                dlist[count].by = (uint8_t)(r >> r_shift);
205
0
                dlist[count].bx = (uint8_t)(c >> c_shift);
206
0
                count++;
207
0
            }
208
0
        }
209
0
    }
210
0
    return count;
211
0
}
212
213
0
/*
 * Writes the value `x` into a rectangle of `v` rows by `h` columns of 16-bit
 * samples starting at `dst`, where consecutive rows are `dstride` samples
 * apart. Nothing is written when `v` or `h` is not positive.
 */
static inline void svt_aom_fill_rect(uint16_t* dst, int32_t dstride, int32_t v, int32_t h, uint16_t x) {
    for (int32_t row = 0; row < v; row++) {
        uint16_t* line = &dst[row * dstride];
        for (int32_t col = 0; col < h; col++) {
            line[col] = x;
        }
    }
}
220
221
/*
 * Copies a rectangle of `v` rows by `h` columns of 16-bit samples from `src`
 * (row pitch `sstride`) to `dst` (row pitch `dstride`), one row at a time via
 * svt_memcpy.
 */
static inline void svt_aom_copy_rect(uint16_t* dst, int32_t dstride, const uint16_t* src, int32_t sstride, int32_t v,
                                     int32_t h) {
    uint16_t*       out = dst;
    const uint16_t* in  = src;
    int32_t         row = 0;
    while (row < v) {
        svt_memcpy(out, in, sizeof(dst[0]) * h);
        out += dstride;
        in += sstride;
        row++;
    }
}
229
230
/*
231
Loop over all 64x64 filter blocks and perform the CDEF filtering for each block, using
232
the filter strength pairs chosen in finish_cdef_search().
233
*/
234
0
/*
 * Applies CDEF to the reconstructed picture of `pcs`, one 64x64 filter block
 * (FB) at a time, in raster order.
 *
 * For every FB the luma / chroma strengths are looked up in
 * frm_hdr->cdef_params using the cdef_strength index stored in the FB's
 * top-left mode-info entry. FBs whose strengths are all zero, or that contain
 * no non-skip 8x8 block, are left untouched. Otherwise, for each plane, the FB
 * and its borders are gathered into the local `src` buffer (borders outside the
 * frame are filled with CDEF_VERY_LARGE) and svt_cdef_filter_fb() writes the
 * filtered samples back into the recon buffer.
 *
 * Working buffers:
 *   row_cdef : two rows of per-FB "was filtered" flags (previous / current FB
 *              row), each with one guard entry on both sides.
 *   linebuf  : per plane, the last CDEF_VBORDER unfiltered rows of the FB row
 *              above.
 *   colbuf   : per plane, the last CDEF_HBORDER unfiltered columns of the FB to
 *              the left.
 *
 * If any working buffer cannot be allocated, an error is logged and the
 * picture is returned unfiltered.
 */
void svt_av1_cdef_frame(SequenceControlSet* scs, PictureControlSet* pcs) {
    PictureParentControlSet* ppcs     = pcs->ppcs;
    Av1Common*               cm       = ppcs->av1_cm;
    FrameHeader*             frm_hdr  = &ppcs->frm_hdr;
    bool                     is_16bit = scs->is_16bit_pipeline;

    EbPictureBufferDesc* recon_pic;
    svt_aom_get_recon_pic(pcs, &recon_pic, is_16bit);

    const int32_t num_planes = av1_num_planes(&scs->seq_header.color_config);
    DECLARE_ALIGNED(16, uint16_t, src[CDEF_INBUF_SIZE]);
    uint16_t*      linebuf[3];
    uint16_t*      colbuf[3];
    CdefList       dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
    uint8_t *      row_cdef, *prev_row_cdef, *curr_row_cdef;
    int32_t        cdef_count;
    const uint32_t sb_size = scs->super_block_size;
    int32_t        mi_wide_l2[3];
    int32_t        mi_high_l2[3];
    int32_t        xdec[3];
    int32_t        ydec[3];
    int32_t        coeff_shift = AOMMAX(scs->static_config.encoder_bit_depth - 8, 0);
    const int32_t  nvfb        = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    const int32_t  nhfb        = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    const uint32_t cdef_size   = sizeof(*row_cdef) * (nhfb + 2) * 2;

    row_cdef = (uint8_t*)svt_aom_malloc(cdef_size);
    // An assert alone disappears in release builds; check the allocation for real.
    if (row_cdef == NULL) {
        SVT_ERROR("CDEF: failed to allocate the filter-block flag rows\n");
        return;
    }
    memset(row_cdef, 1, cdef_size);
    // Both flag rows are offset by one so that index -1 and index nhfb are valid guard entries.
    prev_row_cdef = row_cdef + 1;
    curr_row_cdef = prev_row_cdef + nhfb + 2;
    for (int32_t pli = 0; pli < num_planes; pli++) {
        int32_t subsampling_x = (pli == 0) ? 0 : 1;
        int32_t subsampling_y = (pli == 0) ? 0 : 1;
        xdec[pli]             = subsampling_x; //CHKN xd->plane[pli].subsampling_x;
        ydec[pli]             = subsampling_y; //CHKN  xd->plane[pli].subsampling_y;
        mi_wide_l2[pli]       = MI_SIZE_LOG2 - subsampling_x; //CHKN xd->plane[pli].subsampling_x;
        mi_high_l2[pli]       = MI_SIZE_LOG2 - subsampling_y; //CHKN xd->plane[pli].subsampling_y;
    }

    const int32_t stride       = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER;
    bool          alloc_failed = false;
    for (int32_t pli = 0; pli < num_planes; pli++) {
        // Size by the element type (uint16_t), not by the size of a pointer.
        linebuf[pli] = (uint16_t*)svt_aom_malloc(sizeof(*linebuf[pli]) * CDEF_VBORDER * stride);
        colbuf[pli]  = (uint16_t*)svt_aom_malloc(
            sizeof(*colbuf[pli]) * ((CDEF_BLOCKSIZE << mi_high_l2[pli]) + 2 * CDEF_VBORDER) * CDEF_HBORDER);
        if (linebuf[pli] == NULL || colbuf[pli] == NULL) {
            alloc_failed = true;
        }
    }
    if (alloc_failed) {
        SVT_ERROR("CDEF: failed to allocate the line/column border buffers\n");
        for (int32_t pli = 0; pli < num_planes; pli++) {
            if (linebuf[pli] != NULL) {
                svt_aom_free(linebuf[pli]);
            }
            if (colbuf[pli] != NULL) {
                svt_aom_free(colbuf[pli]);
            }
        }
        svt_aom_free(row_cdef);
        return;
    }
#if OPT_CDEF_SKIP_CHROMA_BORDER
    // Frame-level check: if every UV strength entry is 0, no chroma block
    // will ever be filtered.  In that case skip all chroma border copies
    // (including linebuf/colbuf saves) for the entire frame
    bool chroma_filter_off = (num_planes > 1);
    if (chroma_filter_off) {
        for (int32_t i = 0; i < ppcs->nb_cdef_strengths; i++) {
            if (frm_hdr->cdef_params.cdef_uv_strength[i] != 0) {
                chroma_filter_off = false;
                break;
            }
        }
    }
    const int32_t active_planes = chroma_filter_off ? 1 : num_planes;
#endif
    for (int32_t fbr = 0; fbr < nvfb; fbr++) {
        int32_t cdef_left = 1;
        for (int32_t fbc = 0; fbc < nhfb; fbc++) {
            int32_t level, sec_strength;
            int32_t uv_level, uv_sec_strength;
            int32_t nhb, nvb;
            int32_t cstart     = 0;
            curr_row_cdef[fbc] = 0;
            assert(pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc] != NULL &&
                   "CDEF ERROR: Skipping Current FB");
            assert(pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]->cdef_strength != -1 &&
                   "CDEF ERROR: Skipping Current FB");
            if (!cdef_left) {
                cstart =
                    -CDEF_HBORDER; //CHKN if the left block has not been filtered, then we can use samples on the left as input.
            }

            nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
            nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
            int32_t frame_top, frame_left, frame_bottom, frame_right;

            int32_t mi_row = MI_SIZE_64X64 * fbr;
            int32_t mi_col = MI_SIZE_64X64 * fbc;
            // For the current filter block, its top-left corner mi structure (mi_tl)
            // is first accessed to check whether the top and left boundaries are
            // frame boundaries. Then bottom-left and top-right mi structures are
            // accessed to check whether the bottom and right boundaries
            // (respectively) are frame boundaries.
            //
            // Note that we can't just check the bottom-right mi structure - eg. if
            // we're at the right-hand edge of the frame but not the bottom, then
            // the bottom-right mi is NULL but the bottom-left is not.
            frame_top  = (mi_row == 0) ? 1 : 0;
            frame_left = (mi_col == 0) ? 1 : 0;

            if (fbr != nvfb - 1) {
                frame_bottom = (mi_row + MI_SIZE_64X64 == cm->mi_rows) ? 1 : 0;
            } else {
                frame_bottom = 1;
            }

            if (fbc != nhfb - 1) {
                frame_right = (mi_col + MI_SIZE_64X64 == cm->mi_cols) ? 1 : 0;
            } else {
                frame_right = 1;
            }

            // Find the index of the CDEF strength for the filter block
            const int32_t mbmi_cdef_strength =
                pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]->cdef_strength;
            level        = frm_hdr->cdef_params.cdef_y_strength[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
            sec_strength = frm_hdr->cdef_params.cdef_y_strength[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
            // Secondary luma strength takes values in {0, 1, 2, 4}. If sec_strength is equal to 3 from the step above, change it to 4.
            sec_strength += sec_strength == 3;
            // Set primary and secondary chroma strengths.
            uv_level        = frm_hdr->cdef_params.cdef_uv_strength[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
            uv_sec_strength = frm_hdr->cdef_params.cdef_uv_strength[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
            // Secondary chroma strength takes values in {0, 1, 2, 4}. If sec_strength is equal to 3 from the step above, change it to 4.
            uv_sec_strength += uv_sec_strength == 3;
            // Nothing to do when all strengths are zero or when the FB holds no
            // non-skip 8x8 block; the list is only built when the strengths are non-zero.
            if ((level == 0 && sec_strength == 0 && uv_level == 0 && uv_sec_strength == 0) ||
                (cdef_count = svt_sb_compute_cdef_list(
                     pcs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, BLOCK_64X64)) == 0) {
                cdef_left = 0;
                continue;
            }

            int dirinit = !(ppcs->cdef_search_ctrls.use_reference_cdef_fs || ppcs->cdef_search_ctrls.use_qp_strength);
            // When SB 128 is used, the search for certain blocks is skipped, so dir/var info is not generated
            // In those cases, must generate info here
            if (sb_size == 128) {
                const uint32_t    lc    = MI_SIZE_64X64 * fbc;
                const uint32_t    lr    = MI_SIZE_64X64 * fbr;
                const MbModeInfo* mbmi  = pcs->mi_grid_base[lr * cm->mi_stride + lc];
                const BlockSize   bsize = mbmi->bsize;
                if (((fbc & 1) && (bsize == BLOCK_128X128 || bsize == BLOCK_128X64)) ||
                    ((fbr & 1) && (bsize == BLOCK_128X128 || bsize == BLOCK_64X128))) {
                    dirinit = 0;
                }
            }
            uint8_t (*dir)[CDEF_NBLOCKS][CDEF_NBLOCKS] = &pcs->cdef_dir_data[fbr * nhfb + fbc].dir;
            int32_t (*var)[CDEF_NBLOCKS][CDEF_NBLOCKS] = &pcs->cdef_dir_data[fbr * nhfb + fbc].var;
            curr_row_cdef[fbc]                         = 1;
#if OPT_CDEF_SKIP_CHROMA_BORDER
            for (int32_t pli = 0; pli < active_planes; pli++) {
#else
            for (int32_t pli = 0; pli < num_planes; pli++) {
#endif
                int32_t coffset;
                int32_t rend, cend;
                int32_t pri_damping = frm_hdr->cdef_params.cdef_damping;
                int32_t sec_damping = pri_damping;
                int32_t hsize       = nhb << mi_wide_l2[pli];
                int32_t vsize       = nvb << mi_high_l2[pli];
                // Read CDEF_HBORDER / CDEF_VBORDER extra samples to the right / below
                // unless this FB is in the last FB column / row.
                if (fbc == nhfb - 1) {
                    cend = hsize;
                } else {
                    cend = hsize + CDEF_HBORDER;
                }

                if (fbr == nvfb - 1) {
                    rend = vsize;
                } else {
                    rend = vsize + CDEF_VBORDER;
                }

                coffset             = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
                EbByte   rec_buff   = recon_pic->buffer[pli];
                uint32_t rec_stride = recon_pic->stride[pli];
                if (pli) {
                    level        = uv_level;
                    sec_strength = uv_sec_strength;
                }
#if OPT_CDEF_PER_PLANE_SKIP
                // Per-plane elision: when this plane's strength is (0,0), the
                // filter is a no-op and the only purpose of the per-plane src[]
                // border-copy machinery is to feed colbuf[pli] / linebuf[pli]
                // for neighbours (right, below). Since the recon for this plane
                // is unmodified, we can source those saves directly from
                // rec_buff and skip the whole src[] dance.
                //
                // For luma (pli=0) we additionally require dirinit==1 so that
                // dir/var are already populated by the search; if dirinit==0
                // (use_reference_cdef_fs / use_qp_strength), svt_cdef_filter_fb
                // must still run to populate dir for subsequent chroma planes.
                if (level == 0 && sec_strength == 0 && (pli != 0 || dirinit) && fbc == nhfb - 1) {
                    // Save linebuf[pli] (bottom edge for the FB below) from rec_buff.
                    if (fbr < nvfb - 1) {
                        svt_aom_copy_sb8_16(&linebuf[pli][coffset],
                                            stride,
                                            rec_buff,
                                            (MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER,
                                            coffset,
                                            rec_stride,
                                            CDEF_VBORDER,
                                            hsize,
                                            is_16bit);
                    }
                    // Save colbuf[pli] (right edge for the FB to the right) from rec_buff.
                    // colbuf layout: rend+VBORDER rows x HBORDER cols, mirroring the
                    // standard save which reads from src[] rows 0..rend+VBORDER-1 cols
                    // hsize..hsize+HBORDER-1 (== rec_buff cols coffset+hsize-HBORDER..coffset+hsize-1).
                    //
                    // - For fbr > 0: copy the full rend+VBORDER rows starting VBORDER above the FB.
                    // - For fbr == 0: the top VBORDER rows of colbuf are read by the right
                    //   neighbour but immediately overwritten by its frame_top fill, so we
                    //   skip them (rec_buff has no rows above 0).
                    //
                    // NOTE(review): the enclosing condition requires fbc == nhfb - 1, so
                    // this branch can never execute as written — confirm whether the
                    // last-column restriction or this save is the intended behaviour.
                    if (fbc < nhfb - 1) {
                        const int32_t row_top     = (fbr == 0) ? 0 : -CDEF_VBORDER;
                        const int32_t num_rows    = (fbr == 0) ? rend : (rend + CDEF_VBORDER);
                        const int32_t dst_row_off = (fbr == 0) ? CDEF_VBORDER : 0;
                        svt_aom_copy_sb8_16(colbuf[pli] + dst_row_off * CDEF_HBORDER,
                                            CDEF_HBORDER,
                                            rec_buff,
                                            (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr + row_top,
                                            coffset + hsize - CDEF_HBORDER,
                                            rec_stride,
                                            num_rows,
                                            CDEF_HBORDER,
                                            is_16bit);
                    }
                    continue;
                }
#endif

                /* Copy in the pixels we need from the current superblock for
                   deringing.*/
                svt_aom_copy_sb8_16(&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart],
                                    CDEF_BSTRIDE,
                                    rec_buff,
                                    (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr,
                                    coffset + cstart,
                                    rec_stride,
                                    rend,
                                    cend - cstart,
                                    is_16bit);
                // Top border: straight from the recon if the FB above was not
                // filtered, from linebuf if it was, or "very large" on the first FB row.
                if (!prev_row_cdef[fbc]) {
                    svt_aom_copy_sb8_16(&src[CDEF_HBORDER],
                                        CDEF_BSTRIDE,
                                        rec_buff,
                                        (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
                                        coffset,
                                        rec_stride,
                                        CDEF_VBORDER,
                                        hsize,
                                        is_16bit);
                } else if (fbr > 0) {
                    svt_aom_copy_rect(
                        &src[CDEF_HBORDER], CDEF_BSTRIDE, &linebuf[pli][coffset], stride, CDEF_VBORDER, hsize);
                } else {
                    svt_aom_fill_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hsize, CDEF_VERY_LARGE);
                }

                // Top-left corner, same three-way choice based on the above-left FB.
                if (!prev_row_cdef[fbc - 1]) {
                    svt_aom_copy_sb8_16(src,
                                        CDEF_BSTRIDE,
                                        rec_buff,
                                        (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
                                        coffset - CDEF_HBORDER,
                                        rec_stride,
                                        CDEF_VBORDER,
                                        CDEF_HBORDER,
                                        is_16bit);
                } else if (fbr > 0 && fbc > 0) {
                    svt_aom_copy_rect(
                        src, CDEF_BSTRIDE, &linebuf[pli][coffset - CDEF_HBORDER], stride, CDEF_VBORDER, CDEF_HBORDER);
                } else {
                    svt_aom_fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
                }

                // Top-right corner, same three-way choice based on the above-right FB.
                if (!prev_row_cdef[fbc + 1]) {
                    svt_aom_copy_sb8_16(&src[CDEF_HBORDER + (nhb << mi_wide_l2[pli])],
                                        CDEF_BSTRIDE,
                                        rec_buff,
                                        (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
                                        coffset + hsize,
                                        rec_stride,
                                        CDEF_VBORDER,
                                        CDEF_HBORDER,
                                        is_16bit);
                } else if (fbr > 0 && fbc < nhfb - 1) {
                    svt_aom_copy_rect(&src[hsize + CDEF_HBORDER],
                                      CDEF_BSTRIDE,
                                      &linebuf[pli][coffset + hsize],
                                      stride,
                                      CDEF_VBORDER,
                                      CDEF_HBORDER);
                } else {
                    svt_aom_fill_rect(
                        &src[hsize + CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
                }

                if (cdef_left) {
                    /* If we deringed the superblock on the left then we need to copy in
                       saved pixels. */
                    svt_aom_copy_rect(src, CDEF_BSTRIDE, colbuf[pli], CDEF_HBORDER, rend + CDEF_VBORDER, CDEF_HBORDER);
                }

                /* Saving pixels in case we need to dering the superblock on the
                    right. */
                if (fbc < nhfb - 1) {
                    svt_aom_copy_rect(
                        colbuf[pli], CDEF_HBORDER, src + hsize, CDEF_BSTRIDE, rend + CDEF_VBORDER, CDEF_HBORDER);
                }

                // Save the bottom rows of this FB (still unfiltered in the recon)
                // for the FB row below.
                if (fbr < nvfb - 1) {
                    svt_aom_copy_sb8_16(&linebuf[pli][coffset],
                                        stride,
                                        rec_buff,
                                        (MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER,
                                        coffset,
                                        rec_stride,
                                        CDEF_VBORDER,
                                        hsize,
                                        is_16bit);
                }

                // Borders that lie outside the frame are overwritten with CDEF_VERY_LARGE.
                if (frame_top) {
                    svt_aom_fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
                }
                if (frame_left) {
                    svt_aom_fill_rect(src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
                }
                if (frame_bottom) {
                    svt_aom_fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE],
                                      CDEF_BSTRIDE,
                                      CDEF_VBORDER,
                                      hsize + 2 * CDEF_HBORDER,
                                      CDEF_VERY_LARGE);
                }
                if (frame_right) {
                    svt_aom_fill_rect(&src[hsize + CDEF_HBORDER],
                                      CDEF_BSTRIDE,
                                      vsize + 2 * CDEF_VBORDER,
                                      CDEF_HBORDER,
                                      CDEF_VERY_LARGE);
                }
                // if ppcs->cdef_ctrls.use_reference_cdef_fs is true, then search was not performed
                // Therefore, need to make sure dir and var are initialized
                if (level || sec_strength || !dirinit) {
                    svt_cdef_filter_fb(
                        is_16bit ? NULL
                                 : &rec_buff[rec_stride * (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
                                             (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
                        is_16bit ? &((uint16_t*)rec_buff)[rec_stride * (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
                                                          (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])]
                                 : NULL,
                        rec_stride,
                        &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER],
                        xdec[pli],
                        ydec[pli],
                        *dir,
                        &dirinit,
                        *var,
                        pli,
                        dlist,
                        cdef_count,
                        level,
                        sec_strength,
                        pri_damping,
                        sec_damping,
                        coeff_shift,
                        1); // no subsampling
                }
            }
            cdef_left = 1; //CHKN filtered data is written back directly to recFrame.
        }
        // The current flag row becomes the "previous" row for the next FB row.
        {
            uint8_t* tmp  = prev_row_cdef;
            prev_row_cdef = curr_row_cdef;
            curr_row_cdef = tmp;
        }
    }
    svt_aom_free(row_cdef);
    for (int32_t pli = 0; pli < num_planes; pli++) {
        svt_aom_free(linebuf[pli]);
        svt_aom_free(colbuf[pli]);
    }
}
613
614
///-------search
615
/*
616
 * Search for the best luma+chroma strength to add as an option, knowing we
617
 * already selected nb_strengths options
618
 *
619
 * Params:
620
 *
621
 * lev0 : Array of indices of selected luma strengths.
622
 * lev1 : Array of indices of selected chroma strengths.
623
 * nb_strengths : Number of selected (Luma_strength, Chroma_strength) pairs.
624
 * mse : Array of luma and chroma filtering mse values.
625
 * sb_count : Number of filter blocks in the frame.
626
 * start_gi : starting strength index for the search of the additional strengths.
627
 * end_gi : End index for the for the search of the additional strengths.
628
*/
629
uint64_t svt_search_one_dual_c(int* lev0, int* lev1, int nb_strengths, uint64_t** mse[2], int sb_count, int start_gi,
630
0
                               int end_gi) {
631
0
    uint64_t      tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
632
0
    int32_t       i, j;
633
0
    uint64_t      best_tot_mse    = (uint64_t)1 << 63;
634
0
    int32_t       best_id0        = 0;
635
0
    int32_t       best_id1        = 0;
636
0
    const int32_t total_strengths = end_gi;
637
0
    memset(tot_mse, 0, sizeof(tot_mse));
638
    /* Loop over the filter blocks in the frame */
639
0
    for (i = 0; i < sb_count; i++) {
640
0
        int32_t  gi;
641
0
        uint64_t best_mse = (uint64_t)1 << 63;
642
        /* Loop over the already selected nb_strengths (Luma_strength,
643
           Chroma_strength) pairs, and find the pair that has the smallest mse
644
           (best_mse) for the current filter block.*/
645
        /* Find best mse among already selected options. */
646
0
        for (gi = 0; gi < nb_strengths; gi++) {
647
0
            uint64_t curr = mse[0][i][lev0[gi]];
648
0
            curr += mse[1][i][lev1[gi]];
649
0
            if (curr < best_mse) {
650
0
                best_mse = curr;
651
0
            }
652
0
        }
653
        /* Loop over the set of available (Luma_strength, Chroma_strength)
654
           pairs, identify any that provide an mse better than best_mse from the
655
           step above for the current filter block, and update any corresponding
656
           total mse (tot_mse[j][k]). */
657
        /* Find best mse when adding each possible new option. */
658
0
        for (j = start_gi; j < total_strengths; j++) {
659
0
            int32_t k;
660
0
            for (k = start_gi; k < total_strengths; k++) {
661
0
                uint64_t best = best_mse;
662
0
                uint64_t curr = mse[0][i][j];
663
0
                curr += mse[1][i][k];
664
0
                if (curr < best) {
665
0
                    best = curr;
666
0
                }
667
0
                tot_mse[j][k] += best;
668
0
            }
669
0
        }
670
0
    }
671
    /* Loop over the additionally searched (Luma_strength, Chroma_strength) pairs
672
       from the step above, and identify any such pair that provided the best mse for
673
       the whole frame. The identified pair would be added to the set of already selected pairs. */
674
0
    for (j = start_gi; j < total_strengths; j++) { // Loop over the additionally searched luma strengths
675
0
        int32_t k;
676
0
        for (k = start_gi; k < total_strengths; k++) { // Loop over the additionally searched chroma strengths
677
0
            if (tot_mse[j][k] < best_tot_mse) {
678
0
                best_tot_mse = tot_mse[j][k];
679
0
                best_id0     = j; // index for the best luma strength
680
0
                best_id1     = k; // index for the best chroma strength
681
0
            }
682
0
        }
683
0
    }
684
0
    lev0[nb_strengths] = best_id0; // Add the identified luma strength to the list of selected luma strengths
685
0
    lev1[nb_strengths] = best_id1; // Add the identified chroma strength to the list of selected chroma strengths
686
0
    return best_tot_mse;
687
0
}
688
689
/*
 * Search for the set of luma+chroma strengths that minimizes mse.
 *
 * Params:
 *
 * best_lev0 : Array receiving the indices of the selected luma strengths.
 * best_lev1 : Array receiving the indices of the selected chroma strengths.
 * nb_strengths : Number of (Luma_strength, Chroma_strength) pairs to select.
 * mse : Array of luma and chroma filtering mse values.
 * sb_count : Number of filter blocks in the frame.
 * start_gi : Starting strength index for the search of the additional strengths.
 * end_gi : End index for the search of the additional strengths.
 *
 * Returns the frame mse reported by the last svt_search_one_dual() call, or
 * (uint64_t)1 << 63 when nb_strengths <= 0 (no search is performed).
 */
static uint64_t joint_strength_search_dual(int32_t* best_lev0, int32_t* best_lev1, int32_t nb_strengths,
                                           uint64_t** mse[2], int32_t sb_count, int32_t start_gi, int32_t end_gi) {
    uint64_t best_tot_mse = (uint64_t)1 << 63;

    /* Greedy search: add one (Luma_strength, Chroma_strength) pair at a time.
       Each svt_search_one_dual() call takes into account the pairs added by
       the previous iterations and appends the best additional pair. */
    for (int32_t i = 0; i < nb_strengths; i++) {
        best_tot_mse = svt_search_one_dual(best_lev0, best_lev1, i, mse, sb_count, start_gi, end_gi);
    }

    /* Refinement: repeatedly drop the oldest selected pair (shift the lists
       down by one) and re-search the last slot given the remaining pairs.
       With a single pair there is nothing to condition on: every pass would
       repeat the greedy call above with identical inputs and outputs, so the
       refinement is skipped in that case. */
    if (nb_strengths > 1) {
        for (int32_t i = 0; i < 4 * nb_strengths; i++) {
            for (int32_t j = 0; j < nb_strengths - 1; j++) {
                best_lev0[j] = best_lev0[j + 1];
                best_lev1[j] = best_lev1[j + 1];
            }
            best_tot_mse = svt_search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse, sb_count, start_gi, end_gi);
        }
    }
    return best_tot_mse;
}
734
735
// This kernel is ported/adapted from libaom (AV1 reference implementation).
736
// Original logic inspired by aom_pick_cdef_from_qp().
737
// Adjusted to match SVT-AV1 data structures and pipeline integration.
738
static void svt_pick_cdef_from_qp(PictureParentControlSet* ppcs, int32_t is_screen_content, int32_t* pred_y_strength,
739
257
                                  int32_t* pred_uv_strength) {
740
257
    FrameHeader*  frm_hdr    = &ppcs->frm_hdr;
741
257
    const uint8_t bit_depth  = ppcs->enhanced_pic->bit_depth;
742
257
    const int32_t base_q_idx = frm_hdr->quantization_params.base_q_idx;
743
744
257
    int32_t q = svt_aom_ac_quant_qtx(base_q_idx, 0, bit_depth);
745
257
    q >>= (bit_depth - 8);
746
747
257
    int32_t y_f1 = 0, y_f2 = 0;
748
257
    int32_t uv_f1 = 0, uv_f2 = 0;
749
750
257
    const int32_t is_intra = (frm_hdr->frame_type == KEY_FRAME || frm_hdr->frame_type == INTRA_ONLY_FRAME);
751
752
257
    if (is_screen_content) {
753
0
        y_f1 = (int32_t)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02);
754
755
0
        y_f2 = (int32_t)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01);
756
757
0
        uv_f1 = (int32_t)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01);
758
759
0
        uv_f2 = (int32_t)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+0);
760
257
    } else if (!is_intra) {
761
0
        y_f1 = (int32_t)roundf(q * q * -0.0000023593946f + q * 0.0068615186f + 0.02709886f);
762
763
0
        y_f2 = (int32_t)roundf(q * q * -0.00000057629734f + q * 0.0013993345f + 0.03831067f);
764
765
0
        uv_f1 = (int32_t)roundf(q * q * -0.0000007095069f + q * 0.0034628846f + 0.00887099f);
766
767
0
        uv_f2 = (int32_t)roundf(q * q * 0.00000023874085f + q * 0.00028223585f + 0.05576307f);
768
257
    } else { // Intra
769
257
        y_f1 = (int32_t)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f);
770
771
257
        y_f2 = (int32_t)roundf(q * q * 0.0000029167343f + q * 0.0027798624f + 0.0079405f);
772
773
257
        uv_f1 = (int32_t)roundf(q * q * -0.0000130790995f + q * 0.012892405f - 0.00748388f);
774
775
257
        uv_f2 = (int32_t)roundf(q * q * 0.0000032651783f + q * 0.00035520183f + 0.00228092f);
776
257
    }
777
778
    // Clamp to AV1 limits
779
257
    y_f1  = clamp(y_f1, 0, 15);
780
257
    y_f2  = clamp(y_f2, 0, 3);
781
257
    uv_f1 = clamp(uv_f1, 0, 15);
782
257
    uv_f2 = clamp(uv_f2, 0, 3);
783
784
    // Pack primary + secondary
785
257
    *pred_y_strength  = y_f1 * CDEF_SEC_STRENGTHS + y_f2;
786
257
    *pred_uv_strength = uv_f1 * CDEF_SEC_STRENGTHS + uv_f2;
787
257
}
788
789
#if CLN_FINISH_CDEF
790
791
// Propagate cdef_strength to all 64x64 mi
792
3.21k
static INLINE void propagate_cdef_strength(PictureControlSet* pcs, int32_t sb_index, int8_t strength) {
793
3.21k
    MbModeInfo* mbmi    = pcs->mi_grid_base[sb_index];
794
3.21k
    mbmi->cdef_strength = strength;
795
3.21k
    switch (mbmi->bsize) {
796
0
    case BLOCK_128X128:
797
0
        pcs->mi_grid_base[sb_index + MI_SIZE_64X64]->cdef_strength                                  = strength;
798
0
        pcs->mi_grid_base[sb_index + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength                 = strength;
799
0
        pcs->mi_grid_base[sb_index + MI_SIZE_64X64 * pcs->mi_stride + MI_SIZE_64X64]->cdef_strength = strength;
800
0
        break;
801
0
    case BLOCK_128X64:
802
0
        pcs->mi_grid_base[sb_index + MI_SIZE_64X64]->cdef_strength = strength;
803
0
        break;
804
0
    case BLOCK_64X128:
805
0
        pcs->mi_grid_base[sb_index + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength = strength;
806
0
        break;
807
3.21k
    default:
808
3.21k
        break;
809
3.21k
    }
810
3.21k
}
811
812
257
// Frame-level CDEF damping derived from the base quantizer index:
// 3 plus one for every 64 steps of base_q_idx.
#define CDEF_DAMPING_FROM_QP(base_q_idx) (3 + ((base_q_idx) >> 6))
813
814
257
void finish_cdef_search(PictureControlSet* pcs) {
815
257
    PictureParentControlSet* ppcs    = pcs->ppcs;
816
257
    FrameHeader*             frm_hdr = &ppcs->frm_hdr;
817
257
    Av1Common*               cm      = ppcs->av1_cm;
818
257
    int32_t                  mi_rows = ppcs->av1_cm->mi_rows;
819
257
    int32_t                  mi_cols = ppcs->av1_cm->mi_cols;
820
821
257
    int32_t  fbr, fbc;
822
257
    uint64_t best_tot_mse = (uint64_t)1 << 63;
823
257
    int32_t  sb_count;
824
257
    int32_t  nvfb = (mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
825
257
    int32_t  nhfb = (mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
826
827
257
    CdefSearchControls* cdef_search_ctrls = &pcs->ppcs->cdef_search_ctrls;
828
829
257
    if (cdef_search_ctrls->use_qp_strength) {
830
257
#if OPT_SC_CDEF_QP
831
257
        const bool    allintra  = ppcs->scs->allintra;
832
257
        const uint8_t sc_class1 = ppcs->sc_class1;
833
257
        const uint8_t sc_class5 = ppcs->sc_class5;
834
257
        const uint8_t sc        = allintra ? sc_class5 : sc_class1;
835
257
        int           pred_y, pred_uv;
836
257
        svt_pick_cdef_from_qp(ppcs, sc, &pred_y, &pred_uv);
837
#else
838
        int pred_y, pred_uv;
839
        svt_pick_cdef_from_qp(ppcs, 0, &pred_y, &pred_uv);
840
#endif
841
257
        frm_hdr->cdef_params.cdef_bits           = 0;
842
257
        ppcs->nb_cdef_strengths                  = 1;
843
257
        frm_hdr->cdef_params.cdef_y_strength[0]  = pred_y;
844
257
        frm_hdr->cdef_params.cdef_uv_strength[0] = pred_uv;
845
257
        frm_hdr->cdef_params.cdef_damping        = CDEF_DAMPING_FROM_QP(frm_hdr->quantization_params.base_q_idx);
846
847
1.16k
        for (fbr = 0; fbr < nvfb; ++fbr) {
848
4.45k
            for (fbc = 0; fbc < nhfb; ++fbc) {
849
3.55k
                const int32_t     sb_idx = MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc;
850
3.55k
                const MbModeInfo* mbmi   = pcs->mi_grid_base[sb_idx];
851
852
3.55k
                if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) ||
853
3.55k
                    ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) {
854
0
                    continue;
855
0
                }
856
3.55k
                if (svt_sb_all_skip(pcs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) {
857
336
                    continue;
858
336
                }
859
860
3.21k
                propagate_cdef_strength(pcs, sb_idx, 0);
861
3.21k
            }
862
905
        }
863
257
        return;
864
257
    }
865
866
0
    CdefReconControls* cdef_recon_ctrls           = &pcs->ppcs->cdef_recon_ctrls;
867
0
    const int          first_pass_fs_num          = cdef_search_ctrls->first_pass_fs_num;
868
0
    const int          default_second_pass_fs_num = cdef_search_ctrls->default_second_pass_fs_num;
869
870
0
    frm_hdr->cdef_params.cdef_bits           = 0;
871
0
    ppcs->nb_cdef_strengths                  = 1;
872
0
    frm_hdr->cdef_params.cdef_y_strength[0]  = cdef_search_ctrls->pred_y_f;
873
0
    frm_hdr->cdef_params.cdef_uv_strength[0] = cdef_search_ctrls->pred_uv_f;
874
0
    frm_hdr->cdef_params.cdef_damping        = CDEF_DAMPING_FROM_QP(frm_hdr->quantization_params.base_q_idx);
875
876
0
    if (cdef_search_ctrls->use_reference_cdef_fs) {
877
0
        for (fbr = 0; fbr < nvfb; ++fbr) {
878
0
            for (fbc = 0; fbc < nhfb; ++fbc) {
879
0
                const int32_t     sb_idx = MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc;
880
0
                const MbModeInfo* mbmi   = pcs->mi_grid_base[sb_idx];
881
882
0
                if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) ||
883
0
                    ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) {
884
0
                    continue;
885
0
                }
886
0
                if (svt_sb_all_skip(pcs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) {
887
0
                    continue;
888
0
                }
889
890
0
                propagate_cdef_strength(pcs, sb_idx, 0);
891
0
            }
892
0
        }
893
0
        return;
894
0
    }
895
896
0
    int32_t* sb_index;
897
0
    EB_MALLOC_ARRAY_NO_CHECK(sb_index, nvfb * nhfb);
898
0
    assert(sb_index != NULL);
899
900
0
    uint64_t** mse[2];
901
0
    EB_MALLOC_ARRAY_NO_CHECK(mse[0], nvfb * nhfb);
902
0
    EB_MALLOC_ARRAY_NO_CHECK(mse[1], nvfb * nhfb);
903
0
    assert(mse[0] != NULL);
904
0
    assert(mse[1] != NULL);
905
906
0
    const int32_t start_gi = 0;
907
0
    const int32_t end_gi   = first_pass_fs_num + default_second_pass_fs_num;
908
0
    int32_t       i;
909
0
    int32_t       nb_strengths;
910
0
    int32_t       nb_strength_bits = 0;
911
0
    uint64_t      lambda;
912
0
    uint32_t      fast_lambda, full_lambda = 0;
913
914
0
    svt_aom_lambda_assign(pcs,
915
0
                          &fast_lambda,
916
0
                          &full_lambda,
917
0
                          pcs->ppcs->enhanced_pic->bit_depth,
918
0
                          pcs->ppcs->frm_hdr.quantization_params.base_q_idx,
919
0
                          false);
920
0
    lambda   = full_lambda;
921
0
    sb_count = 0;
922
923
0
    for (fbr = 0; fbr < nvfb; ++fbr) {
924
0
        for (fbc = 0; fbc < nhfb; ++fbc) {
925
0
            const MbModeInfo* mbmi = pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc];
926
0
            if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) ||
927
0
                ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) {
928
0
                continue;
929
0
            }
930
0
            if (pcs->skip_cdef_seg[fbr * nhfb + fbc]) {
931
0
                continue;
932
0
            }
933
934
0
            mse[0][sb_count]   = pcs->mse_seg[0][fbr * nhfb + fbc];
935
0
            mse[1][sb_count]   = pcs->mse_seg[1][fbr * nhfb + fbc];
936
0
            sb_index[sb_count] = MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc;
937
0
            sb_count++;
938
0
        }
939
0
    }
940
941
    // Scale down the cost of the (0,0) filter strength to bias selection towards off.  When off, we can save the cost of the application
942
0
    if (cdef_recon_ctrls->zero_fs_cost_bias) {
943
0
        const bool is_16bit = (pcs->scs->static_config.encoder_bit_depth > EB_EIGHT_BIT);
944
0
        uint16_t   factor;
945
0
        for (i = 0; i < sb_count; i++) {
946
0
            if (is_16bit) {
947
0
                factor = cdef_recon_ctrls->zero_fs_cost_bias;
948
0
                if (mse[0][i][0] < 5000) {
949
0
                    factor = MIN(factor - 10, 64);
950
0
                } else if (mse[0][i][0] < 10000) {
951
0
                    factor = MIN(factor - 5, 64);
952
0
                } else if (mse[0][i][0] > 25000) {
953
0
                    factor = MIN(factor + 1, 64);
954
0
                }
955
0
                mse[0][i][0] = (factor * mse[0][i][0]) >> 6;
956
957
0
                factor = cdef_recon_ctrls->zero_fs_cost_bias;
958
0
                if (mse[1][i][0] < 5000) {
959
0
                    factor = MIN(factor - 10, 64);
960
0
                } else if (mse[1][i][0] < 10000) {
961
0
                    factor = MIN(factor - 5, 64);
962
0
                } else if (mse[1][i][0] > 25000) {
963
0
                    factor = MIN(factor + 1, 64);
964
0
                }
965
0
                mse[1][i][0] = (factor * mse[1][i][0]) >> 6;
966
0
            } else {
967
0
                factor = cdef_recon_ctrls->zero_fs_cost_bias;
968
0
                if (mse[0][i][0] > 25000) {
969
0
                    factor = MIN(factor + 2, 64);
970
0
                } else if (mse[0][i][0] > 10000) {
971
0
                    factor = MIN(factor + 1, 64);
972
0
                }
973
0
                mse[0][i][0] = (factor * mse[0][i][0]) >> 6;
974
975
0
                factor = cdef_recon_ctrls->zero_fs_cost_bias;
976
0
                if (mse[1][i][0] > 25000) {
977
0
                    factor = MIN(factor + 2, 64);
978
0
                } else if (mse[1][i][0] > 10000) {
979
0
                    factor = MIN(factor + 1, 64);
980
0
                }
981
0
                mse[1][i][0] = (factor * mse[1][i][0]) >> 6;
982
0
            }
983
0
        }
984
0
    }
985
986
    // Compute cost of (strength=0) to derive pcs->cdef_dist_dev
987
0
    int64_t zero_dist = 0;
988
0
    for (i = 0; i < sb_count; i++) {
989
0
        zero_dist += mse[0][i][0] + mse[1][i][0];
990
0
    }
991
0
    uint64_t zero_cost = RDCOST(lambda, av1_cost_literal(CDEF_STRENGTH_BITS * 2), zero_dist << 4);
992
993
    // Search for different numbers of signalling bits
994
0
    for (i = 0; i <= 3; i++) {
995
0
        int32_t best_lev0[CDEF_MAX_STRENGTHS] = {0};
996
0
        int32_t best_lev1[CDEF_MAX_STRENGTHS] = {0};
997
0
        nb_strengths                          = 1 << i;
998
0
        uint64_t tot_mse                      = joint_strength_search_dual(
999
0
            best_lev0, best_lev1, nb_strengths, mse, sb_count, start_gi, end_gi);
1000
1001
0
        const int      total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS * 2;
1002
0
        const uint64_t cost       = RDCOST(lambda, av1_cost_literal(total_bits), tot_mse * 16);
1003
0
        if (cost < best_tot_mse) {
1004
0
            best_tot_mse     = cost;
1005
0
            nb_strength_bits = i;
1006
0
            for (int32_t j = 0; j < 1 << nb_strength_bits; j++) {
1007
0
                frm_hdr->cdef_params.cdef_y_strength[j]  = best_lev0[j];
1008
0
                frm_hdr->cdef_params.cdef_uv_strength[j] = cdef_search_ctrls->uv_from_y ? best_lev0[j] : best_lev1[j];
1009
0
            }
1010
0
        }
1011
0
    }
1012
1013
0
    pcs->cdef_dist_dev = zero_cost == 0 ? 0 : (int32_t)(1000 - ((1000 * best_tot_mse) / zero_cost));
1014
0
    nb_strengths       = 1 << nb_strength_bits;
1015
1016
0
    frm_hdr->cdef_params.cdef_bits = nb_strength_bits;
1017
0
    ppcs->nb_cdef_strengths        = nb_strengths;
1018
1019
    // Assign each filter block its best strength index
1020
0
    for (i = 0; i < sb_count; i++) {
1021
0
        int32_t  gi;
1022
0
        int32_t  best_gi  = 0;
1023
0
        uint64_t best_mse = (uint64_t)1 << 63;
1024
0
        for (gi = 0; gi < ppcs->nb_cdef_strengths; gi++) {
1025
0
            uint64_t curr = mse[0][i][frm_hdr->cdef_params.cdef_y_strength[gi]] +
1026
0
                mse[1][i][frm_hdr->cdef_params.cdef_uv_strength[gi]];
1027
0
            if (curr < best_mse) {
1028
0
                best_gi  = gi;
1029
0
                best_mse = curr;
1030
0
            }
1031
0
        }
1032
0
        propagate_cdef_strength(pcs, sb_index[i], (int8_t)best_gi);
1033
0
    }
1034
1035
    // Map search indices back to actual filter strengths
1036
0
    int filter_map[TOTAL_STRENGTHS] = {0};
1037
0
    for (i = 0; i < first_pass_fs_num; i++) {
1038
0
        filter_map[i] = cdef_search_ctrls->default_first_pass_fs[i];
1039
0
    }
1040
0
    for (i = 0; i < default_second_pass_fs_num; i++) {
1041
0
        filter_map[first_pass_fs_num + i] = cdef_search_ctrls->default_second_pass_fs[i];
1042
0
    }
1043
1044
0
    for (i = 0; i < ppcs->nb_cdef_strengths; i++) {
1045
0
        frm_hdr->cdef_params.cdef_y_strength[i]  = filter_map[frm_hdr->cdef_params.cdef_y_strength[i]];
1046
0
        frm_hdr->cdef_params.cdef_uv_strength[i] = filter_map[frm_hdr->cdef_params.cdef_uv_strength[i]];
1047
0
    }
1048
1049
0
    frm_hdr->cdef_params.cdef_damping = CDEF_DAMPING_FROM_QP(frm_hdr->quantization_params.base_q_idx);
1050
1051
0
    EB_FREE_ARRAY(mse[0]);
1052
0
    EB_FREE_ARRAY(mse[1]);
1053
    EB_FREE_ARRAY(sb_index);
1054
0
}
1055
#else
1056
void finish_cdef_search(PictureControlSet* pcs) {
1057
    PictureParentControlSet* ppcs    = pcs->ppcs;
1058
    FrameHeader*             frm_hdr = &ppcs->frm_hdr;
1059
    Av1Common*               cm      = ppcs->av1_cm;
1060
    int32_t                  mi_rows = ppcs->av1_cm->mi_rows;
1061
    int32_t                  mi_cols = ppcs->av1_cm->mi_cols;
1062
1063
    int32_t  fbr, fbc;
1064
    uint64_t best_tot_mse = (uint64_t)1 << 63;
1065
    int32_t  sb_count;
1066
    int32_t  nvfb = (mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
1067
    int32_t  nhfb = (mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
1068
    //CDEF Settings
1069
    CdefSearchControls* cdef_search_ctrls = &pcs->ppcs->cdef_search_ctrls;
1070
    if (cdef_search_ctrls->use_qp_strength) {
1071
        int pred_y, pred_uv;
1072
1073
        // Predict Y/UV strengths from QP
1074
        svt_pick_cdef_from_qp(ppcs, 0, &pred_y, &pred_uv);
1075
1076
        // Frame-level parameters
1077
        frm_hdr->cdef_params.cdef_bits           = 0; // only one strength index
1078
        ppcs->nb_cdef_strengths                  = 1;
1079
        frm_hdr->cdef_params.cdef_y_strength[0]  = pred_y;
1080
        frm_hdr->cdef_params.cdef_uv_strength[0] = pred_uv;
1081
        frm_hdr->cdef_params.cdef_damping        = 3 + (frm_hdr->quantization_params.base_q_idx >> 6);
1082
1083
        // Assign strength index 0 to all valid 64x64 blocks
1084
        for (fbr = 0; fbr < nvfb; ++fbr) {
1085
            for (fbc = 0; fbc < nhfb; ++fbc) {
1086
                MbModeInfo* mbmi = pcs->mi_grid_base[MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc];
1087
1088
                // Skip duplicated 64x64 blocks inside larger 128x128/128x64/64x128
1089
                if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) ||
1090
                    ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) {
1091
                    continue;
1092
                }
1093
1094
                // No filtering if the entire filter block is skipped
1095
                if (svt_sb_all_skip(pcs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) {
1096
                    continue;
1097
                }
1098
1099
                mbmi->cdef_strength = 0;
1100
1101
                // Duplicate for large blocks in SVT MI map
1102
                switch (mbmi->bsize) {
1103
                case BLOCK_128X128:
1104
                    pcs->mi_grid_base[MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc + MI_SIZE_64X64]
1105
                        ->cdef_strength = 0;
1106
1107
                    pcs->mi_grid_base[(MI_SIZE_64X64 * fbr + MI_SIZE_64X64) * pcs->mi_stride + MI_SIZE_64X64 * fbc]
1108
                        ->cdef_strength = 0;
1109
1110
                    pcs->mi_grid_base[(MI_SIZE_64X64 * fbr + MI_SIZE_64X64) * pcs->mi_stride + MI_SIZE_64X64 * fbc +
1111
                                      MI_SIZE_64X64]
1112
                        ->cdef_strength = 0;
1113
                    break;
1114
1115
                case BLOCK_128X64:
1116
                    pcs->mi_grid_base[MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc + MI_SIZE_64X64]
1117
                        ->cdef_strength = 0;
1118
                    break;
1119
1120
                case BLOCK_64X128:
1121
                    pcs->mi_grid_base[(MI_SIZE_64X64 * fbr + MI_SIZE_64X64) * pcs->mi_stride + MI_SIZE_64X64 * fbc]
1122
                        ->cdef_strength = 0;
1123
                    break;
1124
1125
                default:
1126
                    break;
1127
                }
1128
            }
1129
        }
1130
        return;
1131
    }
1132
1133
    CdefReconControls* cdef_recon_ctrls           = &pcs->ppcs->cdef_recon_ctrls;
1134
    const int          first_pass_fs_num          = cdef_search_ctrls->first_pass_fs_num;
1135
    const int          default_second_pass_fs_num = cdef_search_ctrls->default_second_pass_fs_num;
1136
1137
    if (cdef_search_ctrls->use_reference_cdef_fs) {
1138
        int32_t* sb_index;
1139
        EB_MALLOC_ARRAY_NO_CHECK(sb_index, nvfb * nhfb);
1140
        int32_t best_gi = 0;
1141
        sb_count        = 0;
1142
        assert(sb_index != NULL);
1143
        for (fbr = 0; fbr < nvfb; ++fbr) {
1144
            for (fbc = 0; fbc < nhfb; ++fbc) {
1145
                const MbModeInfo* mbmi = pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc];
1146
                if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) ||
1147
                    ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) {
1148
                    continue;
1149
                }
1150
                // No filtering if the entire filter block is skipped
1151
                if (svt_sb_all_skip(pcs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) {
1152
                    continue;
1153
                }
1154
                sb_index[sb_count] = MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc;
1155
                sb_count++;
1156
            }
1157
        }
1158
        for (int32_t i = 0; i < sb_count; i++) {
1159
            pcs->mi_grid_base[sb_index[i]]->cdef_strength = (int8_t)best_gi;
1160
            //in case the fb is within a block=128x128 or 128x64, or 64x128, then we genrate param only for the first 64x64.
1161
            //since our mi map deos not have the multi pointer single data assignment, we need to duplicate data.
1162
            BlockSize bsize = pcs->mi_grid_base[sb_index[i]]->bsize;
1163
            switch (bsize) {
1164
            case BLOCK_128X128:
1165
                pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->cdef_strength                  = (int8_t)best_gi;
1166
                pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength = (int8_t)best_gi;
1167
                pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride + MI_SIZE_64X64]->cdef_strength =
1168
                    (int8_t)best_gi;
1169
                break;
1170
            case BLOCK_128X64:
1171
                pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->cdef_strength = (int8_t)best_gi;
1172
                break;
1173
            case BLOCK_64X128:
1174
                pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength = (int8_t)best_gi;
1175
                break;
1176
            default:
1177
                break;
1178
            }
1179
        }
1180
        frm_hdr->cdef_params.cdef_bits = 0;
1181
        ppcs->nb_cdef_strengths        = 1;
1182
        //cdef_pri_damping & cdef_sec_damping consolidated to cdef_damping
1183
        int32_t pri_damping                      = 3 + (frm_hdr->quantization_params.base_q_idx >> 6);
1184
        frm_hdr->cdef_params.cdef_damping        = pri_damping;
1185
        frm_hdr->cdef_params.cdef_y_strength[0]  = cdef_search_ctrls->pred_y_f;
1186
        frm_hdr->cdef_params.cdef_uv_strength[0] = cdef_search_ctrls->pred_uv_f;
1187
        EB_FREE_ARRAY(sb_index);
1188
        return;
1189
    }
1190
    int32_t* sb_index;
1191
    // to keep track of the sb_address in units of SBs (not mi_size)
1192
    int32_t* sb_addr;
1193
    EB_MALLOC_ARRAY_NO_CHECK(sb_index, nvfb * nhfb);
1194
    EB_MALLOC_ARRAY_NO_CHECK(sb_addr, nvfb * nhfb);
1195
    assert(sb_index != NULL);
1196
    assert(sb_addr != NULL);
1197
1198
    uint64_t** mse[2];
1199
    EB_MALLOC_ARRAY_NO_CHECK(mse[0], nvfb * nhfb);
1200
    EB_MALLOC_ARRAY_NO_CHECK(mse[1], nvfb * nhfb);
1201
    assert(mse[0] != NULL);
1202
    assert(mse[1] != NULL);
1203
1204
    int32_t  start_gi = 0;
1205
    int32_t  end_gi   = first_pass_fs_num + default_second_pass_fs_num;
1206
    int32_t  i;
1207
    int32_t  nb_strengths;
1208
    int32_t  nb_strength_bits;
1209
    uint64_t lambda;
1210
    uint32_t fast_lambda, full_lambda = 0;
1211
1212
    svt_aom_lambda_assign(pcs,
1213
                          &fast_lambda,
1214
                          &full_lambda,
1215
                          pcs->ppcs->enhanced_pic->bit_depth,
1216
                          pcs->ppcs->frm_hdr.quantization_params.base_q_idx,
1217
                          false);
1218
    lambda   = full_lambda;
1219
    sb_count = 0;
1220
    for (fbr = 0; fbr < nvfb; ++fbr) {
1221
        for (fbc = 0; fbc < nhfb; ++fbc) {
1222
            const MbModeInfo* mbmi = pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc];
1223
            if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) ||
1224
                ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) {
1225
                continue;
1226
            }
1227
1228
            // No filtering if the entire filter block is skipped
1229
            if (pcs->skip_cdef_seg[fbr * nhfb + fbc]) {
1230
                continue;
1231
            }
1232
            // point to the MSE data
1233
            mse[0][sb_count] = pcs->mse_seg[0][fbr * nhfb + fbc];
1234
            mse[1][sb_count] = pcs->mse_seg[1][fbr * nhfb + fbc];
1235
1236
            sb_index[sb_count] = MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc;
1237
            sb_addr[sb_count]  = fbr * nhfb + fbc;
1238
            sb_count++;
1239
        }
1240
    }
1241
1242
    nb_strength_bits = 0;
1243
    // Scale down the cost of the (0,0) filter strength to bias selection towards off.
1244
    // When off, can save the cost of the application.
1245
    if (cdef_recon_ctrls->zero_fs_cost_bias) {
1246
        const bool is_16bit = (pcs->scs->static_config.encoder_bit_depth > EB_EIGHT_BIT);
1247
        uint16_t   factor;
1248
        for (i = 0; i < sb_count; i++) {
1249
            if (is_16bit) {
1250
                factor = cdef_recon_ctrls->zero_fs_cost_bias;
1251
                if (mse[0][i][0] < 5000) {
1252
                    factor = MIN(factor - 10, 64);
1253
                } else if (mse[0][i][0] < 10000) {
1254
                    factor = MIN(factor - 5, 64);
1255
                } else if (mse[0][i][0] > 25000) {
1256
                    factor = MIN(factor + 1, 64);
1257
                }
1258
                mse[0][i][0] = (factor * mse[0][i][0]) >> 6;
1259
1260
                factor = cdef_recon_ctrls->zero_fs_cost_bias;
1261
                if (mse[1][i][0] < 5000) {
1262
                    factor = MIN(factor - 10, 64);
1263
                } else if (mse[1][i][0] < 10000) {
1264
                    factor = MIN(factor - 5, 64);
1265
                } else if (mse[1][i][0] > 25000) {
1266
                    factor = MIN(factor + 1, 64);
1267
                }
1268
                mse[1][i][0] = (factor * mse[1][i][0]) >> 6;
1269
            } else {
1270
                factor = cdef_recon_ctrls->zero_fs_cost_bias;
1271
                if (mse[0][i][0] > 25000) {
1272
                    factor = MIN(factor + 2, 64);
1273
                } else if (mse[0][i][0] > 10000) {
1274
                    factor = MIN(factor + 1, 64);
1275
                }
1276
                mse[0][i][0] = (factor * mse[0][i][0]) >> 6;
1277
1278
                factor = cdef_recon_ctrls->zero_fs_cost_bias;
1279
                if (mse[1][i][0] > 25000) {
1280
                    factor = MIN(factor + 2, 64);
1281
                } else if (mse[1][i][0] > 10000) {
1282
                    factor = MIN(factor + 1, 64);
1283
                }
1284
1285
                mse[1][i][0] = (factor * mse[1][i][0]) >> 6;
1286
            }
1287
        }
1288
    }
1289
    // Compute cost of off to use in deriving pcs->cdef_dist_dev
1290
    int64_t zero_dist = 0;
1291
    for (i = 0; i < sb_count; i++) {
1292
        zero_dist += mse[0][i][0] + mse[1][i][0];
1293
    }
1294
    uint64_t zero_cost = RDCOST(lambda, av1_cost_literal(CDEF_STRENGTH_BITS * 2), zero_dist << 4);
1295
    /* Search for different number of signalling bits. */
1296
    for (i = 0; i <= 3; i++) {
1297
        int32_t best_lev0[CDEF_MAX_STRENGTHS] = {0};
1298
        int32_t best_lev1[CDEF_MAX_STRENGTHS] = {0};
1299
        nb_strengths                          = 1 << i;
1300
        uint64_t tot_mse                      = joint_strength_search_dual(
1301
            best_lev0, best_lev1, nb_strengths, mse, sb_count, start_gi, end_gi);
1302
        /* Count superblock signalling cost. */
1303
        const int      total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS * 2;
1304
        const int      rate_cost  = av1_cost_literal(total_bits);
1305
        const uint64_t dist       = tot_mse * 16;
1306
        tot_mse                   = RDCOST(lambda, rate_cost, dist);
1307
        if (tot_mse < best_tot_mse) {
1308
            best_tot_mse     = tot_mse;
1309
            nb_strength_bits = i;
1310
            for (int32_t j = 0; j < 1 << nb_strength_bits; j++) {
1311
                frm_hdr->cdef_params.cdef_y_strength[j]  = best_lev0[j];
1312
                frm_hdr->cdef_params.cdef_uv_strength[j] = cdef_search_ctrls->uv_from_y ? best_lev0[j] : best_lev1[j];
1313
            }
1314
        }
1315
    }
1316
    pcs->cdef_dist_dev = zero_cost == 0 ? 0 : (int32_t)(1000 - ((1000 * best_tot_mse) / zero_cost));
1317
    nb_strengths       = 1 << nb_strength_bits;
1318
1319
    frm_hdr->cdef_params.cdef_bits = nb_strength_bits;
1320
    ppcs->nb_cdef_strengths        = nb_strengths;
1321
    for (i = 0; i < sb_count; i++) {
1322
        int32_t  gi;
1323
        int32_t  best_gi;
1324
        uint64_t best_mse = (uint64_t)1 << 63;
1325
        best_gi           = 0;
1326
        // skip this loop for SBs that are skipped in the search
1327
        for (gi = 0; gi < ppcs->nb_cdef_strengths; gi++) {
1328
            uint64_t curr = mse[0][i][frm_hdr->cdef_params.cdef_y_strength[gi]];
1329
            curr += mse[1][i][frm_hdr->cdef_params.cdef_uv_strength[gi]];
1330
            if (curr < best_mse) {
1331
                best_gi  = gi;
1332
                best_mse = curr;
1333
            }
1334
        }
1335
1336
        pcs->mi_grid_base[sb_index[i]]->cdef_strength = (int8_t)best_gi;
1337
        // In case the fb is within a 128x128, 128x64, or 64x128 block, we generate the param only for the first 64x64.
1338
        // Since our mi map does not have the multi pointer single data assignment, we need to duplicate data.
1339
        BlockSize bsize = pcs->mi_grid_base[sb_index[i]]->bsize;
1340
1341
        switch (bsize) {
1342
        case BLOCK_128X128:
1343
            pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->cdef_strength                  = (int8_t)best_gi;
1344
            pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength = (int8_t)best_gi;
1345
            pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride + MI_SIZE_64X64]->cdef_strength = (int8_t)
1346
                best_gi;
1347
            break;
1348
        case BLOCK_128X64:
1349
            pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->cdef_strength = (int8_t)best_gi;
1350
            break;
1351
        case BLOCK_64X128:
1352
            pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength = (int8_t)best_gi;
1353
            break;
1354
        default:
1355
            break;
1356
        }
1357
    }
1358
    int filter_map[TOTAL_STRENGTHS] = {0};
1359
    for (i = 0; i < first_pass_fs_num; i++) {
1360
        filter_map[i] = cdef_search_ctrls->default_first_pass_fs[i];
1361
    }
1362
    for (i = first_pass_fs_num; i < (first_pass_fs_num + default_second_pass_fs_num); i++) {
1363
        filter_map[i] = cdef_search_ctrls->default_second_pass_fs[i - first_pass_fs_num];
1364
    }
1365
1366
    for (i = 0; i < ppcs->nb_cdef_strengths; i++) {
1367
        frm_hdr->cdef_params.cdef_y_strength[i]  = filter_map[frm_hdr->cdef_params.cdef_y_strength[i]];
1368
        frm_hdr->cdef_params.cdef_uv_strength[i] = filter_map[frm_hdr->cdef_params.cdef_uv_strength[i]];
1369
    }
1370
    //cdef_pri_damping & cdef_sec_damping consolidated to cdef_damping
1371
    frm_hdr->cdef_params.cdef_damping = 3 + (frm_hdr->quantization_params.base_q_idx >> 6);
1372
    EB_FREE_ARRAY(mse[0]);
1373
    EB_FREE_ARRAY(mse[1]);
1374
    EB_FREE_ARRAY(sb_index);
1375
    EB_FREE_ARRAY(sb_addr);
1376
}
1377
#endif