Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/enc_cdef.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 3-Clause Clear License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at https://www.aomedia.org/license. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10
 */
11
#include <stdio.h>
12
#include <stdlib.h>
13
#include <math.h>
14
#include <string.h>
15
16
#include "enc_cdef.h"
17
#include <stdint.h>
18
#include "aom_dsp_rtcd.h"
19
#include "svt_log.h"
20
#include "rd_cost.h"
21
#include "rc_process.h"
22
23
/*
 * Sum of squared differences between an 8-sample-wide block of `dst` and a
 * packed 8-sample-wide block of `src` (16-bit samples).
 *
 * src                : packed block, 8 samples per row (row i starts at src[8 * i]).
 * dst                : block whose rows are `dstride` samples apart.
 * dstride            : distance, in samples, between consecutive rows of `dst`.
 * height             : number of rows in the block.
 * subsampling_factor : row step; only rows 0, f, 2f, ... below `height` are visited.
 *                      Must be non-zero, otherwise the row loop never advances.
 *
 * Returns the accumulated squared error of the visited rows (no normalisation).
 */
static inline uint64_t mse_8xn_16bit_c(const uint16_t* src, const uint16_t* dst, const int32_t dstride,
                                       const int32_t height, uint8_t subsampling_factor) {
    uint64_t sum = 0;
    for (int32_t i = 0; i < height; i += subsampling_factor) {
        for (int32_t j = 0; j < 8; j++) {
            // Widen before squaring: the square of a full-range 16-bit difference
            // (up to 65535^2) does not fit in int32_t and would be signed overflow.
            const int64_t e = (int64_t)dst[i * dstride + j] - (int64_t)src[8 * i + j];
            sum += (uint64_t)(e * e);
        }
    }
    return sum;
}
35
36
/*
 * Sum of squared differences between a 4-sample-wide block of `dst` and a
 * packed 4-sample-wide block of `src` (16-bit samples).
 *
 * src                : packed block, 4 samples per row (row i starts at src[4 * i]).
 * dst                : block whose rows are `dstride` samples apart.
 * dstride            : distance, in samples, between consecutive rows of `dst`.
 * height             : number of rows in the block.
 * subsampling_factor : row step; only rows 0, f, 2f, ... below `height` are visited.
 *                      Must be non-zero, otherwise the row loop never advances.
 *
 * Returns the accumulated squared error of the visited rows (no normalisation).
 */
static inline uint64_t mse_4xn_16bit_c(const uint16_t* src, const uint16_t* dst, const int32_t dstride,
                                       const int32_t height, uint8_t subsampling_factor) {
    uint64_t sum = 0;
    for (int32_t i = 0; i < height; i += subsampling_factor) {
        for (int32_t j = 0; j < 4; j++) {
            // Widen before squaring: the square of a full-range 16-bit difference
            // (up to 65535^2) does not fit in int32_t and would be signed overflow.
            const int64_t e = (int64_t)dst[i * dstride + j] - (int64_t)src[4 * i + j];
            sum += (uint64_t)(e * e);
        }
    }
    return sum;
}
48
49
/*
 * Sum of squared differences between an 8-sample-wide block of `dst` and a
 * packed 8-sample-wide block of `src` (8-bit samples).
 *
 * Row r of `src` starts at src[8 * r]; row r of `dst` starts at dst[r * dstride].
 * Rows are visited in steps of `subsampling_factor` (must be non-zero) while
 * the row index is below `height`. The result is not normalised.
 */
static inline uint64_t mse_8xn_8bit_c(const uint8_t* src, const uint8_t* dst, const int32_t dstride,
                                      const int32_t height, uint8_t subsampling_factor) {
    uint64_t acc = 0;
    for (int32_t row = 0; row < height; row += subsampling_factor) {
        const int32_t dst_base = row * dstride;
        const int32_t src_base = 8 * row;
        for (int32_t col = 0; col < 8; col++) {
            const int32_t diff = dst[dst_base + col] - src[src_base + col];
            acc += diff * diff;
        }
    }
    return acc;
}
61
62
/*
 * Sum of squared differences between a 4-sample-wide block of `dst` and a
 * packed 4-sample-wide block of `src` (8-bit samples).
 *
 * Row r of `src` starts at src[4 * r]; row r of `dst` starts at dst[r * dstride].
 * Rows are visited in steps of `subsampling_factor` (must be non-zero) while
 * the row index is below `height`. The result is not normalised.
 */
static inline uint64_t mse_4xn_8bit_c(const uint8_t* src, const uint8_t* dst, const int32_t dstride,
                                      const int32_t height, uint8_t subsampling_factor) {
    uint64_t acc = 0;
    for (int32_t row = 0; row < height; row += subsampling_factor) {
        const int32_t dst_base = row * dstride;
        const int32_t src_base = 4 * row;
        for (int32_t col = 0; col < 4; col++) {
            const int32_t diff = dst[dst_base + col] - src[src_base + col];
            acc += diff * diff;
        }
    }
    return acc;
}
74
75
/* Compute MSE only on the blocks we filtered. */
76
uint64_t svt_aom_compute_cdef_dist_16bit_c(const uint16_t* dst, int32_t dstride, const uint16_t* src,
77
                                           const CdefList* dlist, int32_t cdef_count, BlockSize bsize,
78
0
                                           int32_t coeff_shift, uint8_t subsampling_factor) {
79
0
    uint64_t sum = 0;
80
0
    int32_t  bi, bx, by;
81
0
    if (bsize == BLOCK_8X8) {
82
0
        for (bi = 0; bi < cdef_count; bi++) {
83
0
            by = dlist[bi].by;
84
0
            bx = dlist[bi].bx;
85
0
            sum += mse_8xn_16bit_c(
86
0
                &src[bi << (3 + 3)], &dst[(by << 3) * dstride + (bx << 3)], dstride, 8, subsampling_factor);
87
0
        }
88
0
    } else if (bsize == BLOCK_4X8) {
89
0
        for (bi = 0; bi < cdef_count; bi++) {
90
0
            by = dlist[bi].by;
91
0
            bx = dlist[bi].bx;
92
0
            sum += mse_4xn_16bit_c(
93
0
                &src[bi << (3 + 2)], &dst[(by << 3) * dstride + (bx << 2)], dstride, 8, subsampling_factor);
94
0
        }
95
0
    } else if (bsize == BLOCK_8X4) {
96
0
        for (bi = 0; bi < cdef_count; bi++) {
97
0
            by = dlist[bi].by;
98
0
            bx = dlist[bi].bx;
99
0
            sum += mse_8xn_16bit_c(
100
0
                &src[bi << (2 + 3)], &dst[(by << 2) * dstride + (bx << 3)], dstride, 4, subsampling_factor);
101
0
        }
102
0
    } else {
103
0
        assert(bsize == BLOCK_4X4);
104
0
        for (bi = 0; bi < cdef_count; bi++) {
105
0
            by = dlist[bi].by;
106
0
            bx = dlist[bi].bx;
107
0
            sum += mse_4xn_16bit_c(
108
0
                &src[bi << (2 + 2)], &dst[(by << 2) * dstride + (bx << 2)], dstride, 4, subsampling_factor);
109
0
        }
110
0
    }
111
0
    return sum >> 2 * coeff_shift;
112
0
}
113
114
uint64_t svt_aom_compute_cdef_dist_8bit_c(const uint8_t* dst8, int32_t dstride, const uint8_t* src8,
115
                                          const CdefList* dlist, int32_t cdef_count, BlockSize bsize,
116
0
                                          int32_t coeff_shift, uint8_t subsampling_factor) {
117
0
    uint64_t sum = 0;
118
0
    int32_t  bi, bx, by;
119
0
    if (bsize == BLOCK_8X8) {
120
0
        for (bi = 0; bi < cdef_count; bi++) {
121
0
            by = dlist[bi].by;
122
0
            bx = dlist[bi].bx;
123
0
            sum += mse_8xn_8bit_c(
124
0
                &src8[bi << (3 + 3)], &dst8[(by << 3) * dstride + (bx << 3)], dstride, 8, subsampling_factor);
125
0
        }
126
0
    } else if (bsize == BLOCK_4X8) {
127
0
        for (bi = 0; bi < cdef_count; bi++) {
128
0
            by = dlist[bi].by;
129
0
            bx = dlist[bi].bx;
130
0
            sum += mse_4xn_8bit_c(
131
0
                &src8[bi << (3 + 2)], &dst8[(by << 3) * dstride + (bx << 2)], dstride, 8, subsampling_factor);
132
0
        }
133
0
    } else if (bsize == BLOCK_8X4) {
134
0
        for (bi = 0; bi < cdef_count; bi++) {
135
0
            by = dlist[bi].by;
136
0
            bx = dlist[bi].bx;
137
0
            sum += mse_8xn_8bit_c(
138
0
                &src8[bi << (2 + 3)], &dst8[(by << 2) * dstride + (bx << 3)], dstride, 4, subsampling_factor);
139
0
        }
140
0
    } else {
141
0
        assert(bsize == BLOCK_4X4);
142
0
        for (bi = 0; bi < cdef_count; bi++) {
143
0
            by = dlist[bi].by;
144
0
            bx = dlist[bi].bx;
145
0
            sum += mse_4xn_8bit_c(
146
0
                &src8[bi << (2 + 2)], &dst8[(by << 2) * dstride + (bx << 2)], dstride, 4, subsampling_factor);
147
0
        }
148
0
    }
149
0
    return sum >> 2 * coeff_shift;
150
0
}
151
152
3.82k
/*
 * Returns 1 when every mode-info entry of the (at most 64x64) region whose
 * top-left mode-info position is (mi_row, mi_col) has block_mi.skip set, and 0
 * as soon as one entry without it is found. The region is clipped to the
 * cm->mi_rows x cm->mi_cols extent.
 */
static int32_t svt_sb_all_skip(PictureControlSet* pcs, const Av1Common* const cm, int32_t mi_row, int32_t mi_col) {
    const int32_t rows_left = cm->mi_rows - mi_row;
    const int32_t cols_left = cm->mi_cols - mi_col;
    const int32_t rows      = AOMMIN(rows_left, MI_SIZE_64X64);
    const int32_t cols      = AOMMIN(cols_left, MI_SIZE_64X64);

    for (int32_t r = 0; r < rows; r++) {
        for (int32_t c = 0; c < cols; c++) {
            const int skipped = pcs->mi_grid_base[(mi_row + r) * pcs->mi_stride + mi_col + c]->block_mi.skip;
            if (!skipped) {
                return 0;
            }
        }
    }
    return 1;
}
169
170
int32_t svt_sb_compute_cdef_list(PictureControlSet* pcs, const Av1Common* const cm, int32_t mi_row, int32_t mi_col,
171
0
                                 CdefList* dlist, BlockSize bs) {
172
0
    MbModeInfo** grid      = pcs->mi_grid_base;
173
0
    int32_t      mi_stride = pcs->mi_stride;
174
175
0
    int32_t maxc = cm->mi_cols - mi_col;
176
0
    int32_t maxr = cm->mi_rows - mi_row;
177
178
0
    if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
179
0
        maxc = AOMMIN(maxc, MI_SIZE_128X128);
180
0
    } else {
181
0
        maxc = AOMMIN(maxc, MI_SIZE_64X64);
182
0
    }
183
0
    if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
184
0
        maxr = AOMMIN(maxr, MI_SIZE_128X128);
185
0
    } else {
186
0
        maxr = AOMMIN(maxr, MI_SIZE_64X64);
187
0
    }
188
189
0
    const int32_t r_step  = mi_size_high[BLOCK_8X8];
190
0
    const int32_t c_step  = mi_size_wide[BLOCK_8X8];
191
0
    const int32_t r_shift = (r_step == 2);
192
0
    const int32_t c_shift = (c_step == 2);
193
194
0
    assert(r_step == 1 || r_step == 2);
195
0
    assert(c_step == 1 || c_step == 2);
196
197
0
    int32_t count = 0;
198
0
    for (int32_t r = 0; r < maxr; r += r_step) {
199
0
        for (int32_t c = 0; c < maxc; c += c_step) {
200
0
            if (!grid[(mi_row + r) * mi_stride + (mi_col + c)]->block_mi.skip ||
201
0
                !grid[(mi_row + r) * mi_stride + (mi_col + c + 1)]->block_mi.skip ||
202
0
                !grid[(mi_row + r + 1) * mi_stride + (mi_col + c)]->block_mi.skip ||
203
0
                !grid[(mi_row + r + 1) * mi_stride + (mi_col + c + 1)]->block_mi.skip) {
204
0
                dlist[count].by = (uint8_t)(r >> r_shift);
205
0
                dlist[count].bx = (uint8_t)(c >> c_shift);
206
0
                count++;
207
0
            }
208
0
        }
209
0
    }
210
0
    return count;
211
0
}
212
213
0
/*
 * Writes the value `x` into a rectangle of `v` rows by `h` samples starting at
 * `dst`, where consecutive rows are `dstride` samples apart. Nothing is
 * written when `v` or `h` is not positive.
 */
static inline void svt_aom_fill_rect(uint16_t* dst, int32_t dstride, int32_t v, int32_t h, uint16_t x) {
    for (int32_t row = 0; row < v; row++) {
        const int32_t row_start = row * dstride;
        for (int32_t col = 0; col < h; col++) {
            dst[row_start + col] = x;
        }
    }
}
220
221
/*
 * Copies a rectangle of `v` rows by `h` 16-bit samples from `src` to `dst`.
 * Rows of `src` are `sstride` samples apart and rows of `dst` are `dstride`
 * samples apart; each row is transferred with one svt_memcpy() call.
 */
static inline void svt_aom_copy_rect(uint16_t* dst, int32_t dstride, const uint16_t* src, int32_t sstride, int32_t v,
                                     int32_t h) {
    uint16_t*       out = dst;
    const uint16_t* in  = src;
    for (int32_t row = 0; row < v; row++) {
        svt_memcpy(out, in, sizeof(out[0]) * h);
        out += dstride;
        in += sstride;
    }
}
229
230
/*
231
Loop over all 64x64 filter blocks and perform the CDEF filtering for each block, using
232
the filter strength pairs chosen in finish_cdef_search().
233
*/
234
0
/* Releases the scratch buffers owned by svt_av1_cdef_frame(); entries that were never allocated are skipped. */
static void cdef_frame_release_buffers(uint8_t* row_cdef, uint16_t* linebuf[3], uint16_t* colbuf[3],
                                       int32_t num_planes) {
    if (row_cdef != NULL) {
        svt_aom_free(row_cdef);
    }
    for (int32_t pli = 0; pli < num_planes; pli++) {
        if (linebuf[pli] != NULL) {
            svt_aom_free(linebuf[pli]);
        }
        if (colbuf[pli] != NULL) {
            svt_aom_free(colbuf[pli]);
        }
    }
}

/*
 * Applies CDEF to the reconstructed picture of `pcs`, one 64x64 filter block at a time.
 *
 * scs : sequence-level settings (bit depth, 16-bit pipeline flag, super-block size, colour config).
 * pcs : picture being filtered; supplies the mode-info grid, the per-filter-block direction /
 *       variance data (cdef_dir_data) and, through pcs->ppcs, the frame header strengths.
 *
 * Scratch state:
 *   - row_cdef holds two rows of per-filter-block "was filtered" flags, each row having one guard
 *     entry on either side; all entries start at 1.
 *   - linebuf[pli] keeps the last CDEF_VBORDER rows of the filter-block row above, saved before
 *     they are overwritten by filtering.
 *   - colbuf[pli] keeps the last CDEF_HBORDER columns of the filter block to the left.
 *
 * If any scratch buffer cannot be allocated, an error is logged and the function returns with
 * the reconstructed picture left unfiltered.
 */
void svt_av1_cdef_frame(SequenceControlSet* scs, PictureControlSet* pcs) {
    PictureParentControlSet* ppcs     = pcs->ppcs;
    Av1Common*               cm       = ppcs->av1_cm;
    FrameHeader*             frm_hdr  = &ppcs->frm_hdr;
    bool                     is_16bit = scs->is_16bit_pipeline;

    EbPictureBufferDesc* recon_pic;
    svt_aom_get_recon_pic(pcs, &recon_pic, is_16bit);

    const int32_t num_planes = av1_num_planes(&scs->seq_header.color_config);
    DECLARE_ALIGNED(16, uint16_t, src[CDEF_INBUF_SIZE]);
    uint16_t*      linebuf[3] = {NULL, NULL, NULL};
    uint16_t*      colbuf[3]  = {NULL, NULL, NULL};
    CdefList       dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
    uint8_t *      row_cdef, *prev_row_cdef, *curr_row_cdef;
    int32_t        cdef_count;
    const uint32_t sb_size = scs->super_block_size;
    int32_t        mi_wide_l2[3];
    int32_t        mi_high_l2[3];
    int32_t        xdec[3];
    int32_t        ydec[3];
    int32_t        coeff_shift = AOMMAX(scs->static_config.encoder_bit_depth - 8, 0);
    const int32_t  nvfb        = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    const int32_t  nhfb        = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    const uint32_t cdef_size   = sizeof(*row_cdef) * (nhfb + 2) * 2;

    row_cdef      = (uint8_t*)svt_aom_malloc(cdef_size);
    bool alloc_ok = (row_cdef != NULL);

    for (int32_t pli = 0; pli < num_planes; pli++) {
        // Chroma planes are treated as subsampled by 2 in both directions.
        int32_t subsampling_x = (pli == 0) ? 0 : 1;
        int32_t subsampling_y = (pli == 0) ? 0 : 1;
        xdec[pli]             = subsampling_x;
        ydec[pli]             = subsampling_y;
        mi_wide_l2[pli]       = MI_SIZE_LOG2 - subsampling_x;
        mi_high_l2[pli]       = MI_SIZE_LOG2 - subsampling_y;
    }

    const int32_t stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER;
    for (int32_t pli = 0; pli < num_planes; pli++) {
        // Size by the sample type (uint16_t), not by the pointer type stored in the arrays.
        linebuf[pli] = (uint16_t*)svt_aom_malloc(sizeof(*linebuf[pli]) * CDEF_VBORDER * stride);
        colbuf[pli]  = (uint16_t*)svt_aom_malloc(
            sizeof(*colbuf[pli]) * ((CDEF_BLOCKSIZE << mi_high_l2[pli]) + 2 * CDEF_VBORDER) * CDEF_HBORDER);
        alloc_ok = alloc_ok && linebuf[pli] != NULL && colbuf[pli] != NULL;
    }

    if (!alloc_ok) {
        // Without the scratch buffers no block can be filtered; bail out instead of
        // dereferencing a NULL buffer.
        SVT_ERROR("CDEF: failed to allocate frame filtering buffers\n");
        cdef_frame_release_buffers(row_cdef, linebuf, colbuf, num_planes);
        return;
    }

    memset(row_cdef, 1, cdef_size);
    // Skip the leading guard entry of the first flag row; the second row follows the
    // first row's trailing guard entry and its own leading one.
    prev_row_cdef = row_cdef + 1;
    curr_row_cdef = prev_row_cdef + nhfb + 2;
#if OPT_CDEF_SKIP_CHROMA_BORDER
    // Frame-level check: if every UV strength entry is 0, no chroma block
    // will ever be filtered.  In that case skip all chroma border copies
    // (including linebuf/colbuf saves) for the entire frame
    bool chroma_filter_off = (num_planes > 1);
    if (chroma_filter_off) {
        for (int32_t i = 0; i < ppcs->nb_cdef_strengths; i++) {
            if (frm_hdr->cdef_params.cdef_uv_strength[i] != 0) {
                chroma_filter_off = false;
                break;
            }
        }
    }
    const int32_t active_planes = chroma_filter_off ? 1 : num_planes;
#endif
    for (int32_t fbr = 0; fbr < nvfb; fbr++) {
        int32_t cdef_left = 1;
        for (int32_t fbc = 0; fbc < nhfb; fbc++) {
            int32_t level, sec_strength;
            int32_t uv_level, uv_sec_strength;
            int32_t nhb, nvb;
            int32_t cstart     = 0;
            curr_row_cdef[fbc] = 0;
            assert(pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc] != NULL &&
                   "CDEF ERROR: Skipping Current FB");
            assert(pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]->cdef_strength != -1 &&
                   "CDEF ERROR: Skipping Current FB");
            if (!cdef_left) {
                // If the left block has not been filtered, then we can use samples on the left as input.
                cstart = -CDEF_HBORDER;
            }

            nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
            nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
            int32_t frame_top, frame_left, frame_bottom, frame_right;

            int32_t mi_row = MI_SIZE_64X64 * fbr;
            int32_t mi_col = MI_SIZE_64X64 * fbc;
            // The top and left edges of the current filter block are frame boundaries when its
            // top-left mode-info position is in the first row / column. The bottom and right
            // edges are frame boundaries for the last filter-block row / column, or when the
            // filter block ends exactly at mi_rows / mi_cols.
            frame_top  = (mi_row == 0) ? 1 : 0;
            frame_left = (mi_col == 0) ? 1 : 0;

            if (fbr != nvfb - 1) {
                frame_bottom = (mi_row + MI_SIZE_64X64 == cm->mi_rows) ? 1 : 0;
            } else {
                frame_bottom = 1;
            }

            if (fbc != nhfb - 1) {
                frame_right = (mi_col + MI_SIZE_64X64 == cm->mi_cols) ? 1 : 0;
            } else {
                frame_right = 1;
            }

            // Find the index of the CDEF strength for the filter block
            const int32_t mbmi_cdef_strength =
                pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]->cdef_strength;
            level        = frm_hdr->cdef_params.cdef_y_strength[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
            sec_strength = frm_hdr->cdef_params.cdef_y_strength[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
            // Secondary luma strength takes values in {0, 1, 2, 4}. If sec_strength is equal to 3 from the step above, change it to 4.
            sec_strength += sec_strength == 3;
            // Set primary and secondary chroma strengths.
            uv_level        = frm_hdr->cdef_params.cdef_uv_strength[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
            uv_sec_strength = frm_hdr->cdef_params.cdef_uv_strength[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
            // Secondary chroma strength takes values in {0, 1, 2, 4}. If uv_sec_strength is equal to 3 from the step above, change it to 4.
            uv_sec_strength += uv_sec_strength == 3;
            // Nothing to do when all strengths are zero or when no 8x8 block qualifies for filtering.
            if ((level == 0 && sec_strength == 0 && uv_level == 0 && uv_sec_strength == 0) ||
                (cdef_count = svt_sb_compute_cdef_list(
                     pcs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, BLOCK_64X64)) == 0) {
                cdef_left = 0;
                continue;
            }

            int dirinit = !(ppcs->cdef_search_ctrls.use_reference_cdef_fs || ppcs->cdef_search_ctrls.use_qp_strength);
            // When SB 128 is used, the search for certain blocks is skipped, so dir/var info is not generated
            // In those cases, must generate info here
            if (sb_size == 128) {
                const uint32_t    lc    = MI_SIZE_64X64 * fbc;
                const uint32_t    lr    = MI_SIZE_64X64 * fbr;
                const MbModeInfo* mbmi  = pcs->mi_grid_base[lr * cm->mi_stride + lc];
                const BlockSize   bsize = mbmi->bsize;
                if (((fbc & 1) && (bsize == BLOCK_128X128 || bsize == BLOCK_128X64)) ||
                    ((fbr & 1) && (bsize == BLOCK_128X128 || bsize == BLOCK_64X128))) {
                    dirinit = 0;
                }
            }
            uint8_t (*dir)[CDEF_NBLOCKS][CDEF_NBLOCKS] = &pcs->cdef_dir_data[fbr * nhfb + fbc].dir;
            int32_t (*var)[CDEF_NBLOCKS][CDEF_NBLOCKS] = &pcs->cdef_dir_data[fbr * nhfb + fbc].var;
            curr_row_cdef[fbc]                         = 1;
#if OPT_CDEF_SKIP_CHROMA_BORDER
            for (int32_t pli = 0; pli < active_planes; pli++) {
#else
            for (int32_t pli = 0; pli < num_planes; pli++) {
#endif
                int32_t coffset;
                int32_t rend, cend;
                int32_t pri_damping = frm_hdr->cdef_params.cdef_damping;
                int32_t sec_damping = pri_damping;
                int32_t hsize       = nhb << mi_wide_l2[pli];
                int32_t vsize       = nvb << mi_high_l2[pli];
                if (fbc == nhfb - 1) {
                    cend = hsize;
                } else {
                    cend = hsize + CDEF_HBORDER;
                }

                if (fbr == nvfb - 1) {
                    rend = vsize;
                } else {
                    rend = vsize + CDEF_VBORDER;
                }

                coffset             = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
                EbByte   rec_buff   = recon_pic->buffer[pli];
                uint32_t rec_stride = recon_pic->stride[pli];
                if (pli) {
                    // Planes after the first use the chroma strengths.
                    level        = uv_level;
                    sec_strength = uv_sec_strength;
                }

                /* Copy in the pixels we need from the current superblock for
                   deringing.*/
                svt_aom_copy_sb8_16(&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart],
                                    CDEF_BSTRIDE,
                                    rec_buff,
                                    (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr,
                                    coffset + cstart,
                                    rec_stride,
                                    rend,
                                    cend - cstart,
                                    is_16bit);
                // Top border: taken from the picture when the block above was not filtered,
                // from the saved line buffer when it was, or filled on the first row.
                if (!prev_row_cdef[fbc]) {
                    svt_aom_copy_sb8_16(&src[CDEF_HBORDER],
                                        CDEF_BSTRIDE,
                                        rec_buff,
                                        (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
                                        coffset,
                                        rec_stride,
                                        CDEF_VBORDER,
                                        hsize,
                                        is_16bit);
                } else if (fbr > 0) {
                    svt_aom_copy_rect(
                        &src[CDEF_HBORDER], CDEF_BSTRIDE, &linebuf[pli][coffset], stride, CDEF_VBORDER, hsize);
                } else {
                    svt_aom_fill_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hsize, CDEF_VERY_LARGE);
                }

                // Top-left corner, same three-way choice based on the block above-left.
                if (!prev_row_cdef[fbc - 1]) {
                    svt_aom_copy_sb8_16(src,
                                        CDEF_BSTRIDE,
                                        rec_buff,
                                        (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
                                        coffset - CDEF_HBORDER,
                                        rec_stride,
                                        CDEF_VBORDER,
                                        CDEF_HBORDER,
                                        is_16bit);
                } else if (fbr > 0 && fbc > 0) {
                    svt_aom_copy_rect(
                        src, CDEF_BSTRIDE, &linebuf[pli][coffset - CDEF_HBORDER], stride, CDEF_VBORDER, CDEF_HBORDER);
                } else {
                    svt_aom_fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
                }

                // Top-right corner, same three-way choice based on the block above-right.
                if (!prev_row_cdef[fbc + 1]) {
                    svt_aom_copy_sb8_16(&src[CDEF_HBORDER + (nhb << mi_wide_l2[pli])],
                                        CDEF_BSTRIDE,
                                        rec_buff,
                                        (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
                                        coffset + hsize,
                                        rec_stride,
                                        CDEF_VBORDER,
                                        CDEF_HBORDER,
                                        is_16bit);
                } else if (fbr > 0 && fbc < nhfb - 1) {
                    svt_aom_copy_rect(&src[hsize + CDEF_HBORDER],
                                      CDEF_BSTRIDE,
                                      &linebuf[pli][coffset + hsize],
                                      stride,
                                      CDEF_VBORDER,
                                      CDEF_HBORDER);
                } else {
                    svt_aom_fill_rect(
                        &src[hsize + CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
                }

                if (cdef_left) {
                    /* If we deringed the superblock on the left then we need to copy in
                       saved pixels. */
                    svt_aom_copy_rect(src, CDEF_BSTRIDE, colbuf[pli], CDEF_HBORDER, rend + CDEF_VBORDER, CDEF_HBORDER);
                }

                /* Saving pixels in case we need to dering the superblock on the
                    right. */
                if (fbc < nhfb - 1) {
                    svt_aom_copy_rect(
                        colbuf[pli], CDEF_HBORDER, src + hsize, CDEF_BSTRIDE, rend + CDEF_VBORDER, CDEF_HBORDER);
                }

                // Save the bottom rows of this block for the filter-block row below.
                if (fbr < nvfb - 1) {
                    svt_aom_copy_sb8_16(&linebuf[pli][coffset],
                                        stride,
                                        rec_buff,
                                        (MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER,
                                        coffset,
                                        rec_stride,
                                        CDEF_VBORDER,
                                        hsize,
                                        is_16bit);
                }

                // Borders that fall outside the frame are overwritten with CDEF_VERY_LARGE
                // (presumably the filter's "sample unavailable" marker; confirm against svt_cdef_filter_fb).
                if (frame_top) {
                    svt_aom_fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
                }
                if (frame_left) {
                    svt_aom_fill_rect(src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
                }
                if (frame_bottom) {
                    svt_aom_fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE],
                                      CDEF_BSTRIDE,
                                      CDEF_VBORDER,
                                      hsize + 2 * CDEF_HBORDER,
                                      CDEF_VERY_LARGE);
                }
                if (frame_right) {
                    svt_aom_fill_rect(&src[hsize + CDEF_HBORDER],
                                      CDEF_BSTRIDE,
                                      vsize + 2 * CDEF_VBORDER,
                                      CDEF_HBORDER,
                                      CDEF_VERY_LARGE);
                }
                // if ppcs->cdef_search_ctrls.use_reference_cdef_fs is true, then search was not performed
                // Therefore, need to make sure dir and var are initialized
                if (level || sec_strength || !dirinit) {
                    svt_cdef_filter_fb(
                        is_16bit ? NULL
                                 : &rec_buff[rec_stride * (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
                                             (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
                        is_16bit ? &((uint16_t*)rec_buff)[rec_stride * (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
                                                          (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])]
                                 : NULL,
                        rec_stride,
                        &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER],
                        xdec[pli],
                        ydec[pli],
                        *dir,
                        &dirinit,
                        *var,
                        pli,
                        dlist,
                        cdef_count,
                        level,
                        sec_strength,
                        pri_damping,
                        sec_damping,
                        coeff_shift,
                        1); // no subsampling
                }
            }
            cdef_left = 1; // Filtered data is written back directly to the reconstructed frame.
        }
        {
            // The flags of the row just processed become the "previous row" for the next one.
            uint8_t* tmp  = prev_row_cdef;
            prev_row_cdef = curr_row_cdef;
            curr_row_cdef = tmp;
        }
    }
    cdef_frame_release_buffers(row_cdef, linebuf, colbuf, num_planes);
}
562
563
///-------search
564
/*
565
 * Search for the best luma+chroma strength to add as an option, knowing we
566
 * already selected nb_strengths options
567
 *
568
 * Params:
569
 *
570
 * lev0 : Array of indices of selected luma strengths.
571
 * lev1 : Array of indices of selected chroma strengths.
572
 * nb_strengths : Number of selected (Luma_strength, Chroma_strength) pairs.
573
 * mse : Array of luma and chroma filtering mse values.
574
 * sb_count : Number of filter blocks in the frame.
575
 * start_gi : starting strength index for the search of the additional strengths.
576
 * end_gi : End index (exclusive) for the search of the additional strengths.
577
*/
578
uint64_t svt_search_one_dual_c(int* lev0, int* lev1, int nb_strengths, uint64_t** mse[2], int sb_count, int start_gi,
579
0
                               int end_gi) {
580
0
    uint64_t      tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
581
0
    int32_t       i, j;
582
0
    uint64_t      best_tot_mse    = (uint64_t)1 << 63;
583
0
    int32_t       best_id0        = 0;
584
0
    int32_t       best_id1        = 0;
585
0
    const int32_t total_strengths = end_gi;
586
0
    memset(tot_mse, 0, sizeof(tot_mse));
587
    /* Loop over the filter blocks in the frame */
588
0
    for (i = 0; i < sb_count; i++) {
589
0
        int32_t  gi;
590
0
        uint64_t best_mse = (uint64_t)1 << 63;
591
        /* Loop over the already selected nb_strengths (Luma_strength,
592
           Chroma_strength) pairs, and find the pair that has the smallest mse
593
           (best_mse) for the current filter block.*/
594
        /* Find best mse among already selected options. */
595
0
        for (gi = 0; gi < nb_strengths; gi++) {
596
0
            uint64_t curr = mse[0][i][lev0[gi]];
597
0
            curr += mse[1][i][lev1[gi]];
598
0
            if (curr < best_mse) {
599
0
                best_mse = curr;
600
0
            }
601
0
        }
602
        /* Loop over the set of available (Luma_strength, Chroma_strength)
603
           pairs, identify any that provide an mse better than best_mse from the
604
           step above for the current filter block, and update any corresponding
605
           total mse (tot_mse[j][k]). */
606
        /* Find best mse when adding each possible new option. */
607
0
        for (j = start_gi; j < total_strengths; j++) {
608
0
            int32_t k;
609
0
            for (k = start_gi; k < total_strengths; k++) {
610
0
                uint64_t best = best_mse;
611
0
                uint64_t curr = mse[0][i][j];
612
0
                curr += mse[1][i][k];
613
0
                if (curr < best) {
614
0
                    best = curr;
615
0
                }
616
0
                tot_mse[j][k] += best;
617
0
            }
618
0
        }
619
0
    }
620
    /* Loop over the additionally searched (Luma_strength, Chroma_strength) pairs
621
       from the step above, and identify any such pair that provided the best mse for
622
       the whole frame. The identified pair would be added to the set of already selected pairs. */
623
0
    for (j = start_gi; j < total_strengths; j++) { // Loop over the additionally searched luma strengths
624
0
        int32_t k;
625
0
        for (k = start_gi; k < total_strengths; k++) { // Loop over the additionally searched chroma strengths
626
0
            if (tot_mse[j][k] < best_tot_mse) {
627
0
                best_tot_mse = tot_mse[j][k];
628
0
                best_id0     = j; // index for the best luma strength
629
0
                best_id1     = k; // index for the best chroma strength
630
0
            }
631
0
        }
632
0
    }
633
0
    lev0[nb_strengths] = best_id0; // Add the identified luma strength to the list of selected luma strengths
634
0
    lev1[nb_strengths] = best_id1; // Add the identified chroma strength to the list of selected chroma strengths
635
0
    return best_tot_mse;
636
0
}
637
638
/*
639
 * Search for the set of luma+chroma strengths that minimizes mse.
640
 *
641
 * Params:
642
 *
643
 * best_lev0 : Array of indices of selected luma strengths.
644
 * best_lev1 : Array of indices of selected chroma strengths.
645
 * nb_strengths : Number of selected (Luma_strength, Chroma_strength) pairs.
646
 * mse : Array of luma and chroma filtering mse values.
647
 * sb_count : Number of filter blocks in the frame.
648
 * start_gi : starting strength index for the search of the additional strengths.
649
 * end_gi : End index for the search of the additional strengths.
650
*/
651
static uint64_t joint_strength_search_dual(int32_t* best_lev0, int32_t* best_lev1, int32_t nb_strengths,
652
0
                                           uint64_t** mse[2], int32_t sb_count, int32_t start_gi, int32_t end_gi) {
653
0
    uint64_t best_tot_mse;
654
0
    int32_t  i;
655
0
    best_tot_mse = (uint64_t)1 << 63;
656
    /* Greedy search: add one strength options at a time.
657
658
    Determine nb_strengths (Luma_strength, Chroma_strength) pairs.
659
    The list of nb_strengths pairs is determined by adding one such pair at
660
    a time through the call to the function search_one_dual. When the
661
    function search_one_dual is called, the search accounts for the
662
    strength pairs that have already been added in the previous iteration of
663
    the loop below. The loop below returns in the end best_tot_mse
664
    representing the best filtering mse for the whole frame based on the
665
    selected list of best (Luma_strength, Chroma_strength) pairs.
666
    */
667
0
    for (i = 0; i < nb_strengths; i++) {
668
0
        best_tot_mse = svt_search_one_dual(best_lev0, best_lev1, i, mse, sb_count, start_gi, end_gi);
669
0
    }
670
    /* Performing further refinements on the search based on the results
671
    from the step above. Trying to refine the greedy search by reconsidering each
672
    already-selected option. */
673
0
    for (i = 0; i < 4 * nb_strengths; i++) {
674
0
        int32_t j;
675
0
        for (j = 0; j < nb_strengths - 1; j++) {
676
0
            best_lev0[j] = best_lev0[j + 1];
677
0
            best_lev1[j] = best_lev1[j + 1];
678
0
        }
679
0
        best_tot_mse = svt_search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse, sb_count, start_gi, end_gi);
680
0
    }
681
0
    return best_tot_mse;
682
0
}
683
684
// This kernel is ported/adapted from libaom (AV1 reference implementation).
685
// Original logic inspired by aom_pick_cdef_from_qp().
686
// Adjusted to match SVT-AV1 data structures and pipeline integration.
687
static void svt_pick_cdef_from_qp(PictureParentControlSet* ppcs, int32_t is_screen_content, int32_t* pred_y_strength,
688
274
                                  int32_t* pred_uv_strength) {
689
274
    FrameHeader*  frm_hdr    = &ppcs->frm_hdr;
690
274
    const uint8_t bit_depth  = ppcs->enhanced_pic->bit_depth;
691
274
    const int32_t base_q_idx = frm_hdr->quantization_params.base_q_idx;
692
693
274
    int32_t q = svt_aom_ac_quant_qtx(base_q_idx, 0, bit_depth);
694
274
    q >>= (bit_depth - 8);
695
696
274
    int32_t y_f1 = 0, y_f2 = 0;
697
274
    int32_t uv_f1 = 0, uv_f2 = 0;
698
699
274
    const int32_t is_intra = (frm_hdr->frame_type == KEY_FRAME || frm_hdr->frame_type == INTRA_ONLY_FRAME);
700
701
274
    if (is_screen_content) {
702
0
        y_f1 = (int32_t)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02);
703
704
0
        y_f2 = (int32_t)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01);
705
706
0
        uv_f1 = (int32_t)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01);
707
708
0
        uv_f2 = (int32_t)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+0);
709
274
    } else if (!is_intra) {
710
0
        y_f1 = (int32_t)roundf(q * q * -0.0000023593946f + q * 0.0068615186f + 0.02709886f);
711
712
0
        y_f2 = (int32_t)roundf(q * q * -0.00000057629734f + q * 0.0013993345f + 0.03831067f);
713
714
0
        uv_f1 = (int32_t)roundf(q * q * -0.0000007095069f + q * 0.0034628846f + 0.00887099f);
715
716
0
        uv_f2 = (int32_t)roundf(q * q * 0.00000023874085f + q * 0.00028223585f + 0.05576307f);
717
274
    } else { // Intra
718
274
        y_f1 = (int32_t)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f);
719
720
274
        y_f2 = (int32_t)roundf(q * q * 0.0000029167343f + q * 0.0027798624f + 0.0079405f);
721
722
274
        uv_f1 = (int32_t)roundf(q * q * -0.0000130790995f + q * 0.012892405f - 0.00748388f);
723
724
274
        uv_f2 = (int32_t)roundf(q * q * 0.0000032651783f + q * 0.00035520183f + 0.00228092f);
725
274
    }
726
727
    // Clamp to AV1 limits
728
274
    y_f1  = clamp(y_f1, 0, 15);
729
274
    y_f2  = clamp(y_f2, 0, 3);
730
274
    uv_f1 = clamp(uv_f1, 0, 15);
731
274
    uv_f2 = clamp(uv_f2, 0, 3);
732
733
    // Pack primary + secondary
734
274
    *pred_y_strength  = y_f1 * CDEF_SEC_STRENGTHS + y_f2;
735
274
    *pred_uv_strength = uv_f1 * CDEF_SEC_STRENGTHS + uv_f2;
736
274
}
737
738
#if CLN_FINISH_CDEF
739
740
// Propagate cdef_strength to all 64x64 mi
741
3.38k
static INLINE void propagate_cdef_strength(PictureControlSet* pcs, int32_t sb_index, int8_t strength) {
742
3.38k
    MbModeInfo* mbmi    = pcs->mi_grid_base[sb_index];
743
3.38k
    mbmi->cdef_strength = strength;
744
3.38k
    switch (mbmi->bsize) {
745
0
    case BLOCK_128X128:
746
0
        pcs->mi_grid_base[sb_index + MI_SIZE_64X64]->cdef_strength                                  = strength;
747
0
        pcs->mi_grid_base[sb_index + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength                 = strength;
748
0
        pcs->mi_grid_base[sb_index + MI_SIZE_64X64 * pcs->mi_stride + MI_SIZE_64X64]->cdef_strength = strength;
749
0
        break;
750
0
    case BLOCK_128X64:
751
0
        pcs->mi_grid_base[sb_index + MI_SIZE_64X64]->cdef_strength = strength;
752
0
        break;
753
0
    case BLOCK_64X128:
754
0
        pcs->mi_grid_base[sb_index + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength = strength;
755
0
        break;
756
3.38k
    default:
757
3.38k
        break;
758
3.38k
    }
759
3.38k
}
760
761
274
// CDEF damping value derived from the frame quantizer index: 3 + (base_q_idx >> 6).
// The argument is evaluated once; finish_cdef_search() stores the result in cdef_params.cdef_damping.
#define CDEF_DAMPING_FROM_QP(base_q_idx) (3 + ((base_q_idx) >> 6))
762
763
274
void finish_cdef_search(PictureControlSet* pcs) {
764
274
    PictureParentControlSet* ppcs    = pcs->ppcs;
765
274
    FrameHeader*             frm_hdr = &ppcs->frm_hdr;
766
274
    Av1Common*               cm      = ppcs->av1_cm;
767
274
    int32_t                  mi_rows = ppcs->av1_cm->mi_rows;
768
274
    int32_t                  mi_cols = ppcs->av1_cm->mi_cols;
769
770
274
    int32_t  fbr, fbc;
771
274
    uint64_t best_tot_mse = (uint64_t)1 << 63;
772
274
    int32_t  sb_count;
773
274
    int32_t  nvfb = (mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
774
274
    int32_t  nhfb = (mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
775
776
274
    CdefSearchControls* cdef_search_ctrls = &pcs->ppcs->cdef_search_ctrls;
777
778
274
    if (cdef_search_ctrls->use_qp_strength) {
779
274
#if OPT_SC_CDEF_QP
780
274
        const bool    allintra  = ppcs->scs->allintra;
781
274
        const uint8_t sc_class1 = ppcs->sc_class1;
782
274
        const uint8_t sc_class5 = ppcs->sc_class5;
783
274
        const uint8_t sc        = allintra ? sc_class5 : sc_class1;
784
274
        int           pred_y, pred_uv;
785
274
        svt_pick_cdef_from_qp(ppcs, sc, &pred_y, &pred_uv);
786
#else
787
        int pred_y, pred_uv;
788
        svt_pick_cdef_from_qp(ppcs, 0, &pred_y, &pred_uv);
789
#endif
790
274
        frm_hdr->cdef_params.cdef_bits           = 0;
791
274
        ppcs->nb_cdef_strengths                  = 1;
792
274
        frm_hdr->cdef_params.cdef_y_strength[0]  = pred_y;
793
274
        frm_hdr->cdef_params.cdef_uv_strength[0] = pred_uv;
794
274
        frm_hdr->cdef_params.cdef_damping        = CDEF_DAMPING_FROM_QP(frm_hdr->quantization_params.base_q_idx);
795
796
1.26k
        for (fbr = 0; fbr < nvfb; ++fbr) {
797
4.81k
            for (fbc = 0; fbc < nhfb; ++fbc) {
798
3.82k
                const int32_t     sb_idx = MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc;
799
3.82k
                const MbModeInfo* mbmi   = pcs->mi_grid_base[sb_idx];
800
801
3.82k
                if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) ||
802
3.82k
                    ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) {
803
0
                    continue;
804
0
                }
805
3.82k
                if (svt_sb_all_skip(pcs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) {
806
438
                    continue;
807
438
                }
808
809
3.38k
                propagate_cdef_strength(pcs, sb_idx, 0);
810
3.38k
            }
811
988
        }
812
274
        return;
813
274
    }
814
815
0
    CdefReconControls* cdef_recon_ctrls           = &pcs->ppcs->cdef_recon_ctrls;
816
0
    const int          first_pass_fs_num          = cdef_search_ctrls->first_pass_fs_num;
817
0
    const int          default_second_pass_fs_num = cdef_search_ctrls->default_second_pass_fs_num;
818
819
0
    frm_hdr->cdef_params.cdef_bits           = 0;
820
0
    ppcs->nb_cdef_strengths                  = 1;
821
0
    frm_hdr->cdef_params.cdef_y_strength[0]  = cdef_search_ctrls->pred_y_f;
822
0
    frm_hdr->cdef_params.cdef_uv_strength[0] = cdef_search_ctrls->pred_uv_f;
823
0
    frm_hdr->cdef_params.cdef_damping        = CDEF_DAMPING_FROM_QP(frm_hdr->quantization_params.base_q_idx);
824
825
0
    if (cdef_search_ctrls->use_reference_cdef_fs) {
826
0
        for (fbr = 0; fbr < nvfb; ++fbr) {
827
0
            for (fbc = 0; fbc < nhfb; ++fbc) {
828
0
                const int32_t     sb_idx = MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc;
829
0
                const MbModeInfo* mbmi   = pcs->mi_grid_base[sb_idx];
830
831
0
                if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) ||
832
0
                    ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) {
833
0
                    continue;
834
0
                }
835
0
                if (svt_sb_all_skip(pcs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) {
836
0
                    continue;
837
0
                }
838
839
0
                propagate_cdef_strength(pcs, sb_idx, 0);
840
0
            }
841
0
        }
842
0
        return;
843
0
    }
844
845
0
    int32_t* sb_index;
846
0
    EB_MALLOC_ARRAY_NO_CHECK(sb_index, nvfb * nhfb);
847
0
    assert(sb_index != NULL);
848
849
0
    uint64_t** mse[2];
850
0
    EB_MALLOC_ARRAY_NO_CHECK(mse[0], nvfb * nhfb);
851
0
    EB_MALLOC_ARRAY_NO_CHECK(mse[1], nvfb * nhfb);
852
0
    assert(mse[0] != NULL);
853
0
    assert(mse[1] != NULL);
854
855
0
    const int32_t start_gi = 0;
856
0
    const int32_t end_gi   = first_pass_fs_num + default_second_pass_fs_num;
857
0
    int32_t       i;
858
0
    int32_t       nb_strengths;
859
0
    int32_t       nb_strength_bits = 0;
860
0
    uint64_t      lambda;
861
0
    uint32_t      fast_lambda, full_lambda = 0;
862
863
0
    svt_aom_lambda_assign(pcs,
864
0
                          &fast_lambda,
865
0
                          &full_lambda,
866
0
                          pcs->ppcs->enhanced_pic->bit_depth,
867
0
                          pcs->ppcs->frm_hdr.quantization_params.base_q_idx,
868
0
                          false);
869
0
    lambda   = full_lambda;
870
0
    sb_count = 0;
871
872
0
    for (fbr = 0; fbr < nvfb; ++fbr) {
873
0
        for (fbc = 0; fbc < nhfb; ++fbc) {
874
0
            const MbModeInfo* mbmi = pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc];
875
0
            if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) ||
876
0
                ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) {
877
0
                continue;
878
0
            }
879
0
            if (pcs->skip_cdef_seg[fbr * nhfb + fbc]) {
880
0
                continue;
881
0
            }
882
883
0
            mse[0][sb_count]   = pcs->mse_seg[0][fbr * nhfb + fbc];
884
0
            mse[1][sb_count]   = pcs->mse_seg[1][fbr * nhfb + fbc];
885
0
            sb_index[sb_count] = MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc;
886
0
            sb_count++;
887
0
        }
888
0
    }
889
890
    // Scale down the cost of the (0,0) filter strength to bias selection towards off.  When off, we can save the cost of the application
891
0
    if (cdef_recon_ctrls->zero_fs_cost_bias) {
892
0
        const bool is_16bit = (pcs->scs->static_config.encoder_bit_depth > EB_EIGHT_BIT);
893
0
        uint16_t   factor;
894
0
        for (i = 0; i < sb_count; i++) {
895
0
            if (is_16bit) {
896
0
                factor = cdef_recon_ctrls->zero_fs_cost_bias;
897
0
                if (mse[0][i][0] < 5000) {
898
0
                    factor = MIN(factor - 10, 64);
899
0
                } else if (mse[0][i][0] < 10000) {
900
0
                    factor = MIN(factor - 5, 64);
901
0
                } else if (mse[0][i][0] > 25000) {
902
0
                    factor = MIN(factor + 1, 64);
903
0
                }
904
0
                mse[0][i][0] = (factor * mse[0][i][0]) >> 6;
905
906
0
                factor = cdef_recon_ctrls->zero_fs_cost_bias;
907
0
                if (mse[1][i][0] < 5000) {
908
0
                    factor = MIN(factor - 10, 64);
909
0
                } else if (mse[1][i][0] < 10000) {
910
0
                    factor = MIN(factor - 5, 64);
911
0
                } else if (mse[1][i][0] > 25000) {
912
0
                    factor = MIN(factor + 1, 64);
913
0
                }
914
0
                mse[1][i][0] = (factor * mse[1][i][0]) >> 6;
915
0
            } else {
916
0
                factor = cdef_recon_ctrls->zero_fs_cost_bias;
917
0
                if (mse[0][i][0] > 25000) {
918
0
                    factor = MIN(factor + 2, 64);
919
0
                } else if (mse[0][i][0] > 10000) {
920
0
                    factor = MIN(factor + 1, 64);
921
0
                }
922
0
                mse[0][i][0] = (factor * mse[0][i][0]) >> 6;
923
924
0
                factor = cdef_recon_ctrls->zero_fs_cost_bias;
925
0
                if (mse[1][i][0] > 25000) {
926
0
                    factor = MIN(factor + 2, 64);
927
0
                } else if (mse[1][i][0] > 10000) {
928
0
                    factor = MIN(factor + 1, 64);
929
0
                }
930
0
                mse[1][i][0] = (factor * mse[1][i][0]) >> 6;
931
0
            }
932
0
        }
933
0
    }
934
935
    // Compute cost of (strength=0) to derive pcs->cdef_dist_dev
936
0
    int64_t zero_dist = 0;
937
0
    for (i = 0; i < sb_count; i++) {
938
0
        zero_dist += mse[0][i][0] + mse[1][i][0];
939
0
    }
940
0
    uint64_t zero_cost = RDCOST(lambda, av1_cost_literal(CDEF_STRENGTH_BITS * 2), zero_dist << 4);
941
942
    // Search for different numbers of signalling bits
943
0
    for (i = 0; i <= 3; i++) {
944
0
        int32_t best_lev0[CDEF_MAX_STRENGTHS] = {0};
945
0
        int32_t best_lev1[CDEF_MAX_STRENGTHS] = {0};
946
0
        nb_strengths                          = 1 << i;
947
0
        uint64_t tot_mse                      = joint_strength_search_dual(
948
0
            best_lev0, best_lev1, nb_strengths, mse, sb_count, start_gi, end_gi);
949
950
0
        const int      total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS * 2;
951
0
        const uint64_t cost       = RDCOST(lambda, av1_cost_literal(total_bits), tot_mse * 16);
952
0
        if (cost < best_tot_mse) {
953
0
            best_tot_mse     = cost;
954
0
            nb_strength_bits = i;
955
0
            for (int32_t j = 0; j < 1 << nb_strength_bits; j++) {
956
0
                frm_hdr->cdef_params.cdef_y_strength[j]  = best_lev0[j];
957
0
                frm_hdr->cdef_params.cdef_uv_strength[j] = cdef_search_ctrls->uv_from_y ? best_lev0[j] : best_lev1[j];
958
0
            }
959
0
        }
960
0
    }
961
962
0
    pcs->cdef_dist_dev = zero_cost == 0 ? 0 : (int32_t)(1000 - ((1000 * best_tot_mse) / zero_cost));
963
0
    nb_strengths       = 1 << nb_strength_bits;
964
965
0
    frm_hdr->cdef_params.cdef_bits = nb_strength_bits;
966
0
    ppcs->nb_cdef_strengths        = nb_strengths;
967
968
    // Assign each filter block its best strength index
969
0
    for (i = 0; i < sb_count; i++) {
970
0
        int32_t  gi;
971
0
        int32_t  best_gi  = 0;
972
0
        uint64_t best_mse = (uint64_t)1 << 63;
973
0
        for (gi = 0; gi < ppcs->nb_cdef_strengths; gi++) {
974
0
            uint64_t curr = mse[0][i][frm_hdr->cdef_params.cdef_y_strength[gi]] +
975
0
                mse[1][i][frm_hdr->cdef_params.cdef_uv_strength[gi]];
976
0
            if (curr < best_mse) {
977
0
                best_gi  = gi;
978
0
                best_mse = curr;
979
0
            }
980
0
        }
981
0
        propagate_cdef_strength(pcs, sb_index[i], (int8_t)best_gi);
982
0
    }
983
984
    // Map search indices back to actual filter strengths
985
0
    int filter_map[TOTAL_STRENGTHS] = {0};
986
0
    for (i = 0; i < first_pass_fs_num; i++) {
987
0
        filter_map[i] = cdef_search_ctrls->default_first_pass_fs[i];
988
0
    }
989
0
    for (i = 0; i < default_second_pass_fs_num; i++) {
990
0
        filter_map[first_pass_fs_num + i] = cdef_search_ctrls->default_second_pass_fs[i];
991
0
    }
992
993
0
    for (i = 0; i < ppcs->nb_cdef_strengths; i++) {
994
0
        frm_hdr->cdef_params.cdef_y_strength[i]  = filter_map[frm_hdr->cdef_params.cdef_y_strength[i]];
995
0
        frm_hdr->cdef_params.cdef_uv_strength[i] = filter_map[frm_hdr->cdef_params.cdef_uv_strength[i]];
996
0
    }
997
998
0
    frm_hdr->cdef_params.cdef_damping = CDEF_DAMPING_FROM_QP(frm_hdr->quantization_params.base_q_idx);
999
1000
0
    EB_FREE_ARRAY(mse[0]);
1001
0
    EB_FREE_ARRAY(mse[1]);
1002
    EB_FREE_ARRAY(sb_index);
1003
0
}
1004
#else
1005
void finish_cdef_search(PictureControlSet* pcs) {
1006
    PictureParentControlSet* ppcs    = pcs->ppcs;
1007
    FrameHeader*             frm_hdr = &ppcs->frm_hdr;
1008
    Av1Common*               cm      = ppcs->av1_cm;
1009
    int32_t                  mi_rows = ppcs->av1_cm->mi_rows;
1010
    int32_t                  mi_cols = ppcs->av1_cm->mi_cols;
1011
1012
    int32_t  fbr, fbc;
1013
    uint64_t best_tot_mse = (uint64_t)1 << 63;
1014
    int32_t  sb_count;
1015
    int32_t  nvfb = (mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
1016
    int32_t  nhfb = (mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
1017
    //CDEF Settings
1018
    CdefSearchControls* cdef_search_ctrls = &pcs->ppcs->cdef_search_ctrls;
1019
    if (cdef_search_ctrls->use_qp_strength) {
1020
        int pred_y, pred_uv;
1021
1022
        // Predict Y/UV strengths from QP
1023
        svt_pick_cdef_from_qp(ppcs, 0, &pred_y, &pred_uv);
1024
1025
        // Frame-level parameters
1026
        frm_hdr->cdef_params.cdef_bits           = 0; // only one strength index
1027
        ppcs->nb_cdef_strengths                  = 1;
1028
        frm_hdr->cdef_params.cdef_y_strength[0]  = pred_y;
1029
        frm_hdr->cdef_params.cdef_uv_strength[0] = pred_uv;
1030
        frm_hdr->cdef_params.cdef_damping        = 3 + (frm_hdr->quantization_params.base_q_idx >> 6);
1031
1032
        // Assign strength index 0 to all valid 64x64 blocks
1033
        for (fbr = 0; fbr < nvfb; ++fbr) {
1034
            for (fbc = 0; fbc < nhfb; ++fbc) {
1035
                MbModeInfo* mbmi = pcs->mi_grid_base[MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc];
1036
1037
                // Skip duplicated 64x64 blocks inside larger 128x128/128x64/64x128
1038
                if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) ||
1039
                    ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) {
1040
                    continue;
1041
                }
1042
1043
                // No filtering if the entire filter block is skipped
1044
                if (svt_sb_all_skip(pcs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) {
1045
                    continue;
1046
                }
1047
1048
                mbmi->cdef_strength = 0;
1049
1050
                // Duplicate for large blocks in SVT MI map
1051
                switch (mbmi->bsize) {
1052
                case BLOCK_128X128:
1053
                    pcs->mi_grid_base[MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc + MI_SIZE_64X64]
1054
                        ->cdef_strength = 0;
1055
1056
                    pcs->mi_grid_base[(MI_SIZE_64X64 * fbr + MI_SIZE_64X64) * pcs->mi_stride + MI_SIZE_64X64 * fbc]
1057
                        ->cdef_strength = 0;
1058
1059
                    pcs->mi_grid_base[(MI_SIZE_64X64 * fbr + MI_SIZE_64X64) * pcs->mi_stride + MI_SIZE_64X64 * fbc +
1060
                                      MI_SIZE_64X64]
1061
                        ->cdef_strength = 0;
1062
                    break;
1063
1064
                case BLOCK_128X64:
1065
                    pcs->mi_grid_base[MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc + MI_SIZE_64X64]
1066
                        ->cdef_strength = 0;
1067
                    break;
1068
1069
                case BLOCK_64X128:
1070
                    pcs->mi_grid_base[(MI_SIZE_64X64 * fbr + MI_SIZE_64X64) * pcs->mi_stride + MI_SIZE_64X64 * fbc]
1071
                        ->cdef_strength = 0;
1072
                    break;
1073
1074
                default:
1075
                    break;
1076
                }
1077
            }
1078
        }
1079
        return;
1080
    }
1081
1082
    CdefReconControls* cdef_recon_ctrls           = &pcs->ppcs->cdef_recon_ctrls;
1083
    const int          first_pass_fs_num          = cdef_search_ctrls->first_pass_fs_num;
1084
    const int          default_second_pass_fs_num = cdef_search_ctrls->default_second_pass_fs_num;
1085
1086
    if (cdef_search_ctrls->use_reference_cdef_fs) {
1087
        int32_t* sb_index;
1088
        EB_MALLOC_ARRAY_NO_CHECK(sb_index, nvfb * nhfb);
1089
        int32_t best_gi = 0;
1090
        sb_count        = 0;
1091
        assert(sb_index != NULL);
1092
        for (fbr = 0; fbr < nvfb; ++fbr) {
1093
            for (fbc = 0; fbc < nhfb; ++fbc) {
1094
                const MbModeInfo* mbmi = pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc];
1095
                if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) ||
1096
                    ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) {
1097
                    continue;
1098
                }
1099
                // No filtering if the entire filter block is skipped
1100
                if (svt_sb_all_skip(pcs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) {
1101
                    continue;
1102
                }
1103
                sb_index[sb_count] = MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc;
1104
                sb_count++;
1105
            }
1106
        }
1107
        for (int32_t i = 0; i < sb_count; i++) {
1108
            pcs->mi_grid_base[sb_index[i]]->cdef_strength = (int8_t)best_gi;
1109
            //in case the fb is within a block=128x128 or 128x64, or 64x128, then we genrate param only for the first 64x64.
1110
            //since our mi map deos not have the multi pointer single data assignment, we need to duplicate data.
1111
            BlockSize bsize = pcs->mi_grid_base[sb_index[i]]->bsize;
1112
            switch (bsize) {
1113
            case BLOCK_128X128:
1114
                pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->cdef_strength                  = (int8_t)best_gi;
1115
                pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength = (int8_t)best_gi;
1116
                pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride + MI_SIZE_64X64]->cdef_strength =
1117
                    (int8_t)best_gi;
1118
                break;
1119
            case BLOCK_128X64:
1120
                pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->cdef_strength = (int8_t)best_gi;
1121
                break;
1122
            case BLOCK_64X128:
1123
                pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength = (int8_t)best_gi;
1124
                break;
1125
            default:
1126
                break;
1127
            }
1128
        }
1129
        frm_hdr->cdef_params.cdef_bits = 0;
1130
        ppcs->nb_cdef_strengths        = 1;
1131
        //cdef_pri_damping & cdef_sec_damping consolidated to cdef_damping
1132
        int32_t pri_damping                      = 3 + (frm_hdr->quantization_params.base_q_idx >> 6);
1133
        frm_hdr->cdef_params.cdef_damping        = pri_damping;
1134
        frm_hdr->cdef_params.cdef_y_strength[0]  = cdef_search_ctrls->pred_y_f;
1135
        frm_hdr->cdef_params.cdef_uv_strength[0] = cdef_search_ctrls->pred_uv_f;
1136
        EB_FREE_ARRAY(sb_index);
1137
        return;
1138
    }
1139
    int32_t* sb_index;
1140
    // to keep track of the sb_address in units of SBs (not mi_size)
1141
    int32_t* sb_addr;
1142
    EB_MALLOC_ARRAY_NO_CHECK(sb_index, nvfb * nhfb);
1143
    EB_MALLOC_ARRAY_NO_CHECK(sb_addr, nvfb * nhfb);
1144
    assert(sb_index != NULL);
1145
    assert(sb_addr != NULL);
1146
1147
    uint64_t** mse[2];
1148
    EB_MALLOC_ARRAY_NO_CHECK(mse[0], nvfb * nhfb);
1149
    EB_MALLOC_ARRAY_NO_CHECK(mse[1], nvfb * nhfb);
1150
    assert(mse[0] != NULL);
1151
    assert(mse[1] != NULL);
1152
1153
    int32_t  start_gi = 0;
1154
    int32_t  end_gi   = first_pass_fs_num + default_second_pass_fs_num;
1155
    int32_t  i;
1156
    int32_t  nb_strengths;
1157
    int32_t  nb_strength_bits;
1158
    uint64_t lambda;
1159
    uint32_t fast_lambda, full_lambda = 0;
1160
1161
    svt_aom_lambda_assign(pcs,
1162
                          &fast_lambda,
1163
                          &full_lambda,
1164
                          pcs->ppcs->enhanced_pic->bit_depth,
1165
                          pcs->ppcs->frm_hdr.quantization_params.base_q_idx,
1166
                          false);
1167
    lambda   = full_lambda;
1168
    sb_count = 0;
1169
    for (fbr = 0; fbr < nvfb; ++fbr) {
1170
        for (fbc = 0; fbc < nhfb; ++fbc) {
1171
            const MbModeInfo* mbmi = pcs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc];
1172
            if (((fbc & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64)) ||
1173
                ((fbr & 1) && (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_64X128))) {
1174
                continue;
1175
            }
1176
1177
            // No filtering if the entire filter block is skipped
1178
            if (pcs->skip_cdef_seg[fbr * nhfb + fbc]) {
1179
                continue;
1180
            }
1181
            // point to the MSE data
1182
            mse[0][sb_count] = pcs->mse_seg[0][fbr * nhfb + fbc];
1183
            mse[1][sb_count] = pcs->mse_seg[1][fbr * nhfb + fbc];
1184
1185
            sb_index[sb_count] = MI_SIZE_64X64 * fbr * pcs->mi_stride + MI_SIZE_64X64 * fbc;
1186
            sb_addr[sb_count]  = fbr * nhfb + fbc;
1187
            sb_count++;
1188
        }
1189
    }
1190
1191
    nb_strength_bits = 0;
1192
    // Scale down the cost of the (0,0) filter strength to bias selection towards off.
1193
    // When off, can save the cost of the application.
1194
    if (cdef_recon_ctrls->zero_fs_cost_bias) {
1195
        const bool is_16bit = (pcs->scs->static_config.encoder_bit_depth > EB_EIGHT_BIT);
1196
        uint16_t   factor;
1197
        for (i = 0; i < sb_count; i++) {
1198
            if (is_16bit) {
1199
                factor = cdef_recon_ctrls->zero_fs_cost_bias;
1200
                if (mse[0][i][0] < 5000) {
1201
                    factor = MIN(factor - 10, 64);
1202
                } else if (mse[0][i][0] < 10000) {
1203
                    factor = MIN(factor - 5, 64);
1204
                } else if (mse[0][i][0] > 25000) {
1205
                    factor = MIN(factor + 1, 64);
1206
                }
1207
                mse[0][i][0] = (factor * mse[0][i][0]) >> 6;
1208
1209
                factor = cdef_recon_ctrls->zero_fs_cost_bias;
1210
                if (mse[1][i][0] < 5000) {
1211
                    factor = MIN(factor - 10, 64);
1212
                } else if (mse[1][i][0] < 10000) {
1213
                    factor = MIN(factor - 5, 64);
1214
                } else if (mse[1][i][0] > 25000) {
1215
                    factor = MIN(factor + 1, 64);
1216
                }
1217
                mse[1][i][0] = (factor * mse[1][i][0]) >> 6;
1218
            } else {
1219
                factor = cdef_recon_ctrls->zero_fs_cost_bias;
1220
                if (mse[0][i][0] > 25000) {
1221
                    factor = MIN(factor + 2, 64);
1222
                } else if (mse[0][i][0] > 10000) {
1223
                    factor = MIN(factor + 1, 64);
1224
                }
1225
                mse[0][i][0] = (factor * mse[0][i][0]) >> 6;
1226
1227
                factor = cdef_recon_ctrls->zero_fs_cost_bias;
1228
                if (mse[1][i][0] > 25000) {
1229
                    factor = MIN(factor + 2, 64);
1230
                } else if (mse[1][i][0] > 10000) {
1231
                    factor = MIN(factor + 1, 64);
1232
                }
1233
1234
                mse[1][i][0] = (factor * mse[1][i][0]) >> 6;
1235
            }
1236
        }
1237
    }
1238
    // Compute cost of off to use in deriving pcs->cdef_dist_dev
1239
    int64_t zero_dist = 0;
1240
    for (i = 0; i < sb_count; i++) {
1241
        zero_dist += mse[0][i][0] + mse[1][i][0];
1242
    }
1243
    uint64_t zero_cost = RDCOST(lambda, av1_cost_literal(CDEF_STRENGTH_BITS * 2), zero_dist << 4);
1244
    /* Search for different number of signalling bits. */
1245
    for (i = 0; i <= 3; i++) {
1246
        int32_t best_lev0[CDEF_MAX_STRENGTHS] = {0};
1247
        int32_t best_lev1[CDEF_MAX_STRENGTHS] = {0};
1248
        nb_strengths                          = 1 << i;
1249
        uint64_t tot_mse                      = joint_strength_search_dual(
1250
            best_lev0, best_lev1, nb_strengths, mse, sb_count, start_gi, end_gi);
1251
        /* Count superblock signalling cost. */
1252
        const int      total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS * 2;
1253
        const int      rate_cost  = av1_cost_literal(total_bits);
1254
        const uint64_t dist       = tot_mse * 16;
1255
        tot_mse                   = RDCOST(lambda, rate_cost, dist);
1256
        if (tot_mse < best_tot_mse) {
1257
            best_tot_mse     = tot_mse;
1258
            nb_strength_bits = i;
1259
            for (int32_t j = 0; j < 1 << nb_strength_bits; j++) {
1260
                frm_hdr->cdef_params.cdef_y_strength[j]  = best_lev0[j];
1261
                frm_hdr->cdef_params.cdef_uv_strength[j] = cdef_search_ctrls->uv_from_y ? best_lev0[j] : best_lev1[j];
1262
            }
1263
        }
1264
    }
1265
    pcs->cdef_dist_dev = zero_cost == 0 ? 0 : (int32_t)(1000 - ((1000 * best_tot_mse) / zero_cost));
1266
    nb_strengths       = 1 << nb_strength_bits;
1267
1268
    frm_hdr->cdef_params.cdef_bits = nb_strength_bits;
1269
    ppcs->nb_cdef_strengths        = nb_strengths;
1270
    for (i = 0; i < sb_count; i++) {
1271
        int32_t  gi;
1272
        int32_t  best_gi;
1273
        uint64_t best_mse = (uint64_t)1 << 63;
1274
        best_gi           = 0;
1275
        // skip this loop for SBs that are skipped in the search
1276
        for (gi = 0; gi < ppcs->nb_cdef_strengths; gi++) {
1277
            uint64_t curr = mse[0][i][frm_hdr->cdef_params.cdef_y_strength[gi]];
1278
            curr += mse[1][i][frm_hdr->cdef_params.cdef_uv_strength[gi]];
1279
            if (curr < best_mse) {
1280
                best_gi  = gi;
1281
                best_mse = curr;
1282
            }
1283
        }
1284
1285
        pcs->mi_grid_base[sb_index[i]]->cdef_strength = (int8_t)best_gi;
1286
        //in case the fb is within a block=128x128 or 128x64, or 64x128, then we genrate param only for the first 64x64.
1287
        //since our mi map deos not have the multi pointer single data assignment, we need to duplicate data.
1288
        BlockSize bsize = pcs->mi_grid_base[sb_index[i]]->bsize;
1289
1290
        switch (bsize) {
1291
        case BLOCK_128X128:
1292
            pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->cdef_strength                  = (int8_t)best_gi;
1293
            pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength = (int8_t)best_gi;
1294
            pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride + MI_SIZE_64X64]->cdef_strength = (int8_t)
1295
                best_gi;
1296
            break;
1297
        case BLOCK_128X64:
1298
            pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->cdef_strength = (int8_t)best_gi;
1299
            break;
1300
        case BLOCK_64X128:
1301
            pcs->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs->mi_stride]->cdef_strength = (int8_t)best_gi;
1302
            break;
1303
        default:
1304
            break;
1305
        }
1306
    }
1307
    int filter_map[TOTAL_STRENGTHS] = {0};
1308
    for (i = 0; i < first_pass_fs_num; i++) {
1309
        filter_map[i] = cdef_search_ctrls->default_first_pass_fs[i];
1310
    }
1311
    for (i = first_pass_fs_num; i < (first_pass_fs_num + default_second_pass_fs_num); i++) {
1312
        filter_map[i] = cdef_search_ctrls->default_second_pass_fs[i - first_pass_fs_num];
1313
    }
1314
1315
    for (i = 0; i < ppcs->nb_cdef_strengths; i++) {
1316
        frm_hdr->cdef_params.cdef_y_strength[i]  = filter_map[frm_hdr->cdef_params.cdef_y_strength[i]];
1317
        frm_hdr->cdef_params.cdef_uv_strength[i] = filter_map[frm_hdr->cdef_params.cdef_uv_strength[i]];
1318
    }
1319
    //cdef_pri_damping & cdef_sec_damping consolidated to cdef_damping
1320
    frm_hdr->cdef_params.cdef_damping = 3 + (frm_hdr->quantization_params.base_q_idx >> 6);
1321
    EB_FREE_ARRAY(mse[0]);
1322
    EB_FREE_ARRAY(mse[1]);
1323
    EB_FREE_ARRAY(sb_index);
1324
    EB_FREE_ARRAY(sb_addr);
1325
}
1326
#endif