Coverage Report

Created: 2025-06-22 08:04

/src/aom/av1/encoder/pickcdef.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <math.h>
13
#include <stdbool.h>
14
#include <string.h>
15
16
#include "config/aom_dsp_rtcd.h"
17
#include "config/aom_scale_rtcd.h"
18
19
#include "aom/aom_integer.h"
20
#include "av1/common/av1_common_int.h"
21
#include "av1/common/reconinter.h"
22
#include "av1/encoder/encoder.h"
23
#include "av1/encoder/ethread.h"
24
#include "av1/encoder/pickcdef.h"
25
#include "av1/encoder/mcomp.h"
26
27
// Get primary and secondary filter strength for the given strength index and
28
// search method
29
static inline void get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,
30
                                             int *pri_strength,
31
                                             int *sec_strength,
32
0
                                             int strength_idx) {
33
0
  const int tot_sec_filter =
34
0
      (pick_method == CDEF_FAST_SEARCH_LVL5)
35
0
          ? REDUCED_SEC_STRENGTHS_LVL5
36
0
          : ((pick_method >= CDEF_FAST_SEARCH_LVL3) ? REDUCED_SEC_STRENGTHS_LVL3
37
0
                                                    : CDEF_SEC_STRENGTHS);
38
0
  const int pri_idx = strength_idx / tot_sec_filter;
39
0
  const int sec_idx = strength_idx % tot_sec_filter;
40
0
  *pri_strength = pri_idx;
41
0
  *sec_strength = sec_idx;
42
0
  if (pick_method == CDEF_FULL_SEARCH) return;
43
44
0
  switch (pick_method) {
45
0
    case CDEF_FAST_SEARCH_LVL1:
46
0
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL1);
47
0
      *pri_strength = priconv_lvl1[pri_idx];
48
0
      break;
49
0
    case CDEF_FAST_SEARCH_LVL2:
50
0
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2);
51
0
      *pri_strength = priconv_lvl2[pri_idx];
52
0
      break;
53
0
    case CDEF_FAST_SEARCH_LVL3:
54
0
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2);
55
0
      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3);
56
0
      *pri_strength = priconv_lvl2[pri_idx];
57
0
      *sec_strength = secconv_lvl3[sec_idx];
58
0
      break;
59
0
    case CDEF_FAST_SEARCH_LVL4:
60
0
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4);
61
0
      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3);
62
0
      *pri_strength = priconv_lvl4[pri_idx];
63
0
      *sec_strength = secconv_lvl3[sec_idx];
64
0
      break;
65
0
    case CDEF_FAST_SEARCH_LVL5:
66
0
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4);
67
0
      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL5);
68
0
      *pri_strength = priconv_lvl5[pri_idx];
69
0
      *sec_strength = secconv_lvl5[sec_idx];
70
0
      break;
71
0
    default: assert(0 && "Invalid CDEF search method");
72
0
  }
73
0
}
74
75
// Converts strength_idx into a (primary, secondary) strength pair for
// pick_method and stores the packed value
//   primary * CDEF_SEC_STRENGTHS + secondary
// into cdef_strength.
// The expansion writes to variables named pri_strength and sec_strength,
// which must be declared as int in the scope where the macro is used.
#define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx) \
  do {                                                                       \
    get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength,   \
                              (strength_idx));                               \
    (cdef_strength) = pri_strength * CDEF_SEC_STRENGTHS + sec_strength;      \
  } while (0)
83
84
/* Search for the best strength to add as an option, knowing we
85
   already selected nb_strengths options. */
86
static uint64_t search_one(int *lev, int nb_strengths,
87
                           uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
88
0
                           CDEF_PICK_METHOD pick_method) {
89
0
  uint64_t tot_mse[TOTAL_STRENGTHS];
90
0
  const int total_strengths = nb_cdef_strengths[pick_method];
91
0
  int i, j;
92
0
  uint64_t best_tot_mse = (uint64_t)1 << 63;
93
0
  int best_id = 0;
94
0
  memset(tot_mse, 0, sizeof(tot_mse));
95
0
  for (i = 0; i < sb_count; i++) {
96
0
    int gi;
97
0
    uint64_t best_mse = (uint64_t)1 << 63;
98
    /* Find best mse among already selected options. */
99
0
    for (gi = 0; gi < nb_strengths; gi++) {
100
0
      if (mse[i][lev[gi]] < best_mse) {
101
0
        best_mse = mse[i][lev[gi]];
102
0
      }
103
0
    }
104
    /* Find best mse when adding each possible new option. */
105
0
    for (j = 0; j < total_strengths; j++) {
106
0
      uint64_t best = best_mse;
107
0
      if (mse[i][j] < best) best = mse[i][j];
108
0
      tot_mse[j] += best;
109
0
    }
110
0
  }
111
0
  for (j = 0; j < total_strengths; j++) {
112
0
    if (tot_mse[j] < best_tot_mse) {
113
0
      best_tot_mse = tot_mse[j];
114
0
      best_id = j;
115
0
    }
116
0
  }
117
0
  lev[nb_strengths] = best_id;
118
0
  return best_tot_mse;
119
0
}
120
121
/* Search for the best luma+chroma strength to add as an option, knowing we
122
   already selected nb_strengths options. */
123
static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
124
                                uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
125
0
                                CDEF_PICK_METHOD pick_method) {
126
0
  uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
127
0
  int i, j;
128
0
  uint64_t best_tot_mse = (uint64_t)1 << 63;
129
0
  int best_id0 = 0;
130
0
  int best_id1 = 0;
131
0
  const int total_strengths = nb_cdef_strengths[pick_method];
132
0
  memset(tot_mse, 0, sizeof(tot_mse));
133
0
  for (i = 0; i < sb_count; i++) {
134
0
    int gi;
135
0
    uint64_t best_mse = (uint64_t)1 << 63;
136
    /* Find best mse among already selected options. */
137
0
    for (gi = 0; gi < nb_strengths; gi++) {
138
0
      uint64_t curr = mse[0][i][lev0[gi]];
139
0
      curr += mse[1][i][lev1[gi]];
140
0
      if (curr < best_mse) {
141
0
        best_mse = curr;
142
0
      }
143
0
    }
144
    /* Find best mse when adding each possible new option. */
145
0
    for (j = 0; j < total_strengths; j++) {
146
0
      int k;
147
0
      for (k = 0; k < total_strengths; k++) {
148
0
        uint64_t best = best_mse;
149
0
        uint64_t curr = mse[0][i][j];
150
0
        curr += mse[1][i][k];
151
0
        if (curr < best) best = curr;
152
0
        tot_mse[j][k] += best;
153
0
      }
154
0
    }
155
0
  }
156
0
  for (j = 0; j < total_strengths; j++) {
157
0
    int k;
158
0
    for (k = 0; k < total_strengths; k++) {
159
0
      if (tot_mse[j][k] < best_tot_mse) {
160
0
        best_tot_mse = tot_mse[j][k];
161
0
        best_id0 = j;
162
0
        best_id1 = k;
163
0
      }
164
0
    }
165
0
  }
166
0
  lev0[nb_strengths] = best_id0;
167
0
  lev1[nb_strengths] = best_id1;
168
0
  return best_tot_mse;
169
0
}
170
171
/* Search for the set of strengths that minimizes mse. */
172
static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
173
                                      uint64_t mse[][TOTAL_STRENGTHS],
174
                                      int sb_count,
175
0
                                      CDEF_PICK_METHOD pick_method) {
176
0
  uint64_t best_tot_mse;
177
0
  int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
178
0
              pick_method <= CDEF_FAST_SEARCH_LVL5);
179
0
  int i;
180
0
  best_tot_mse = (uint64_t)1 << 63;
181
  /* Greedy search: add one strength options at a time. */
182
0
  for (i = 0; i < nb_strengths; i++) {
183
0
    best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method);
184
0
  }
185
  /* Trying to refine the greedy search by reconsidering each
186
     already-selected option. */
187
0
  if (!fast) {
188
0
    for (i = 0; i < 4 * nb_strengths; i++) {
189
0
      int j;
190
0
      for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
191
0
      best_tot_mse =
192
0
          search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method);
193
0
    }
194
0
  }
195
0
  return best_tot_mse;
196
0
}
197
198
/* Search for the set of luma+chroma strengths that minimizes mse. */
199
static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
200
                                           int nb_strengths,
201
                                           uint64_t (**mse)[TOTAL_STRENGTHS],
202
                                           int sb_count,
203
0
                                           CDEF_PICK_METHOD pick_method) {
204
0
  uint64_t best_tot_mse;
205
0
  int i;
206
0
  best_tot_mse = (uint64_t)1 << 63;
207
  /* Greedy search: add one strength options at a time. */
208
0
  for (i = 0; i < nb_strengths; i++) {
209
0
    best_tot_mse =
210
0
        search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method);
211
0
  }
212
  /* Trying to refine the greedy search by reconsidering each
213
     already-selected option. */
214
0
  for (i = 0; i < 4 * nb_strengths; i++) {
215
0
    int j;
216
0
    for (j = 0; j < nb_strengths - 1; j++) {
217
0
      best_lev0[j] = best_lev0[j + 1];
218
0
      best_lev1[j] = best_lev1[j + 1];
219
0
    }
220
0
    best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
221
0
                                   sb_count, pick_method);
222
0
  }
223
0
  return best_tot_mse;
224
0
}
225
226
static inline void init_src_params(int *src_stride, int *width, int *height,
227
                                   int *width_log2, int *height_log2,
228
0
                                   BLOCK_SIZE bsize) {
229
0
  *src_stride = block_size_wide[bsize];
230
0
  *width = block_size_wide[bsize];
231
0
  *height = block_size_high[bsize];
232
0
  *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
233
0
  *height_log2 = MI_SIZE_LOG2 + mi_size_high_log2[bsize];
234
0
}
235
#if CONFIG_AV1_HIGHBITDEPTH
/* High bit depth distortion, accumulated only over the blocks listed in
   dlist[0..cdef_count-1].
   dst is converted with CONVERT_TO_SHORTPTR and offset by (row, col); src
   holds one packed width x height block per dlist entry. The accumulated sum
   is scaled down by 2 * coeff_shift bits before being returned. */
static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src,
                                         cdef_list *dlist, int cdef_count,
                                         BLOCK_SIZE bsize, int coeff_shift,
                                         int row, int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);

  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);

  uint16_t *const dst16 = CONVERT_TO_SHORTPTR((uint8_t *)dst);
  uint16_t *const dst_origin = &dst16[row * dstride + col];
  /* Consecutive blocks in src are one full block (width * height) apart. */
  const int blk_size_log2 = height_log2 + width_log2;

  uint64_t total = 0;
  for (int idx = 0; idx < cdef_count; ++idx) {
    const int by = dlist[idx].by;
    const int bx = dlist[idx].bx;
    uint16_t *const blk_dst =
        &dst_origin[(by << height_log2) * dstride + (bx << width_log2)];
    uint16_t *const blk_src = &src[idx << blk_size_log2];
    total += aom_mse_wxh_16bit_highbd(blk_dst, dstride, blk_src, src_stride,
                                      width, height);
  }
  return total >> 2 * coeff_shift;
}
#endif
260
261
// Checks dual and quad block processing is applicable for block widths 8 and 4
262
// respectively.
263
static inline int is_dual_or_quad_applicable(cdef_list *dlist, int width,
264
0
                                             int cdef_count, int bi, int iter) {
265
0
  assert(width == 8 || width == 4);
266
0
  const int blk_offset = (width == 8) ? 1 : 3;
267
0
  if ((iter + blk_offset) >= cdef_count) return 0;
268
269
0
  if (dlist[bi].by == dlist[bi + blk_offset].by &&
270
0
      dlist[bi].bx + blk_offset == dlist[bi + blk_offset].bx)
271
0
    return 1;
272
273
0
  return 0;
274
0
}
275
276
static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src,
277
                                  cdef_list *dlist, int cdef_count,
278
                                  BLOCK_SIZE bsize, int coeff_shift, int row,
279
0
                                  int col) {
280
0
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
281
0
         bsize == BLOCK_8X8);
282
0
  uint64_t sum = 0;
283
0
  int bi, bx, by;
284
0
  int iter = 0;
285
0
  int inc = 1;
286
0
  uint8_t *dst8 = (uint8_t *)dst;
287
0
  uint8_t *dst_buff = &dst8[row * dstride + col];
288
0
  int src_stride, width, height, width_log2, height_log2;
289
0
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
290
0
                  bsize);
291
292
0
  const int num_blks = 16 / width;
293
0
  for (bi = 0; bi < cdef_count; bi += inc) {
294
0
    by = dlist[bi].by;
295
0
    bx = dlist[bi].bx;
296
0
    uint16_t *src_tmp = &src[bi << (height_log2 + width_log2)];
297
0
    uint8_t *dst_tmp =
298
0
        &dst_buff[(by << height_log2) * dstride + (bx << width_log2)];
299
300
0
    if (is_dual_or_quad_applicable(dlist, width, cdef_count, bi, iter)) {
301
0
      sum += aom_mse_16xh_16bit(dst_tmp, dstride, src_tmp, width, height);
302
0
      iter += num_blks;
303
0
      inc = num_blks;
304
0
    } else {
305
0
      sum += aom_mse_wxh_16bit(dst_tmp, dstride, src_tmp, src_stride, width,
306
0
                               height);
307
0
      iter += 1;
308
0
      inc = 1;
309
0
    }
310
0
  }
311
312
0
  return sum >> 2 * coeff_shift;
313
0
}
314
315
// Fill the boundary regions of the block with CDEF_VERY_LARGE, only if the
316
// region is outside frame boundary
317
static inline void fill_borders_for_fbs_on_frame_boundary(
318
    uint16_t *inbuf, int hfilt_size, int vfilt_size,
319
    bool is_fb_on_frm_left_boundary, bool is_fb_on_frm_right_boundary,
320
0
    bool is_fb_on_frm_top_boundary, bool is_fb_on_frm_bottom_boundary) {
321
0
  if (!is_fb_on_frm_left_boundary && !is_fb_on_frm_right_boundary &&
322
0
      !is_fb_on_frm_top_boundary && !is_fb_on_frm_bottom_boundary)
323
0
    return;
324
0
  if (is_fb_on_frm_bottom_boundary) {
325
    // Fill bottom region of the block
326
0
    const int buf_offset =
327
0
        (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + CDEF_HBORDER;
328
0
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
329
0
              CDEF_VERY_LARGE);
330
0
  }
331
0
  if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_left_boundary) {
332
0
    const int buf_offset = (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE;
333
    // Fill bottom-left region of the block
334
0
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
335
0
              CDEF_VERY_LARGE);
336
0
  }
337
0
  if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_right_boundary) {
338
0
    const int buf_offset =
339
0
        (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + hfilt_size + CDEF_HBORDER;
340
    // Fill bottom-right region of the block
341
0
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
342
0
              CDEF_VERY_LARGE);
343
0
  }
344
0
  if (is_fb_on_frm_top_boundary) {
345
    // Fill top region of the block
346
0
    fill_rect(&inbuf[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
347
0
              CDEF_VERY_LARGE);
348
0
  }
349
0
  if (is_fb_on_frm_top_boundary || is_fb_on_frm_left_boundary) {
350
    // Fill top-left region of the block
351
0
    fill_rect(inbuf, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
352
0
  }
353
0
  if (is_fb_on_frm_top_boundary || is_fb_on_frm_right_boundary) {
354
0
    const int buf_offset = hfilt_size + CDEF_HBORDER;
355
    // Fill top-right region of the block
356
0
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
357
0
              CDEF_VERY_LARGE);
358
0
  }
359
0
  if (is_fb_on_frm_left_boundary) {
360
0
    const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
361
    // Fill left region of the block
362
0
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, vfilt_size, CDEF_HBORDER,
363
0
              CDEF_VERY_LARGE);
364
0
  }
365
0
  if (is_fb_on_frm_right_boundary) {
366
0
    const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
367
    // Fill right region of the block
368
0
    fill_rect(&inbuf[buf_offset + hfilt_size + CDEF_HBORDER], CDEF_BSTRIDE,
369
0
              vfilt_size, CDEF_HBORDER, CDEF_VERY_LARGE);
370
0
  }
371
0
}
372
373
// Calculate the number of 8x8/4x4 filter units for which SSE can be calculated
374
// after CDEF filtering in single function call
375
static AOM_FORCE_INLINE int get_error_calc_width_in_filt_units(
376
    cdef_list *dlist, int cdef_count, int bi, int subsampling_x,
377
0
    int subsampling_y) {
378
  // TODO(Ranjit): Extend the optimization for 422
379
0
  if (subsampling_x != subsampling_y) return 1;
380
381
  // Combining more blocks seems to increase encode time due to increase in
382
  // control code
383
0
  if (bi + 3 < cdef_count && dlist[bi].by == dlist[bi + 3].by &&
384
0
      dlist[bi].bx + 3 == dlist[bi + 3].bx) {
385
    /* Calculate error for four 8x8/4x4 blocks using 32x8/16x4 block specific
386
     * logic if y co-ordinates match and x co-ordinates are
387
     * separated by 3 for first and fourth 8x8/4x4 blocks in dlist[]. */
388
0
    return 4;
389
0
  }
390
0
  if (bi + 1 < cdef_count && dlist[bi].by == dlist[bi + 1].by &&
391
0
      dlist[bi].bx + 1 == dlist[bi + 1].bx) {
392
    /* Calculate error for two 8x8/4x4 blocks using 16x8/8x4 block specific
393
     * logic if their y co-ordinates match and x co-ordinates are
394
     * separated by 1 for first and second 8x8/4x4 blocks in dlist[]. */
395
0
    return 2;
396
0
  }
397
0
  return 1;
398
0
}
399
400
// Returns the block error after CDEF filtering for a given strength
401
static inline uint64_t get_filt_error(
402
    const CdefSearchCtx *cdef_search_ctx, const struct macroblockd_plane *pd,
403
    cdef_list *dlist, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS], int *dirinit,
404
    int var[CDEF_NBLOCKS][CDEF_NBLOCKS], uint16_t *in, uint8_t *ref_buffer,
405
    int ref_stride, int row, int col, int pri_strength, int sec_strength,
406
0
    int cdef_count, int pli, int coeff_shift, BLOCK_SIZE bs) {
407
0
  uint64_t curr_sse = 0;
408
0
  const BLOCK_SIZE plane_bsize =
409
0
      get_plane_block_size(bs, pd->subsampling_x, pd->subsampling_y);
410
0
  const int bw_log2 = 3 - pd->subsampling_x;
411
0
  const int bh_log2 = 3 - pd->subsampling_y;
412
413
  // TODO(Ranjit): Extend this optimization for HBD
414
0
  if (!cdef_search_ctx->use_highbitdepth) {
415
    // If all 8x8/4x4 blocks in CDEF block need to be filtered, calculate the
416
    // error at CDEF block level
417
0
    const int tot_blk_count =
418
0
        (block_size_wide[plane_bsize] * block_size_high[plane_bsize]) >>
419
0
        (bw_log2 + bh_log2);
420
0
    if (cdef_count == tot_blk_count) {
421
      // Calculate the offset in the buffer based on block position
422
0
      const FULLPEL_MV this_mv = { row, col };
423
0
      const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
424
0
      if (pri_strength == 0 && sec_strength == 0) {
425
        // When CDEF strength is zero, filtering is not applied. Hence
426
        // error is calculated between source and unfiltered pixels
427
0
        curr_sse =
428
0
            aom_sse(&ref_buffer[buf_offset], ref_stride,
429
0
                    get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride,
430
0
                    block_size_wide[plane_bsize], block_size_high[plane_bsize]);
431
0
      } else {
432
0
        DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]);
433
434
0
        av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in,
435
0
                           cdef_search_ctx->xdec[pli],
436
0
                           cdef_search_ctx->ydec[pli], dir, dirinit, var, pli,
437
0
                           dlist, cdef_count, pri_strength,
438
0
                           sec_strength + (sec_strength == 3),
439
0
                           cdef_search_ctx->damping, coeff_shift);
440
0
        curr_sse =
441
0
            aom_sse(&ref_buffer[buf_offset], ref_stride, tmp_dst8,
442
0
                    (1 << MAX_SB_SIZE_LOG2), block_size_wide[plane_bsize],
443
0
                    block_size_high[plane_bsize]);
444
0
      }
445
0
    } else {
446
      // If few 8x8/4x4 blocks in CDEF block need to be filtered, filtering
447
      // functions produce 8-bit output and the error is calculated in 8-bit
448
      // domain
449
0
      if (pri_strength == 0 && sec_strength == 0) {
450
0
        int num_error_calc_filt_units = 1;
451
0
        for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) {
452
0
          const uint8_t by = dlist[bi].by;
453
0
          const uint8_t bx = dlist[bi].bx;
454
0
          const int16_t by_pos = (by << bh_log2);
455
0
          const int16_t bx_pos = (bx << bw_log2);
456
          // Calculate the offset in the buffer based on block position
457
0
          const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos };
458
0
          const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
459
0
          num_error_calc_filt_units = get_error_calc_width_in_filt_units(
460
0
              dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y);
461
0
          curr_sse += aom_sse(
462
0
              &ref_buffer[buf_offset], ref_stride,
463
0
              get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride,
464
0
              num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2));
465
0
        }
466
0
      } else {
467
0
        DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]);
468
0
        av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in,
469
0
                           cdef_search_ctx->xdec[pli],
470
0
                           cdef_search_ctx->ydec[pli], dir, dirinit, var, pli,
471
0
                           dlist, cdef_count, pri_strength,
472
0
                           sec_strength + (sec_strength == 3),
473
0
                           cdef_search_ctx->damping, coeff_shift);
474
0
        int num_error_calc_filt_units = 1;
475
0
        for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) {
476
0
          const uint8_t by = dlist[bi].by;
477
0
          const uint8_t bx = dlist[bi].bx;
478
0
          const int16_t by_pos = (by << bh_log2);
479
0
          const int16_t bx_pos = (bx << bw_log2);
480
          // Calculate the offset in the buffer based on block position
481
0
          const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos };
482
0
          const FULLPEL_MV tmp_buf_pos = { by_pos, bx_pos };
483
0
          const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
484
0
          const int tmp_buf_offset =
485
0
              get_offset_from_fullmv(&tmp_buf_pos, (1 << MAX_SB_SIZE_LOG2));
486
0
          num_error_calc_filt_units = get_error_calc_width_in_filt_units(
487
0
              dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y);
488
0
          curr_sse += aom_sse(
489
0
              &ref_buffer[buf_offset], ref_stride, &tmp_dst8[tmp_buf_offset],
490
0
              (1 << MAX_SB_SIZE_LOG2),
491
0
              num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2));
492
0
        }
493
0
      }
494
0
    }
495
0
  } else {
496
0
    DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
497
498
0
    av1_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in,
499
0
                       cdef_search_ctx->xdec[pli], cdef_search_ctx->ydec[pli],
500
0
                       dir, dirinit, var, pli, dlist, cdef_count, pri_strength,
501
0
                       sec_strength + (sec_strength == 3),
502
0
                       cdef_search_ctx->damping, coeff_shift);
503
0
    curr_sse = cdef_search_ctx->compute_cdef_dist_fn(
504
0
        ref_buffer, ref_stride, tmp_dst, dlist, cdef_count,
505
0
        cdef_search_ctx->bsize[pli], coeff_shift, row, col);
506
0
  }
507
0
  return curr_sse;
508
0
}
509
510
// Calculates MSE at block level.
511
// Inputs:
512
//   cdef_search_ctx: Pointer to the structure containing parameters related to
513
//   CDEF search context.
514
//   fbr: Row index in units of 64x64 block
515
//   fbc: Column index in units of 64x64 block
516
// Returns:
517
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
518
void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx,
519
                             struct aom_internal_error_info *error_info,
520
0
                             int fbr, int fbc, int sb_count) {
521
  // TODO(aomedia:3276): Pass error_info to the low-level functions as required
522
  // in future to handle error propagation.
523
0
  (void)error_info;
524
0
  const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params;
525
0
  const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref;
526
0
  const int coeff_shift = cdef_search_ctx->coeff_shift;
527
0
  const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2;
528
0
  const int *mi_high_l2 = cdef_search_ctx->mi_high_l2;
529
530
  // Declare and initialize the temporary buffers.
531
0
  DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
532
0
  cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
533
0
  int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
534
0
  int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
535
0
  uint16_t *const in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
536
0
  int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
537
0
  int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
538
0
  int hb_step = 1, vb_step = 1;
539
0
  BLOCK_SIZE bs;
540
541
0
  const MB_MODE_INFO *const mbmi =
542
0
      mi_params->mi_grid_base[MI_SIZE_64X64 * fbr * mi_params->mi_stride +
543
0
                              MI_SIZE_64X64 * fbc];
544
545
0
  uint8_t *ref_buffer[MAX_MB_PLANE] = { ref->y_buffer, ref->u_buffer,
546
0
                                        ref->v_buffer };
547
0
  int ref_stride[MAX_MB_PLANE] = { ref->y_stride, ref->uv_stride,
548
0
                                   ref->uv_stride };
549
550
0
  if (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64 ||
551
0
      mbmi->bsize == BLOCK_64X128) {
552
0
    bs = mbmi->bsize;
553
0
    if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
554
0
      nhb = AOMMIN(MI_SIZE_128X128, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
555
0
      hb_step = 2;
556
0
    }
557
0
    if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
558
0
      nvb = AOMMIN(MI_SIZE_128X128, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
559
0
      vb_step = 2;
560
0
    }
561
0
  } else {
562
0
    bs = BLOCK_64X64;
563
0
  }
564
  // Get number of 8x8 blocks which are not skip. Cdef processing happens for
565
  // 8x8 blocks which are not skip.
566
0
  const int cdef_count = av1_cdef_compute_sb_list(
567
0
      mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);
568
0
  const bool is_fb_on_frm_left_boundary = (fbc == 0);
569
0
  const bool is_fb_on_frm_right_boundary =
570
0
      (fbc + hb_step == cdef_search_ctx->nhfb);
571
0
  const bool is_fb_on_frm_top_boundary = (fbr == 0);
572
0
  const bool is_fb_on_frm_bottom_boundary =
573
0
      (fbr + vb_step == cdef_search_ctx->nvfb);
574
0
  const int yoff = CDEF_VBORDER * (!is_fb_on_frm_top_boundary);
575
0
  const int xoff = CDEF_HBORDER * (!is_fb_on_frm_left_boundary);
576
0
  int dirinit = 0;
577
0
  for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) {
578
    /* We avoid filtering the pixels for which some of the pixels to
579
    average are outside the frame. We could change the filter instead,
580
    but it would add special cases for any future vectorization. */
581
0
    const int hfilt_size = (nhb << mi_wide_l2[pli]);
582
0
    const int vfilt_size = (nvb << mi_high_l2[pli]);
583
0
    const int ysize =
584
0
        vfilt_size + CDEF_VBORDER * (!is_fb_on_frm_bottom_boundary) + yoff;
585
0
    const int xsize =
586
0
        hfilt_size + CDEF_HBORDER * (!is_fb_on_frm_right_boundary) + xoff;
587
0
    const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli];
588
0
    const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
589
0
    struct macroblockd_plane pd = cdef_search_ctx->plane[pli];
590
0
    cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
591
0
                             pd.dst.buf, row - yoff, col - xoff, pd.dst.stride,
592
0
                             ysize, xsize);
593
0
    fill_borders_for_fbs_on_frame_boundary(
594
0
        inbuf, hfilt_size, vfilt_size, is_fb_on_frm_left_boundary,
595
0
        is_fb_on_frm_right_boundary, is_fb_on_frm_top_boundary,
596
0
        is_fb_on_frm_bottom_boundary);
597
0
    for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) {
598
0
      int pri_strength, sec_strength;
599
0
      get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength,
600
0
                                &sec_strength, gi);
601
0
      const uint64_t curr_mse = get_filt_error(
602
0
          cdef_search_ctx, &pd, dlist, dir, &dirinit, var, in, ref_buffer[pli],
603
0
          ref_stride[pli], row, col, pri_strength, sec_strength, cdef_count,
604
0
          pli, coeff_shift, bs);
605
0
      if (pli < 2)
606
0
        cdef_search_ctx->mse[pli][sb_count][gi] = curr_mse;
607
0
      else
608
0
        cdef_search_ctx->mse[1][sb_count][gi] += curr_mse;
609
0
    }
610
0
  }
611
0
  cdef_search_ctx->sb_index[sb_count] =
612
0
      MI_SIZE_64X64 * fbr * mi_params->mi_stride + MI_SIZE_64X64 * fbc;
613
0
}
614
615
// MSE calculation at frame level.
616
// Inputs:
617
//   cdef_search_ctx: Pointer to the structure containing parameters related to
618
//   CDEF search context.
619
// Returns:
620
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
621
static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx,
622
0
                                struct aom_internal_error_info *error_info) {
623
  // Loop over each sb.
624
0
  for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) {
625
0
    for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) {
626
      // Checks if cdef processing can be skipped for particular sb.
627
0
      if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue;
628
      // Calculate mse for each sb and store the relevant sb index.
629
0
      av1_cdef_mse_calc_block(cdef_search_ctx, error_info, fbr, fbc,
630
0
                              cdef_search_ctx->sb_count);
631
0
      cdef_search_ctx->sb_count++;
632
0
    }
633
0
  }
634
0
}
635
636
// Allocates memory for members of CdefSearchCtx.
637
// Inputs:
638
//   cdef_search_ctx: Pointer to the structure containing parameters
639
//   related to CDEF search context.
640
// Returns:
641
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
642
0
static void cdef_alloc_data(AV1_COMMON *cm, CdefSearchCtx *cdef_search_ctx) {
643
0
  const int nvfb = cdef_search_ctx->nvfb;
644
0
  const int nhfb = cdef_search_ctx->nhfb;
645
0
  CHECK_MEM_ERROR(
646
0
      cm, cdef_search_ctx->sb_index,
647
0
      aom_malloc(nvfb * nhfb * sizeof(cdef_search_ctx->sb_index[0])));
648
0
  cdef_search_ctx->sb_count = 0;
649
0
  CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[0],
650
0
                  aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb));
651
0
  CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[1],
652
0
                  aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb));
653
0
}
654
655
// Deallocates the memory allocated for members of CdefSearchCtx.
656
// Inputs:
657
//   cdef_search_ctx: Pointer to the structure containing parameters
658
//   related to CDEF search context.
659
// Returns:
660
//   Nothing will be returned.
661
0
void av1_cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx) {
662
0
  if (cdef_search_ctx) {
663
0
    aom_free(cdef_search_ctx->mse[0]);
664
0
    cdef_search_ctx->mse[0] = NULL;
665
0
    aom_free(cdef_search_ctx->mse[1]);
666
0
    cdef_search_ctx->mse[1] = NULL;
667
0
    aom_free(cdef_search_ctx->sb_index);
668
0
    cdef_search_ctx->sb_index = NULL;
669
0
  }
670
0
}
671
672
// Initialize the parameters related to CDEF search context.
673
// Inputs:
674
//   frame: Pointer to compressed frame buffer
675
//   ref: Pointer to the frame buffer holding the source frame
676
//   cm: Pointer to top level common structure
677
//   xd: Pointer to common current coding block structure
678
//   cdef_search_ctx: Pointer to the structure containing parameters related to
679
//   CDEF search context.
680
//   pick_method: Search method used to select CDEF parameters
681
// Returns:
682
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
683
static inline void cdef_params_init(const YV12_BUFFER_CONFIG *frame,
684
                                    const YV12_BUFFER_CONFIG *ref,
685
                                    AV1_COMMON *cm, MACROBLOCKD *xd,
686
                                    CdefSearchCtx *cdef_search_ctx,
687
0
                                    CDEF_PICK_METHOD pick_method) {
688
0
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
689
0
  const int num_planes = av1_num_planes(cm);
690
0
  cdef_search_ctx->mi_params = &cm->mi_params;
691
0
  cdef_search_ctx->ref = ref;
692
0
  cdef_search_ctx->nvfb =
693
0
      (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
694
0
  cdef_search_ctx->nhfb =
695
0
      (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
696
0
  cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0);
697
0
  cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6);
698
0
  cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method];
699
0
  cdef_search_ctx->num_planes = num_planes;
700
0
  cdef_search_ctx->pick_method = pick_method;
701
0
  cdef_search_ctx->sb_count = 0;
702
0
  cdef_search_ctx->use_highbitdepth = cm->seq_params->use_highbitdepth;
703
0
  av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0,
704
0
                       num_planes);
705
  // Initialize plane wise information.
706
0
  for (int pli = 0; pli < num_planes; pli++) {
707
0
    cdef_search_ctx->xdec[pli] = xd->plane[pli].subsampling_x;
708
0
    cdef_search_ctx->ydec[pli] = xd->plane[pli].subsampling_y;
709
0
    cdef_search_ctx->bsize[pli] =
710
0
        cdef_search_ctx->ydec[pli]
711
0
            ? (cdef_search_ctx->xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
712
0
            : (cdef_search_ctx->xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
713
0
    cdef_search_ctx->mi_wide_l2[pli] =
714
0
        MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
715
0
    cdef_search_ctx->mi_high_l2[pli] =
716
0
        MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
717
0
    cdef_search_ctx->plane[pli] = xd->plane[pli];
718
0
  }
719
  // Function pointer initialization.
720
0
#if CONFIG_AV1_HIGHBITDEPTH
721
0
  if (cm->seq_params->use_highbitdepth) {
722
0
    cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_highbd;
723
0
    cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd;
724
0
  } else {
725
0
    cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd;
726
0
    cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
727
0
  }
728
#else
729
  cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd;
730
  cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
731
#endif
732
0
}
733
734
// Derives the frame-level CDEF strengths directly from the quantizer instead
// of running a search.
// Inputs:
//   cm: Pointer to top level common structure; cm->cdef_info is written.
//   skip_cdef: When non-zero, two strength entries are signaled (one bit per
//   block), the second one being zero, and the per-block strength indices
//   are left untouched.
//   is_screen_content: When non-zero, the screen-content polynomial model is
//   used to predict the strengths.
// Returns:
//   Nothing will be returned. Contents of cm->cdef_info (and, when skip_cdef
//   is zero, the cdef_strength of the first mode info of every 64x64 unit)
//   will be modified.
void av1_pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef,
                           int is_screen_content) {
  const int bd = cm->seq_params->bit_depth;
  // AC quantizer step scaled back to the 8-bit range.
  const int q =
      av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8);
  CdefInfo *const cdef_info = &cm->cdef_info;

  // Check the speed feature to avoid extra signaling.
  cdef_info->cdef_bits = skip_cdef ? 1 : 0;
  cdef_info->nb_cdef_strengths = skip_cdef ? 2 : 1;
  cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6);

  // Predicted primary (0..15) and secondary (0..3) strengths per plane type.
  int y_pri = 0;
  int y_sec = 0;
  int uv_pri = 0;
  int uv_sec = 0;

  if (is_screen_content) {
    // Screen-content model: double precision, truncated toward zero.
    y_pri = clamp(
        (int)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02), 0,
        15);
    y_sec = clamp(
        (int)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01),
        0, 3);
    uv_pri = clamp(
        (int)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01),
        0, 15);
    uv_sec = clamp(
        (int)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+0), 0,
        3);
  } else if (frame_is_intra_only(cm)) {
    // Intra-only frames: single precision, rounded to nearest.
    y_pri = clamp(
        (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f),
        0, 15);
    y_sec = clamp((int)roundf(q * q * 0.0000029167343f + q * 0.0027798624f +
                              0.0079405f),
                  0, 3);
    uv_pri = clamp((int)roundf(q * q * -0.0000130790995f + q * 0.012892405f -
                               0.00748388f),
                   0, 15);
    uv_sec = clamp((int)roundf(q * q * 0.0000032651783f + q * 0.00035520183f +
                               0.00228092f),
                   0, 3);
  } else {
    // Inter frames: single precision, rounded to nearest.
    y_pri = clamp((int)roundf(q * q * -0.0000023593946f + q * 0.0068615186f +
                              0.02709886f),
                  0, 15);
    y_sec = clamp((int)roundf(q * q * -0.00000057629734f + q * 0.0013993345f +
                              0.03831067f),
                  0, 3);
    uv_pri = clamp((int)roundf(q * q * -0.0000007095069f + q * 0.0034628846f +
                               0.00887099f),
                   0, 15);
    uv_sec = clamp((int)roundf(q * q * 0.00000023874085f + q * 0.00028223585f +
                               0.05576307f),
                   0, 3);
  }

  // Pack primary and secondary strength into a single strength index.
  cdef_info->cdef_strengths[0] = y_pri * CDEF_SEC_STRENGTHS + y_sec;
  cdef_info->cdef_uv_strengths[0] = uv_pri * CDEF_SEC_STRENGTHS + uv_sec;

  // mbmi->cdef_strength is already set in the encoding stage. We don't need to
  // set it again here.
  if (skip_cdef) {
    cdef_info->cdef_strengths[1] = 0;
    cdef_info->cdef_uv_strengths[1] = 0;
    return;
  }

  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  MB_MODE_INFO **const grid = mi_params->mi_grid_base;
  // The grid is NULL when real-time rate control library is used.
  if (!grid) return;

  const int fb_rows = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  const int fb_cols = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;

  // Only one strength is signaled, so every 64x64 unit selects index 0.
  for (int fb_row = 0; fb_row < fb_rows; ++fb_row) {
    for (int fb_col = 0; fb_col < fb_cols; ++fb_col) {
      MB_MODE_INFO *const first_mi =
          grid[(fb_row * mi_params->mi_stride + fb_col) * MI_SIZE_64X64];
      first_mi->cdef_strength = 0;
    }
  }
}
823
824
0
// Selects the frame-level CDEF parameters (number of signaling bits, luma and
// chroma strength lists, damping) and the per-block strength index.
// Depending on the configuration this is either disabled outright, predicted
// from the quantizer, or obtained through a rate-distortion search over the
// per-block MSE tables.
// Inputs:
//   cpi: Pointer to the top level encoder structure.
// Returns:
//   Nothing will be returned. cpi->common.cdef_info and the cdef_strength of
//   the searched blocks will be modified.
void av1_cdef_search(AV1_COMP *cpi) {
  AV1_COMMON *cm = &cpi->common;
  CdefInfo *const cdef_info = &cm->cdef_info;
  CDEF_CONTROL cdef_control = cpi->oxcf.tool_cfg.cdef_control;

  assert(cdef_control != CDEF_NONE);

  // Adaptive CDEF on an all-intra encode running in Q or CQ mode. This same
  // predicate gates both the "off" and the "reduced strength" decisions.
  const bool adaptive_allintra_q =
      cdef_control == CDEF_ADAPTIVE && cpi->oxcf.mode == ALLINTRA &&
      (cpi->oxcf.rc_cfg.mode == AOM_Q || cpi->oxcf.rc_cfg.mode == AOM_CQ);
  const bool off_for_non_reference =
      cdef_control == CDEF_REFERENCE && cpi->ppi->rtc_ref.non_reference_frame;

  // For CDEF_ADAPTIVE, turning off CDEF around qindex 32 was best for still
  // pictures
  if (off_for_non_reference ||
      (adaptive_allintra_q && cpi->oxcf.rc_cfg.cq_level <= 32)) {
    cdef_info->nb_cdef_strengths = 1;
    cdef_info->cdef_bits = 0;
    cdef_info->cdef_strengths[0] = 0;
    cdef_info->cdef_uv_strengths[0] = 0;
    return;
  }

  // External RC (used for testing): predict from the quantizer only.
  if (cpi->rc.rtc_external_ratectrl) {
    av1_pick_cdef_from_qp(cm, 0, 0);
    return;
  }

  CDEF_PICK_METHOD pick_method = cpi->sf.lpf_sf.cdef_pick_method;
  if (pick_method == CDEF_PICK_FROM_Q) {
    const int qindex_thresh =
        AOMMAX(cpi->sf.rt_sf.screen_content_cdef_filter_qindex_thresh,
               cpi->rc.best_quality + 5);
    const int use_screen_content_model =
        cm->quant_params.base_qindex > qindex_thresh &&
        cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
    av1_pick_cdef_from_qp(cm, cpi->sf.rt_sf.skip_cdef_sb,
                          use_screen_content_model);
    return;
  }

  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int damping = 3 + (cm->quant_params.base_qindex >> 6);
  const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
                    pick_method <= CDEF_FAST_SEARCH_LVL5);
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &cpi->td.mb.e_mbd;

  // The search context is created on first use and kept on cpi.
  if (!cpi->cdef_search_ctx) {
    CHECK_MEM_ERROR(cm, cpi->cdef_search_ctx,
                    aom_malloc(sizeof(*cpi->cdef_search_ctx)));
  }
  CdefSearchCtx *cdef_search_ctx = cpi->cdef_search_ctx;

  // Initialize parameters related to CDEF search context.
  cdef_params_init(&cm->cur_frame->buf, cpi->source, cm, xd, cdef_search_ctx,
                   pick_method);
  // Allocate CDEF search context buffers.
  cdef_alloc_data(cm, cdef_search_ctx);
  // Frame level mse calculation, multi-threaded when workers are available.
  if (cpi->mt_info.num_workers > 1) {
    av1_cdef_mse_calc_frame_mt(cpi);
  } else {
    cdef_mse_calc_frame(cdef_search_ctx, cm->error);
  }

  const int sb_count = cdef_search_ctx->sb_count;
  uint64_t(*mse[2])[TOTAL_STRENGTHS] = { cdef_search_ctx->mse[0],
                                         cdef_search_ctx->mse[1] };

  /* Calculate the maximum number of bits required to signal CDEF strengths at
   * block level */
  const int total_strengths = nb_cdef_strengths[pick_method];
  const int joint_strengths =
      num_planes > 1 ? total_strengths * total_strengths : total_strengths;
  const int max_signaling_bits =
      joint_strengths == 1 ? 0 : get_msb(joint_strengths - 1) + 1;
  // Luma only, or luma plus one shared chroma strength list.
  const int strength_lists = num_planes > 1 ? 2 : 1;
  const int rdmult = cpi->td.mb.rdmult;

  /* Search for different number of signaling bits. */
  int best_bits = 0;
  uint64_t best_rd = UINT64_MAX;
  for (int bits = 0; bits <= 3 && bits <= max_signaling_bits; bits++) {
    int best_lev0[CDEF_MAX_STRENGTHS] = { 0 };
    int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
    const int nb_strengths = 1 << bits;

    uint64_t tot_mse;
    if (num_planes > 1) {
      tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
                                           mse, sb_count, pick_method);
    } else {
      tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
                                      pick_method);
    }

    // Per-block index bits plus the frame-level strength tables.
    const int total_bits =
        sb_count * bits + nb_strengths * CDEF_STRENGTH_BITS * strength_lists;
    const int rate_cost = av1_cost_literal(total_bits);
    const uint64_t dist = tot_mse * 16;
    const uint64_t rd = RDCOST(rdmult, rate_cost, dist);
    if (rd >= best_rd) continue;

    // New best: remember the bit count and publish its strength lists.
    best_rd = rd;
    best_bits = bits;
    memcpy(cdef_info->cdef_strengths, best_lev0,
           nb_strengths * sizeof(best_lev0[0]));
    if (num_planes > 1) {
      memcpy(cdef_info->cdef_uv_strengths, best_lev1,
             nb_strengths * sizeof(best_lev1[0]));
    }
  }

  cdef_info->cdef_bits = best_bits;
  cdef_info->nb_cdef_strengths = 1 << best_bits;

  // For every searched block pick the signaled entry with the lowest MSE
  // (first one wins on ties).
  for (int sb = 0; sb < sb_count; sb++) {
    uint64_t lowest_mse = UINT64_MAX;
    int chosen = 0;
    for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) {
      uint64_t candidate = mse[0][sb][cdef_info->cdef_strengths[gi]];
      if (num_planes > 1) {
        candidate += mse[1][sb][cdef_info->cdef_uv_strengths[gi]];
      }
      if (candidate < lowest_mse) {
        lowest_mse = candidate;
        chosen = gi;
      }
    }
    mi_params->mi_grid_base[cdef_search_ctx->sb_index[sb]]->cdef_strength =
        chosen;
  }

  // The fast searches work on reduced strength indices; convert them back to
  // the strength values that get signaled.
  if (fast) {
    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
      const int luma_strength = cdef_info->cdef_strengths[j];
      const int chroma_strength = cdef_info->cdef_uv_strengths[j];
      // NOTE(review): STORE_CDEF_FILTER_STRENGTH is defined outside this
      // block and presumably refers to these two locals by name; do not
      // rename them without checking the macro.
      int pri_strength, sec_strength;

      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_strengths[j], pick_method,
                                 luma_strength);
      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_uv_strengths[j], pick_method,
                                 chroma_strength);
    }
  }

  // For CDEF_ADAPTIVE, primary and secondary strengths are both halved for
  // qindexes 33 through 220.
  // Note 1: ">> 1" drops the 0.5 of odd strengths. With so few levels that is
  // a large share of a small strength, so odd strengths end up reduced by
  // noticeably more than half (pri 3 -> 1, sec 1 -> 0). This is the intended
  // behavior for reduced strength.
  // Note 2: a signaled sec strength of 3 actually means a sec strength of 4.
  // Mapping it to a reduced value of 2 (by adding 1 first) was tried, but on
  // Daala's subset1 the SSIMULACRA 2 scores were identical (cpu-used 6) or
  // within noise (cpu-used 3), so that special case was left out.
  if (adaptive_allintra_q && cpi->oxcf.rc_cfg.cq_level <= 220) {
    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
      const int luma = cdef_info->cdef_strengths[j];
      const int chroma = cdef_info->cdef_uv_strengths[j];

      const int luma_pri = (luma / CDEF_SEC_STRENGTHS) >> 1;
      const int luma_sec = (luma % CDEF_SEC_STRENGTHS) >> 1;
      const int chroma_pri = (chroma / CDEF_SEC_STRENGTHS) >> 1;
      const int chroma_sec = (chroma % CDEF_SEC_STRENGTHS) >> 1;

      cdef_info->cdef_strengths[j] = luma_pri * CDEF_SEC_STRENGTHS + luma_sec;
      cdef_info->cdef_uv_strengths[j] =
          chroma_pri * CDEF_SEC_STRENGTHS + chroma_sec;
    }
  }

  cdef_info->cdef_damping = damping;
  // Deallocate CDEF search context buffers.
  av1_cdef_dealloc_data(cdef_search_ctx);
}