Coverage Report

Created: 2025-08-29 07:08

/src/libavif/ext/aom/av1/encoder/pickcdef.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <math.h>
13
#include <stdbool.h>
14
#include <string.h>
15
16
#include "config/aom_dsp_rtcd.h"
17
#include "config/aom_scale_rtcd.h"
18
19
#include "aom/aom_integer.h"
20
#include "av1/common/av1_common_int.h"
21
#include "av1/common/reconinter.h"
22
#include "av1/encoder/encoder.h"
23
#include "av1/encoder/ethread.h"
24
#include "av1/encoder/pickcdef.h"
25
#include "av1/encoder/mcomp.h"
26
27
// Get primary and secondary filter strength for the given strength index and
28
// search method
29
static inline void get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,
30
                                             int *pri_strength,
31
                                             int *sec_strength,
32
827k
                                             int strength_idx) {
33
827k
  const int tot_sec_filter =
34
827k
      (pick_method == CDEF_FAST_SEARCH_LVL5)
35
827k
          ? REDUCED_SEC_STRENGTHS_LVL5
36
827k
          : ((pick_method >= CDEF_FAST_SEARCH_LVL3) ? REDUCED_SEC_STRENGTHS_LVL3
37
18.4E
                                                    : CDEF_SEC_STRENGTHS);
38
827k
  const int pri_idx = strength_idx / tot_sec_filter;
39
827k
  const int sec_idx = strength_idx % tot_sec_filter;
40
827k
  *pri_strength = pri_idx;
41
827k
  *sec_strength = sec_idx;
42
827k
  if (pick_method == CDEF_FULL_SEARCH) return;
43
44
827k
  switch (pick_method) {
45
0
    case CDEF_FAST_SEARCH_LVL1:
46
0
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL1);
47
0
      *pri_strength = priconv_lvl1[pri_idx];
48
0
      break;
49
0
    case CDEF_FAST_SEARCH_LVL2:
50
0
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2);
51
0
      *pri_strength = priconv_lvl2[pri_idx];
52
0
      break;
53
0
    case CDEF_FAST_SEARCH_LVL3:
54
0
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2);
55
0
      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3);
56
0
      *pri_strength = priconv_lvl2[pri_idx];
57
0
      *sec_strength = secconv_lvl3[sec_idx];
58
0
      break;
59
827k
    case CDEF_FAST_SEARCH_LVL4:
60
827k
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4);
61
827k
      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3);
62
827k
      *pri_strength = priconv_lvl4[pri_idx];
63
827k
      *sec_strength = secconv_lvl3[sec_idx];
64
827k
      break;
65
0
    case CDEF_FAST_SEARCH_LVL5:
66
0
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4);
67
0
      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL5);
68
0
      *pri_strength = priconv_lvl5[pri_idx];
69
0
      *sec_strength = secconv_lvl5[sec_idx];
70
0
      break;
71
0
    default: assert(0 && "Invalid CDEF search method");
72
827k
  }
73
827k
}
74
75
// Converts a strength index of the given search method into a CDEF filter
// strength and stores it in cdef_strength, packed as
// pri_strength * CDEF_SEC_STRENGTHS + sec_strength.
//
// NOTE: the expansion writes to two int variables named pri_strength and
// sec_strength, which must already be declared in the invoking scope.
// The destination argument is parenthesized so that any lvalue expression can
// be passed safely.
#define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx) \
  do {                                                                       \
    get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength,   \
                              (strength_idx));                               \
    (cdef_strength) = pri_strength * CDEF_SEC_STRENGTHS + sec_strength;      \
  } while (0)
83
84
/* Search for the best strength to add as an option, knowing we
85
   already selected nb_strengths options. */
86
static uint64_t search_one(int *lev, int nb_strengths,
87
                           uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
88
189k
                           CDEF_PICK_METHOD pick_method) {
89
189k
  uint64_t tot_mse[TOTAL_STRENGTHS];
90
189k
  const int total_strengths = nb_cdef_strengths[pick_method];
91
189k
  int i, j;
92
189k
  uint64_t best_tot_mse = (uint64_t)1 << 63;
93
189k
  int best_id = 0;
94
189k
  memset(tot_mse, 0, sizeof(tot_mse));
95
627k
  for (i = 0; i < sb_count; i++) {
96
438k
    int gi;
97
438k
    uint64_t best_mse = (uint64_t)1 << 63;
98
    /* Find best mse among already selected options. */
99
876k
    for (gi = 0; gi < nb_strengths; gi++) {
100
438k
      if (mse[i][lev[gi]] < best_mse) {
101
277k
        best_mse = mse[i][lev[gi]];
102
277k
      }
103
438k
    }
104
    /* Find best mse when adding each possible new option. */
105
2.19M
    for (j = 0; j < total_strengths; j++) {
106
1.75M
      uint64_t best = best_mse;
107
1.75M
      if (mse[i][j] < best) best = mse[i][j];
108
1.75M
      tot_mse[j] += best;
109
1.75M
    }
110
438k
  }
111
946k
  for (j = 0; j < total_strengths; j++) {
112
757k
    if (tot_mse[j] < best_tot_mse) {
113
334k
      best_tot_mse = tot_mse[j];
114
334k
      best_id = j;
115
334k
    }
116
757k
  }
117
189k
  lev[nb_strengths] = best_id;
118
189k
  return best_tot_mse;
119
189k
}
120
121
/* Search for the best luma+chroma strength to add as an option, knowing we
122
   already selected nb_strengths options. */
123
static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
124
                                uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
125
1.33M
                                CDEF_PICK_METHOD pick_method) {
126
1.33M
  uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
127
1.33M
  int i, j;
128
1.33M
  uint64_t best_tot_mse = (uint64_t)1 << 63;
129
1.33M
  int best_id0 = 0;
130
1.33M
  int best_id1 = 0;
131
1.33M
  const int total_strengths = nb_cdef_strengths[pick_method];
132
1.33M
  memset(tot_mse, 0, sizeof(tot_mse));
133
4.36M
  for (i = 0; i < sb_count; i++) {
134
3.03M
    int gi;
135
3.03M
    uint64_t best_mse = (uint64_t)1 << 63;
136
    /* Find best mse among already selected options. */
137
15.7M
    for (gi = 0; gi < nb_strengths; gi++) {
138
12.7M
      uint64_t curr = mse[0][i][lev0[gi]];
139
12.7M
      curr += mse[1][i][lev1[gi]];
140
12.7M
      if (curr < best_mse) {
141
4.22M
        best_mse = curr;
142
4.22M
      }
143
12.7M
    }
144
    /* Find best mse when adding each possible new option. */
145
15.1M
    for (j = 0; j < total_strengths; j++) {
146
12.1M
      int k;
147
60.6M
      for (k = 0; k < total_strengths; k++) {
148
48.5M
        uint64_t best = best_mse;
149
48.5M
        uint64_t curr = mse[0][i][j];
150
48.5M
        curr += mse[1][i][k];
151
48.5M
        if (curr < best) best = curr;
152
48.5M
        tot_mse[j][k] += best;
153
48.5M
      }
154
12.1M
    }
155
3.03M
  }
156
6.67M
  for (j = 0; j < total_strengths; j++) {
157
5.33M
    int k;
158
26.6M
    for (k = 0; k < total_strengths; k++) {
159
21.3M
      if (tot_mse[j][k] < best_tot_mse) {
160
3.01M
        best_tot_mse = tot_mse[j][k];
161
3.01M
        best_id0 = j;
162
3.01M
        best_id1 = k;
163
3.01M
      }
164
21.3M
    }
165
5.33M
  }
166
1.33M
  lev0[nb_strengths] = best_id0;
167
1.33M
  lev1[nb_strengths] = best_id1;
168
1.33M
  return best_tot_mse;
169
1.33M
}
170
171
/* Search for the set of strengths that minimizes mse. */
172
static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
173
                                      uint64_t mse[][TOTAL_STRENGTHS],
174
                                      int sb_count,
175
81.1k
                                      CDEF_PICK_METHOD pick_method) {
176
81.1k
  uint64_t best_tot_mse;
177
81.1k
  int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
178
81.1k
              pick_method <= CDEF_FAST_SEARCH_LVL5);
179
81.1k
  int i;
180
81.1k
  best_tot_mse = (uint64_t)1 << 63;
181
  /* Greedy search: add one strength options at a time. */
182
270k
  for (i = 0; i < nb_strengths; i++) {
183
189k
    best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method);
184
189k
  }
185
  /* Trying to refine the greedy search by reconsidering each
186
     already-selected option. */
187
81.1k
  if (!fast) {
188
0
    for (i = 0; i < 4 * nb_strengths; i++) {
189
0
      int j;
190
0
      for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
191
0
      best_tot_mse =
192
0
          search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method);
193
0
    }
194
0
  }
195
81.1k
  return best_tot_mse;
196
81.1k
}
197
198
/* Search for the set of luma+chroma strengths that minimizes mse. */
199
static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
200
                                           int nb_strengths,
201
                                           uint64_t (**mse)[TOTAL_STRENGTHS],
202
                                           int sb_count,
203
71.1k
                                           CDEF_PICK_METHOD pick_method) {
204
71.1k
  uint64_t best_tot_mse;
205
71.1k
  int i;
206
71.1k
  best_tot_mse = (uint64_t)1 << 63;
207
  /* Greedy search: add one strength options at a time. */
208
338k
  for (i = 0; i < nb_strengths; i++) {
209
266k
    best_tot_mse =
210
266k
        search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method);
211
266k
  }
212
  /* Trying to refine the greedy search by reconsidering each
213
     already-selected option. */
214
1.13M
  for (i = 0; i < 4 * nb_strengths; i++) {
215
1.06M
    int j;
216
6.04M
    for (j = 0; j < nb_strengths - 1; j++) {
217
4.98M
      best_lev0[j] = best_lev0[j + 1];
218
4.98M
      best_lev1[j] = best_lev1[j + 1];
219
4.98M
    }
220
1.06M
    best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
221
1.06M
                                   sb_count, pick_method);
222
1.06M
  }
223
71.1k
  return best_tot_mse;
224
71.1k
}
225
226
static inline void init_src_params(int *src_stride, int *width, int *height,
227
                                   int *width_log2, int *height_log2,
228
171k
                                   BLOCK_SIZE bsize) {
229
171k
  *src_stride = block_size_wide[bsize];
230
171k
  *width = block_size_wide[bsize];
231
171k
  *height = block_size_high[bsize];
232
171k
  *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
233
171k
  *height_log2 = MI_SIZE_LOG2 + mi_size_high_log2[bsize];
234
171k
}
235
#if CONFIG_AV1_HIGHBITDEPTH
/* High bit-depth distortion: sums the MSE over only the blocks listed in
   dlist[], i.e. the blocks that were filtered.
   'dst' is a frame buffer addressed at (row, col) with stride 'dstride';
   'src' holds one bsize-sized block per dlist entry, packed back to back.
   The sum is scaled down by 2 * coeff_shift bits before being returned. */
static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src,
                                         cdef_list *dlist, int cdef_count,
                                         BLOCK_SIZE bsize, int coeff_shift,
                                         int row, int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);
  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);

  /* Top-left sample of this filter block inside the 16-bit frame buffer. */
  uint16_t *const frame_blk =
      CONVERT_TO_SHORTPTR((uint8_t *)dst) + row * dstride + col;
  /* log2 of the number of samples in one packed block of 'src'. */
  const int blk_samples_log2 = height_log2 + width_log2;

  uint64_t total = 0;
  for (int idx = 0; idx < cdef_count; ++idx) {
    const int y = dlist[idx].by << height_log2;
    const int x = dlist[idx].bx << width_log2;
    total += aom_mse_wxh_16bit_highbd(&frame_blk[y * dstride + x], dstride,
                                      &src[idx << blk_samples_log2],
                                      src_stride, width, height);
  }
  return total >> (2 * coeff_shift);
}
#endif
260
261
// Checks dual and quad block processing is applicable for block widths 8 and 4
262
// respectively.
263
static inline int is_dual_or_quad_applicable(cdef_list *dlist, int width,
264
0
                                             int cdef_count, int bi, int iter) {
265
0
  assert(width == 8 || width == 4);
266
0
  const int blk_offset = (width == 8) ? 1 : 3;
267
0
  if ((iter + blk_offset) >= cdef_count) return 0;
268
269
0
  if (dlist[bi].by == dlist[bi + blk_offset].by &&
270
0
      dlist[bi].bx + blk_offset == dlist[bi + blk_offset].bx)
271
0
    return 1;
272
273
0
  return 0;
274
0
}
275
276
static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src,
277
                                  cdef_list *dlist, int cdef_count,
278
                                  BLOCK_SIZE bsize, int coeff_shift, int row,
279
0
                                  int col) {
280
0
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
281
0
         bsize == BLOCK_8X8);
282
0
  uint64_t sum = 0;
283
0
  int bi, bx, by;
284
0
  int iter = 0;
285
0
  int inc = 1;
286
0
  uint8_t *dst8 = (uint8_t *)dst;
287
0
  uint8_t *dst_buff = &dst8[row * dstride + col];
288
0
  int src_stride, width, height, width_log2, height_log2;
289
0
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
290
0
                  bsize);
291
292
0
  const int num_blks = 16 / width;
293
0
  for (bi = 0; bi < cdef_count; bi += inc) {
294
0
    by = dlist[bi].by;
295
0
    bx = dlist[bi].bx;
296
0
    uint16_t *src_tmp = &src[bi << (height_log2 + width_log2)];
297
0
    uint8_t *dst_tmp =
298
0
        &dst_buff[(by << height_log2) * dstride + (bx << width_log2)];
299
300
0
    if (is_dual_or_quad_applicable(dlist, width, cdef_count, bi, iter)) {
301
0
      sum += aom_mse_16xh_16bit(dst_tmp, dstride, src_tmp, width, height);
302
0
      iter += num_blks;
303
0
      inc = num_blks;
304
0
    } else {
305
0
      sum += aom_mse_wxh_16bit(dst_tmp, dstride, src_tmp, src_stride, width,
306
0
                               height);
307
0
      iter += 1;
308
0
      inc = 1;
309
0
    }
310
0
  }
311
312
0
  return sum >> 2 * coeff_shift;
313
0
}
314
315
// Fill the boundary regions of the block with CDEF_VERY_LARGE, only if the
316
// region is outside frame boundary
317
static inline void fill_borders_for_fbs_on_frame_boundary(
318
    uint16_t *inbuf, int hfilt_size, int vfilt_size,
319
    bool is_fb_on_frm_left_boundary, bool is_fb_on_frm_right_boundary,
320
182k
    bool is_fb_on_frm_top_boundary, bool is_fb_on_frm_bottom_boundary) {
321
182k
  if (!is_fb_on_frm_left_boundary && !is_fb_on_frm_right_boundary &&
322
182k
      !is_fb_on_frm_top_boundary && !is_fb_on_frm_bottom_boundary)
323
5.98k
    return;
324
176k
  if (is_fb_on_frm_bottom_boundary) {
325
    // Fill bottom region of the block
326
121k
    const int buf_offset =
327
121k
        (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + CDEF_HBORDER;
328
121k
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
329
121k
              CDEF_VERY_LARGE);
330
121k
  }
331
176k
  if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_left_boundary) {
332
161k
    const int buf_offset = (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE;
333
    // Fill bottom-left region of the block
334
161k
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
335
161k
              CDEF_VERY_LARGE);
336
161k
  }
337
176k
  if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_right_boundary) {
338
161k
    const int buf_offset =
339
161k
        (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + hfilt_size + CDEF_HBORDER;
340
    // Fill bottom-right region of the block
341
161k
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
342
161k
              CDEF_VERY_LARGE);
343
161k
  }
344
176k
  if (is_fb_on_frm_top_boundary) {
345
    // Fill top region of the block
346
121k
    fill_rect(&inbuf[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
347
121k
              CDEF_VERY_LARGE);
348
121k
  }
349
176k
  if (is_fb_on_frm_top_boundary || is_fb_on_frm_left_boundary) {
350
    // Fill top-left region of the block
351
161k
    fill_rect(inbuf, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
352
161k
  }
353
176k
  if (is_fb_on_frm_top_boundary || is_fb_on_frm_right_boundary) {
354
161k
    const int buf_offset = hfilt_size + CDEF_HBORDER;
355
    // Fill top-right region of the block
356
161k
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
357
161k
              CDEF_VERY_LARGE);
358
161k
  }
359
176k
  if (is_fb_on_frm_left_boundary) {
360
119k
    const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
361
    // Fill left region of the block
362
119k
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, vfilt_size, CDEF_HBORDER,
363
119k
              CDEF_VERY_LARGE);
364
119k
  }
365
176k
  if (is_fb_on_frm_right_boundary) {
366
119k
    const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
367
    // Fill right region of the block
368
119k
    fill_rect(&inbuf[buf_offset + hfilt_size + CDEF_HBORDER], CDEF_BSTRIDE,
369
119k
              vfilt_size, CDEF_HBORDER, CDEF_VERY_LARGE);
370
119k
  }
371
176k
}
372
373
// Calculate the number of 8x8/4x4 filter units for which SSE can be calculated
374
// after CDEF filtering in single function call
375
static AOM_FORCE_INLINE int get_error_calc_width_in_filt_units(
376
    cdef_list *dlist, int cdef_count, int bi, int subsampling_x,
377
3.18M
    int subsampling_y) {
378
  // TODO(Ranjit): Extend the optimization for 422
379
3.18M
  if (subsampling_x != subsampling_y) return 1;
380
381
  // Combining more blocks seems to increase encode time due to increase in
382
  // control code
383
2.29M
  if (bi + 3 < cdef_count && dlist[bi].by == dlist[bi + 3].by &&
384
2.29M
      dlist[bi].bx + 3 == dlist[bi + 3].bx) {
385
    /* Calculate error for four 8x8/4x4 blocks using 32x8/16x4 block specific
386
     * logic if y co-ordinates match and x co-ordinates are
387
     * separated by 3 for first and fourth 8x8/4x4 blocks in dlist[]. */
388
820k
    return 4;
389
820k
  }
390
1.47M
  if (bi + 1 < cdef_count && dlist[bi].by == dlist[bi + 1].by &&
391
1.47M
      dlist[bi].bx + 1 == dlist[bi + 1].bx) {
392
    /* Calculate error for two 8x8/4x4 blocks using 16x8/8x4 block specific
393
     * logic if their y co-ordinates match and x co-ordinates are
394
     * separated by 1 for first and second 8x8/4x4 blocks in dlist[]. */
395
415k
    return 2;
396
415k
  }
397
1.06M
  return 1;
398
1.47M
}
399
400
// Returns the block error after CDEF filtering for a given strength
401
static inline uint64_t get_filt_error(
402
    const CdefSearchCtx *cdef_search_ctx, const struct macroblockd_plane *pd,
403
    cdef_list *dlist, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS], int *dirinit,
404
    int var[CDEF_NBLOCKS][CDEF_NBLOCKS], uint16_t *in, uint8_t *ref_buffer,
405
    int ref_stride, int row, int col, int pri_strength, int sec_strength,
406
732k
    int cdef_count, int pli, int coeff_shift, BLOCK_SIZE bs) {
407
732k
  uint64_t curr_sse = 0;
408
732k
  const BLOCK_SIZE plane_bsize =
409
732k
      get_plane_block_size(bs, pd->subsampling_x, pd->subsampling_y);
410
732k
  const int bw_log2 = 3 - pd->subsampling_x;
411
732k
  const int bh_log2 = 3 - pd->subsampling_y;
412
413
  // TODO(Ranjit): Extend this optimization for HBD
414
732k
  if (!cdef_search_ctx->use_highbitdepth) {
415
    // If all 8x8/4x4 blocks in CDEF block need to be filtered, calculate the
416
    // error at CDEF block level
417
560k
    const int tot_blk_count =
418
560k
        (block_size_wide[plane_bsize] * block_size_high[plane_bsize]) >>
419
560k
        (bw_log2 + bh_log2);
420
560k
    if (cdef_count == tot_blk_count) {
421
      // Calculate the offset in the buffer based on block position
422
102k
      const FULLPEL_MV this_mv = { row, col };
423
102k
      const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
424
102k
      if (pri_strength == 0 && sec_strength == 0) {
425
        // When CDEF strength is zero, filtering is not applied. Hence
426
        // error is calculated between source and unfiltered pixels
427
25.6k
        curr_sse =
428
25.6k
            aom_sse(&ref_buffer[buf_offset], ref_stride,
429
25.6k
                    get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride,
430
25.6k
                    block_size_wide[plane_bsize], block_size_high[plane_bsize]);
431
77.0k
      } else {
432
77.0k
        DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]);
433
434
77.0k
        av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in,
435
77.0k
                           cdef_search_ctx->xdec[pli],
436
77.0k
                           cdef_search_ctx->ydec[pli], dir, dirinit, var, pli,
437
77.0k
                           dlist, cdef_count, pri_strength,
438
77.0k
                           sec_strength + (sec_strength == 3),
439
77.0k
                           cdef_search_ctx->damping, coeff_shift);
440
77.0k
        curr_sse =
441
77.0k
            aom_sse(&ref_buffer[buf_offset], ref_stride, tmp_dst8,
442
77.0k
                    (1 << MAX_SB_SIZE_LOG2), block_size_wide[plane_bsize],
443
77.0k
                    block_size_high[plane_bsize]);
444
77.0k
      }
445
458k
    } else {
446
      // If few 8x8/4x4 blocks in CDEF block need to be filtered, filtering
447
      // functions produce 8-bit output and the error is calculated in 8-bit
448
      // domain
449
458k
      if (pri_strength == 0 && sec_strength == 0) {
450
114k
        int num_error_calc_filt_units = 1;
451
912k
        for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) {
452
797k
          const uint8_t by = dlist[bi].by;
453
797k
          const uint8_t bx = dlist[bi].bx;
454
797k
          const int16_t by_pos = (by << bh_log2);
455
797k
          const int16_t bx_pos = (bx << bw_log2);
456
          // Calculate the offset in the buffer based on block position
457
797k
          const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos };
458
797k
          const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
459
797k
          num_error_calc_filt_units = get_error_calc_width_in_filt_units(
460
797k
              dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y);
461
797k
          curr_sse += aom_sse(
462
797k
              &ref_buffer[buf_offset], ref_stride,
463
797k
              get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride,
464
797k
              num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2));
465
797k
        }
466
343k
      } else {
467
343k
        DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]);
468
343k
        av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in,
469
343k
                           cdef_search_ctx->xdec[pli],
470
343k
                           cdef_search_ctx->ydec[pli], dir, dirinit, var, pli,
471
343k
                           dlist, cdef_count, pri_strength,
472
343k
                           sec_strength + (sec_strength == 3),
473
343k
                           cdef_search_ctx->damping, coeff_shift);
474
343k
        int num_error_calc_filt_units = 1;
475
2.74M
        for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) {
476
2.40M
          const uint8_t by = dlist[bi].by;
477
2.40M
          const uint8_t bx = dlist[bi].bx;
478
2.40M
          const int16_t by_pos = (by << bh_log2);
479
2.40M
          const int16_t bx_pos = (bx << bw_log2);
480
          // Calculate the offset in the buffer based on block position
481
2.40M
          const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos };
482
2.40M
          const FULLPEL_MV tmp_buf_pos = { by_pos, bx_pos };
483
2.40M
          const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
484
2.40M
          const int tmp_buf_offset =
485
2.40M
              get_offset_from_fullmv(&tmp_buf_pos, (1 << MAX_SB_SIZE_LOG2));
486
2.40M
          num_error_calc_filt_units = get_error_calc_width_in_filt_units(
487
2.40M
              dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y);
488
2.40M
          curr_sse += aom_sse(
489
2.40M
              &ref_buffer[buf_offset], ref_stride, &tmp_dst8[tmp_buf_offset],
490
2.40M
              (1 << MAX_SB_SIZE_LOG2),
491
2.40M
              num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2));
492
2.40M
        }
493
343k
      }
494
458k
    }
495
560k
  } else {
496
172k
    DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
497
498
172k
    av1_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in,
499
172k
                       cdef_search_ctx->xdec[pli], cdef_search_ctx->ydec[pli],
500
172k
                       dir, dirinit, var, pli, dlist, cdef_count, pri_strength,
501
172k
                       sec_strength + (sec_strength == 3),
502
172k
                       cdef_search_ctx->damping, coeff_shift);
503
172k
    curr_sse = cdef_search_ctx->compute_cdef_dist_fn(
504
172k
        ref_buffer, ref_stride, tmp_dst, dlist, cdef_count,
505
172k
        cdef_search_ctx->bsize[pli], coeff_shift, row, col);
506
172k
  }
507
732k
  return curr_sse;
508
732k
}
509
510
// Calculates MSE at block level.
511
// Inputs:
512
//   cdef_search_ctx: Pointer to the structure containing parameters related to
513
//   CDEF search context.
514
//   fbr: Row index in units of 64x64 block
515
//   fbc: Column index in units of 64x64 block
516
// Returns:
517
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
518
void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx,
519
                             struct aom_internal_error_info *error_info,
520
102k
                             int fbr, int fbc, int sb_count) {
521
  // TODO(aomedia:3276): Pass error_info to the low-level functions as required
522
  // in future to handle error propagation.
523
102k
  (void)error_info;
524
102k
  const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params;
525
102k
  const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref;
526
102k
  const int coeff_shift = cdef_search_ctx->coeff_shift;
527
102k
  const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2;
528
102k
  const int *mi_high_l2 = cdef_search_ctx->mi_high_l2;
529
530
  // Declare and initialize the temporary buffers.
531
102k
  DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
532
102k
  cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
533
102k
  int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
534
102k
  int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
535
102k
  uint16_t *const in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
536
102k
  int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
537
102k
  int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
538
102k
  int hb_step = 1, vb_step = 1;
539
102k
  BLOCK_SIZE bs;
540
541
102k
  const MB_MODE_INFO *const mbmi =
542
102k
      mi_params->mi_grid_base[MI_SIZE_64X64 * fbr * mi_params->mi_stride +
543
102k
                              MI_SIZE_64X64 * fbc];
544
545
102k
  uint8_t *ref_buffer[MAX_MB_PLANE] = { ref->y_buffer, ref->u_buffer,
546
102k
                                        ref->v_buffer };
547
102k
  int ref_stride[MAX_MB_PLANE] = { ref->y_stride, ref->uv_stride,
548
102k
                                   ref->uv_stride };
549
550
102k
  if (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64 ||
551
102k
      mbmi->bsize == BLOCK_64X128) {
552
0
    bs = mbmi->bsize;
553
0
    if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
554
0
      nhb = AOMMIN(MI_SIZE_128X128, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
555
0
      hb_step = 2;
556
0
    }
557
0
    if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
558
0
      nvb = AOMMIN(MI_SIZE_128X128, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
559
0
      vb_step = 2;
560
0
    }
561
102k
  } else {
562
102k
    bs = BLOCK_64X64;
563
102k
  }
564
  // Get number of 8x8 blocks which are not skip. Cdef processing happens for
565
  // 8x8 blocks which are not skip.
566
102k
  const int cdef_count = av1_cdef_compute_sb_list(
567
102k
      mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);
568
102k
  const bool is_fb_on_frm_left_boundary = (fbc == 0);
569
102k
  const bool is_fb_on_frm_right_boundary =
570
102k
      (fbc + hb_step == cdef_search_ctx->nhfb);
571
102k
  const bool is_fb_on_frm_top_boundary = (fbr == 0);
572
102k
  const bool is_fb_on_frm_bottom_boundary =
573
102k
      (fbr + vb_step == cdef_search_ctx->nvfb);
574
102k
  const int yoff = CDEF_VBORDER * (!is_fb_on_frm_top_boundary);
575
102k
  const int xoff = CDEF_HBORDER * (!is_fb_on_frm_left_boundary);
576
102k
  int dirinit = 0;
577
286k
  for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) {
578
    /* We avoid filtering the pixels for which some of the pixels to
579
    average are outside the frame. We could change the filter instead,
580
    but it would add special cases for any future vectorization. */
581
183k
    const int hfilt_size = (nhb << mi_wide_l2[pli]);
582
183k
    const int vfilt_size = (nvb << mi_high_l2[pli]);
583
183k
    const int ysize =
584
183k
        vfilt_size + CDEF_VBORDER * (!is_fb_on_frm_bottom_boundary) + yoff;
585
183k
    const int xsize =
586
183k
        hfilt_size + CDEF_HBORDER * (!is_fb_on_frm_right_boundary) + xoff;
587
183k
    const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli];
588
183k
    const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
589
183k
    struct macroblockd_plane pd = cdef_search_ctx->plane[pli];
590
183k
    cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
591
183k
                             pd.dst.buf, row - yoff, col - xoff, pd.dst.stride,
592
183k
                             ysize, xsize);
593
183k
    fill_borders_for_fbs_on_frame_boundary(
594
183k
        inbuf, hfilt_size, vfilt_size, is_fb_on_frm_left_boundary,
595
183k
        is_fb_on_frm_right_boundary, is_fb_on_frm_top_boundary,
596
183k
        is_fb_on_frm_bottom_boundary);
597
916k
    for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) {
598
732k
      int pri_strength, sec_strength;
599
732k
      get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength,
600
732k
                                &sec_strength, gi);
601
732k
      const uint64_t curr_mse = get_filt_error(
602
732k
          cdef_search_ctx, &pd, dlist, dir, &dirinit, var, in, ref_buffer[pli],
603
732k
          ref_stride[pli], row, col, pri_strength, sec_strength, cdef_count,
604
732k
          pli, coeff_shift, bs);
605
732k
      if (pli < 2)
606
572k
        cdef_search_ctx->mse[pli][sb_count][gi] = curr_mse;
607
160k
      else
608
160k
        cdef_search_ctx->mse[1][sb_count][gi] += curr_mse;
609
732k
    }
610
183k
  }
611
102k
  cdef_search_ctx->sb_index[sb_count] =
612
102k
      MI_SIZE_64X64 * fbr * mi_params->mi_stride + MI_SIZE_64X64 * fbc;
613
102k
}
614
615
// MSE calculation at frame level.
616
// Inputs:
617
//   cdef_search_ctx: Pointer to the structure containing parameters related to
618
//   CDEF search context.
619
// Returns:
620
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
621
static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx,
622
18.0k
                                struct aom_internal_error_info *error_info) {
623
  // Loop over each sb.
624
40.0k
  for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) {
625
53.8k
    for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) {
626
      // Checks if cdef processing can be skipped for particular sb.
627
31.8k
      if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue;
628
      // Calculate mse for each sb and store the relevant sb index.
629
31.4k
      av1_cdef_mse_calc_block(cdef_search_ctx, error_info, fbr, fbc,
630
31.4k
                              cdef_search_ctx->sb_count);
631
31.4k
      cdef_search_ctx->sb_count++;
632
31.4k
    }
633
22.0k
  }
634
18.0k
}
635
636
// Allocates memory for members of CdefSearchCtx.
637
// Inputs:
638
//   cdef_search_ctx: Pointer to the structure containing parameters
639
//   related to CDEF search context.
640
// Returns:
641
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
642
44.8k
static void cdef_alloc_data(AV1_COMMON *cm, CdefSearchCtx *cdef_search_ctx) {
643
44.8k
  const int nvfb = cdef_search_ctx->nvfb;
644
44.8k
  const int nhfb = cdef_search_ctx->nhfb;
645
44.8k
  CHECK_MEM_ERROR(
646
44.8k
      cm, cdef_search_ctx->sb_index,
647
44.8k
      aom_malloc(nvfb * nhfb * sizeof(cdef_search_ctx->sb_index[0])));
648
44.8k
  cdef_search_ctx->sb_count = 0;
649
44.8k
  CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[0],
650
44.8k
                  aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb));
651
44.8k
  CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[1],
652
44.8k
                  aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb));
653
44.8k
}
654
655
// Deallocates the memory allocated for members of CdefSearchCtx.
656
// Inputs:
657
//   cdef_search_ctx: Pointer to the structure containing parameters
658
//   related to CDEF search context.
659
// Returns:
660
//   Nothing will be returned.
661
125k
void av1_cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx) {
662
125k
  if (cdef_search_ctx) {
663
60.7k
    aom_free(cdef_search_ctx->mse[0]);
664
60.7k
    cdef_search_ctx->mse[0] = NULL;
665
60.7k
    aom_free(cdef_search_ctx->mse[1]);
666
60.7k
    cdef_search_ctx->mse[1] = NULL;
667
60.7k
    aom_free(cdef_search_ctx->sb_index);
668
60.7k
    cdef_search_ctx->sb_index = NULL;
669
60.7k
  }
670
125k
}
671
672
// Initialize the parameters related to CDEF search context.
673
// Inputs:
674
//   frame: Pointer to compressed frame buffer
675
//   ref: Pointer to the frame buffer holding the source frame
676
//   cm: Pointer to top level common structure
677
//   xd: Pointer to common current coding block structure
678
//   cdef_search_ctx: Pointer to the structure containing parameters related to
679
//   CDEF search context.
680
//   pick_method: Search method used to select CDEF parameters
681
// Returns:
682
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
683
static inline void cdef_params_init(const YV12_BUFFER_CONFIG *frame,
684
                                    const YV12_BUFFER_CONFIG *ref,
685
                                    AV1_COMMON *cm, MACROBLOCKD *xd,
686
                                    CdefSearchCtx *cdef_search_ctx,
687
44.8k
                                    CDEF_PICK_METHOD pick_method) {
688
44.8k
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
689
44.8k
  const int num_planes = av1_num_planes(cm);
690
44.8k
  cdef_search_ctx->mi_params = &cm->mi_params;
691
44.8k
  cdef_search_ctx->ref = ref;
692
44.8k
  cdef_search_ctx->nvfb =
693
44.8k
      (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
694
44.8k
  cdef_search_ctx->nhfb =
695
44.8k
      (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
696
44.8k
  cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0);
697
44.8k
  cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6);
698
44.8k
  cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method];
699
44.8k
  cdef_search_ctx->num_planes = num_planes;
700
44.8k
  cdef_search_ctx->pick_method = pick_method;
701
44.8k
  cdef_search_ctx->sb_count = 0;
702
44.8k
  cdef_search_ctx->use_highbitdepth = cm->seq_params->use_highbitdepth;
703
44.8k
  av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0,
704
44.8k
                       num_planes);
705
  // Initialize plane wise information.
706
125k
  for (int pli = 0; pli < num_planes; pli++) {
707
80.4k
    cdef_search_ctx->xdec[pli] = xd->plane[pli].subsampling_x;
708
80.4k
    cdef_search_ctx->ydec[pli] = xd->plane[pli].subsampling_y;
709
80.4k
    cdef_search_ctx->bsize[pli] =
710
80.4k
        cdef_search_ctx->ydec[pli]
711
80.4k
            ? (cdef_search_ctx->xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
712
80.4k
            : (cdef_search_ctx->xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
713
80.4k
    cdef_search_ctx->mi_wide_l2[pli] =
714
80.4k
        MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
715
80.4k
    cdef_search_ctx->mi_high_l2[pli] =
716
80.4k
        MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
717
80.4k
    cdef_search_ctx->plane[pli] = xd->plane[pli];
718
80.4k
  }
719
  // Function pointer initialization.
720
44.8k
#if CONFIG_AV1_HIGHBITDEPTH
721
44.8k
  if (cm->seq_params->use_highbitdepth) {
722
13.8k
    cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_highbd;
723
13.8k
    cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd;
724
31.0k
  } else {
725
31.0k
    cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd;
726
31.0k
    cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
727
31.0k
  }
728
#else
729
  cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd;
730
  cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
731
#endif
732
44.8k
}
733
734
// Derives the frame-level CDEF strengths directly from the quantizer instead
// of searching for them.
// Inputs:
//   cm: Pointer to top level common structure; cm->cdef_info is filled in.
//   skip_cdef: When non-zero, two strength entries are signaled (the second
//   one being zero) and the per-block cdef_strength values are left alone.
//   is_screen_content: When non-zero, the screen-content model is used to
//   predict the strengths.
// Returns:
//   Nothing will be returned.
void av1_pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef,
                           int is_screen_content) {
  CdefInfo *const cdef_info = &cm->cdef_info;
  const int bd = cm->seq_params->bit_depth;
  const int q =
      av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8);

  // Check the speed feature to avoid extra signaling.
  cdef_info->cdef_bits = skip_cdef ? 1 : 0;
  cdef_info->nb_cdef_strengths = skip_cdef ? 2 : 1;
  cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6);

  // Each strength is a quadratic in q, clamped to [0, 15] for the primary
  // part and to [0, 3] for the secondary part.
  int y_pri = 0;
  int y_sec = 0;
  int uv_pri = 0;
  int uv_sec = 0;
  if (is_screen_content) {
    // Screen-content model: evaluated in double and truncated.
    y_pri = clamp(
        (int)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02),
        0, 15);
    y_sec = clamp(
        (int)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01),
        0, 3);
    uv_pri = clamp(
        (int)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01),
        0, 15);
    uv_sec = clamp(
        (int)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+0),
        0, 3);
  } else if (frame_is_intra_only(cm)) {
    // Intra-only frames: evaluated in float and rounded to nearest.
    y_pri = clamp(
        (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f),
        0, 15);
    y_sec = clamp((int)roundf(q * q * 0.0000029167343f +
                              q * 0.0027798624f + 0.0079405f),
                  0, 3);
    uv_pri = clamp((int)roundf(q * q * -0.0000130790995f +
                               q * 0.012892405f - 0.00748388f),
                   0, 15);
    uv_sec = clamp((int)roundf(q * q * 0.0000032651783f +
                               q * 0.00035520183f + 0.00228092f),
                   0, 3);
  } else {
    // All other frames: evaluated in float and rounded to nearest.
    y_pri = clamp((int)roundf(q * q * -0.0000023593946f +
                              q * 0.0068615186f + 0.02709886f),
                  0, 15);
    y_sec = clamp((int)roundf(q * q * -0.00000057629734f +
                              q * 0.0013993345f + 0.03831067f),
                  0, 3);
    uv_pri = clamp((int)roundf(q * q * -0.0000007095069f +
                               q * 0.0034628846f + 0.00887099f),
                   0, 15);
    uv_sec = clamp((int)roundf(q * q * 0.00000023874085f +
                               q * 0.00028223585f + 0.05576307f),
                   0, 3);
  }

  // Pack as primary * CDEF_SEC_STRENGTHS + secondary.
  cdef_info->cdef_strengths[0] = y_pri * CDEF_SEC_STRENGTHS + y_sec;
  cdef_info->cdef_uv_strengths[0] = uv_pri * CDEF_SEC_STRENGTHS + uv_sec;

  // mbmi->cdef_strength is already set in the encoding stage. We don't need to
  // set it again here.
  if (skip_cdef) {
    cdef_info->cdef_strengths[1] = 0;
    cdef_info->cdef_uv_strengths[1] = 0;
    return;
  }

  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  MB_MODE_INFO **const mi_grid = mi_params->mi_grid_base;
  // The grid is NULL when real-time rate control library is used.
  if (!mi_grid) return;

  // Only one strength is signaled, so point the top-left mode info of every
  // 64x64 filter block at index 0.
  const int num_fb_rows =
      (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  const int num_fb_cols =
      (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  for (int r = 0; r < num_fb_rows; ++r) {
    for (int c = 0; c < num_fb_cols; ++c) {
      const int grid_idx =
          MI_SIZE_64X64 * r * mi_params->mi_stride + MI_SIZE_64X64 * c;
      mi_grid[grid_idx]->cdef_strength = 0;
    }
  }
}
823
824
67.0k
void av1_cdef_search(AV1_COMP *cpi) {
825
67.0k
  AV1_COMMON *cm = &cpi->common;
826
67.0k
  CDEF_CONTROL cdef_control = cpi->oxcf.tool_cfg.cdef_control;
827
828
67.0k
  assert(cdef_control != CDEF_NONE);
829
  // For CDEF_ADAPTIVE, turning off CDEF around qindex 32 was best for still
830
  // pictures
831
67.0k
  if ((cdef_control == CDEF_REFERENCE &&
832
67.0k
       cpi->ppi->rtc_ref.non_reference_frame) ||
833
67.0k
      (cdef_control == CDEF_ADAPTIVE && cpi->oxcf.mode == ALLINTRA &&
834
67.0k
       (cpi->oxcf.rc_cfg.mode == AOM_Q || cpi->oxcf.rc_cfg.mode == AOM_CQ) &&
835
67.0k
       cpi->oxcf.rc_cfg.cq_level <= 32)) {
836
0
    CdefInfo *const cdef_info = &cm->cdef_info;
837
0
    cdef_info->nb_cdef_strengths = 1;
838
0
    cdef_info->cdef_bits = 0;
839
0
    cdef_info->cdef_strengths[0] = 0;
840
0
    cdef_info->cdef_uv_strengths[0] = 0;
841
0
    return;
842
0
  }
843
844
  // Indicate if external RC is used for testing
845
67.0k
  const int rtc_ext_rc = cpi->rc.rtc_external_ratectrl;
846
67.0k
  if (rtc_ext_rc) {
847
0
    av1_pick_cdef_from_qp(cm, 0, 0);
848
0
    return;
849
0
  }
850
67.0k
  CDEF_PICK_METHOD pick_method = cpi->sf.lpf_sf.cdef_pick_method;
851
67.0k
  if (pick_method == CDEF_PICK_FROM_Q) {
852
22.2k
    const int use_screen_content_model =
853
22.2k
        cm->quant_params.base_qindex >
854
22.2k
            AOMMAX(cpi->sf.rt_sf.screen_content_cdef_filter_qindex_thresh,
855
22.2k
                   cpi->rc.best_quality + 5) &&
856
22.2k
        cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
857
22.2k
    av1_pick_cdef_from_qp(cm, cpi->sf.rt_sf.skip_cdef_sb,
858
22.2k
                          use_screen_content_model);
859
22.2k
    return;
860
22.2k
  }
861
44.8k
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
862
44.8k
  const int damping = 3 + (cm->quant_params.base_qindex >> 6);
863
44.8k
  const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
864
44.8k
                    pick_method <= CDEF_FAST_SEARCH_LVL5);
865
44.8k
  const int num_planes = av1_num_planes(cm);
866
44.8k
  MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
867
868
44.8k
  if (!cpi->cdef_search_ctx)
869
44.8k
    CHECK_MEM_ERROR(cm, cpi->cdef_search_ctx,
870
44.8k
                    aom_malloc(sizeof(*cpi->cdef_search_ctx)));
871
44.8k
  CdefSearchCtx *cdef_search_ctx = cpi->cdef_search_ctx;
872
873
  // Initialize parameters related to CDEF search context.
874
44.8k
  cdef_params_init(&cm->cur_frame->buf, cpi->source, cm, xd, cdef_search_ctx,
875
44.8k
                   pick_method);
876
  // Allocate CDEF search context buffers.
877
44.8k
  cdef_alloc_data(cm, cdef_search_ctx);
878
  // Frame level mse calculation.
879
44.8k
  if (cpi->mt_info.num_workers > 1) {
880
26.7k
    av1_cdef_mse_calc_frame_mt(cpi);
881
26.7k
  } else {
882
18.0k
    cdef_mse_calc_frame(cdef_search_ctx, cm->error);
883
18.0k
  }
884
885
  /* Search for different number of signaling bits. */
886
44.8k
  int nb_strength_bits = 0;
887
44.8k
  uint64_t best_rd = UINT64_MAX;
888
44.8k
  CdefInfo *const cdef_info = &cm->cdef_info;
889
44.8k
  int sb_count = cdef_search_ctx->sb_count;
890
44.8k
  uint64_t(*mse[2])[TOTAL_STRENGTHS];
891
44.8k
  mse[0] = cdef_search_ctx->mse[0];
892
44.8k
  mse[1] = cdef_search_ctx->mse[1];
893
  /* Calculate the maximum number of bits required to signal CDEF strengths at
894
   * block level */
895
44.8k
  const int total_strengths = nb_cdef_strengths[pick_method];
896
44.8k
  const int joint_strengths =
897
44.8k
      num_planes > 1 ? total_strengths * total_strengths : total_strengths;
898
44.8k
  const int max_signaling_bits =
899
44.8k
      joint_strengths == 1 ? 0 : get_msb(joint_strengths - 1) + 1;
900
44.8k
  int rdmult = cpi->td.mb.rdmult;
901
197k
  for (int i = 0; i <= 3; i++) {
902
179k
    if (i > max_signaling_bits) break;
903
152k
    int best_lev0[CDEF_MAX_STRENGTHS] = { 0 };
904
152k
    int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
905
152k
    const int nb_strengths = 1 << i;
906
152k
    uint64_t tot_mse;
907
152k
    if (num_planes > 1) {
908
71.1k
      tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
909
71.1k
                                           mse, sb_count, pick_method);
910
81.1k
    } else {
911
81.1k
      tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
912
81.1k
                                      pick_method);
913
81.1k
    }
914
915
152k
    const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS *
916
152k
                                              (num_planes > 1 ? 2 : 1);
917
152k
    const int rate_cost = av1_cost_literal(total_bits);
918
152k
    const uint64_t dist = tot_mse * 16;
919
152k
    const uint64_t rd = RDCOST(rdmult, rate_cost, dist);
920
152k
    if (rd < best_rd) {
921
47.0k
      best_rd = rd;
922
47.0k
      nb_strength_bits = i;
923
47.0k
      memcpy(cdef_info->cdef_strengths, best_lev0,
924
47.0k
             nb_strengths * sizeof(best_lev0[0]));
925
47.0k
      if (num_planes > 1) {
926
18.6k
        memcpy(cdef_info->cdef_uv_strengths, best_lev1,
927
18.6k
               nb_strengths * sizeof(best_lev1[0]));
928
18.6k
      }
929
47.0k
    }
930
152k
  }
931
932
44.8k
  cdef_info->cdef_bits = nb_strength_bits;
933
44.8k
  cdef_info->nb_cdef_strengths = 1 << nb_strength_bits;
934
147k
  for (int i = 0; i < sb_count; i++) {
935
103k
    uint64_t best_mse = UINT64_MAX;
936
103k
    int best_gi = 0;
937
223k
    for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) {
938
120k
      uint64_t curr = mse[0][i][cdef_info->cdef_strengths[gi]];
939
120k
      if (num_planes > 1) curr += mse[1][i][cdef_info->cdef_uv_strengths[gi]];
940
120k
      if (curr < best_mse) {
941
109k
        best_gi = gi;
942
109k
        best_mse = curr;
943
109k
      }
944
120k
    }
945
103k
    mi_params->mi_grid_base[cdef_search_ctx->sb_index[i]]->cdef_strength =
946
103k
        best_gi;
947
103k
  }
948
44.8k
  if (fast) {
949
92.0k
    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
950
47.2k
      const int luma_strength = cdef_info->cdef_strengths[j];
951
47.2k
      const int chroma_strength = cdef_info->cdef_uv_strengths[j];
952
47.2k
      int pri_strength, sec_strength;
953
954
47.2k
      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_strengths[j], pick_method,
955
47.2k
                                 luma_strength);
956
47.2k
      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_uv_strengths[j], pick_method,
957
47.2k
                                 chroma_strength);
958
47.2k
    }
959
44.8k
  }
960
961
  // For CDEF_ADAPTIVE, set primary and secondary CDEF at reduced strength for
962
  // qindexes 33 through 220.
963
  // Note 1: for odd strengths, the 0.5 discarded by ">> 1" is a significant
964
  // part of the strength when the strength is small, and because there are
965
  // few strength levels, odd strengths are reduced significantly more than a
966
  // half. This is intended behavior for reduced strength.
967
  // For example: a pri strength of 3 becomes 1, and a sec strength of 1
968
  // becomes 0.
969
  // Note 2: a (signaled) sec strength value of 3 is special as it results in an
970
  // actual sec strength of 4. We tried adding +1 to the sec strength 3 so it
971
  // maps to a reduced sec strength of 2. However, on Daala's subset1, the
972
  // resulting SSIMULACRA 2 scores were either exactly the same (at cpu-used 6),
973
  // or within noise level (at cpu-used 3). Given that there were no discernible
974
  // improvements, this special mapping was left out for reduced strength.
975
44.8k
  if (cdef_control == CDEF_ADAPTIVE && cpi->oxcf.mode == ALLINTRA &&
976
44.8k
      (cpi->oxcf.rc_cfg.mode == AOM_Q || cpi->oxcf.rc_cfg.mode == AOM_CQ) &&
977
44.8k
      cpi->oxcf.rc_cfg.cq_level <= 220) {
978
0
    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
979
0
      const int luma_strength = cdef_info->cdef_strengths[j];
980
0
      const int chroma_strength = cdef_info->cdef_uv_strengths[j];
981
982
0
      const int new_pri_luma_strength =
983
0
          (luma_strength / CDEF_SEC_STRENGTHS) >> 1;
984
0
      const int new_sec_luma_strength =
985
0
          (luma_strength % CDEF_SEC_STRENGTHS) >> 1;
986
0
      const int new_pri_chroma_strength =
987
0
          (chroma_strength / CDEF_SEC_STRENGTHS) >> 1;
988
0
      const int new_sec_chroma_strength =
989
0
          (chroma_strength % CDEF_SEC_STRENGTHS) >> 1;
990
991
0
      cdef_info->cdef_strengths[j] =
992
0
          new_pri_luma_strength * CDEF_SEC_STRENGTHS + new_sec_luma_strength;
993
0
      cdef_info->cdef_uv_strengths[j] =
994
0
          new_pri_chroma_strength * CDEF_SEC_STRENGTHS +
995
0
          new_sec_chroma_strength;
996
0
    }
997
0
  }
998
999
44.8k
  cdef_info->cdef_damping = damping;
1000
  // Deallocate CDEF search context buffers.
1001
44.8k
  av1_cdef_dealloc_data(cdef_search_ctx);
1002
44.8k
}