Coverage Report

Created: 2025-06-22 08:04

/src/aom/av1/encoder/allintra_vis.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2021, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
14
#include "config/aom_config.h"
15
16
#include "aom_util/aom_pthread.h"
17
18
#if CONFIG_TFLITE
19
#include "tensorflow/lite/c/c_api.h"
20
#include "av1/encoder/deltaq4_model.c"
21
#endif
22
23
#include "av1/common/common_data.h"
24
#include "av1/common/enums.h"
25
#include "av1/common/idct.h"
26
#include "av1/common/reconinter.h"
27
#include "av1/encoder/allintra_vis.h"
28
#include "av1/encoder/aq_variance.h"
29
#include "av1/encoder/encoder.h"
30
#include "av1/encoder/ethread.h"
31
#include "av1/encoder/hybrid_fwd_txfm.h"
32
#include "av1/encoder/model_rd.h"
33
#include "av1/encoder/rdopt_utils.h"
34
35
0
#define MB_WIENER_PRED_BLOCK_SIZE BLOCK_128X128
36
0
#define MB_WIENER_PRED_BUF_STRIDE 128
37
38
// Maximum delta-q range allowed for Variance Boost after scaling
39
#define VAR_BOOST_MAX_DELTAQ_RANGE 80
40
// Maximum quantization step boost allowed for Variance Boost
41
0
#define VAR_BOOST_MAX_BOOST 8.0
42
43
0
void av1_alloc_mb_wiener_var_pred_buf(AV1_COMMON *cm, ThreadData *td) {
44
0
  const int is_high_bitdepth = is_cur_buf_hbd(&td->mb.e_mbd);
45
0
  assert(MB_WIENER_PRED_BLOCK_SIZE < BLOCK_SIZES_ALL);
46
0
  const int buf_width = block_size_wide[MB_WIENER_PRED_BLOCK_SIZE];
47
0
  const int buf_height = block_size_high[MB_WIENER_PRED_BLOCK_SIZE];
48
0
  assert(buf_width == MB_WIENER_PRED_BUF_STRIDE);
49
0
  const size_t buf_size =
50
0
      (buf_width * buf_height * sizeof(*td->wiener_tmp_pred_buf))
51
0
      << is_high_bitdepth;
52
0
  CHECK_MEM_ERROR(cm, td->wiener_tmp_pred_buf, aom_memalign(32, buf_size));
53
0
}
54
55
0
void av1_dealloc_mb_wiener_var_pred_buf(ThreadData *td) {
56
0
  aom_free(td->wiener_tmp_pred_buf);
57
0
  td->wiener_tmp_pred_buf = NULL;
58
0
}
59
60
0
void av1_init_mb_wiener_var_buffer(AV1_COMP *cpi) {
61
0
  AV1_COMMON *cm = &cpi->common;
62
63
  // This block size is also used to determine number of workers in
64
  // multi-threading. If it is changed, one needs to change it accordingly in
65
  // "compute_num_ai_workers()".
66
0
  cpi->weber_bsize = BLOCK_8X8;
67
68
0
  if (cpi->oxcf.enable_rate_guide_deltaq) {
69
0
    if (cpi->mb_weber_stats && cpi->prep_rate_estimates &&
70
0
        cpi->ext_rate_distribution)
71
0
      return;
72
0
  } else {
73
0
    if (cpi->mb_weber_stats) return;
74
0
  }
75
76
0
  CHECK_MEM_ERROR(cm, cpi->mb_weber_stats,
77
0
                  aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols,
78
0
                             sizeof(*cpi->mb_weber_stats)));
79
80
0
  if (cpi->oxcf.enable_rate_guide_deltaq) {
81
0
    CHECK_MEM_ERROR(
82
0
        cm, cpi->prep_rate_estimates,
83
0
        aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols,
84
0
                   sizeof(*cpi->prep_rate_estimates)));
85
86
0
    CHECK_MEM_ERROR(
87
0
        cm, cpi->ext_rate_distribution,
88
0
        aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols,
89
0
                   sizeof(*cpi->ext_rate_distribution)));
90
0
  }
91
0
}
92
93
static int64_t get_satd(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
94
0
                        int mi_col) {
95
0
  AV1_COMMON *const cm = &cpi->common;
96
0
  const int mi_wide = mi_size_wide[bsize];
97
0
  const int mi_high = mi_size_high[bsize];
98
99
0
  const int mi_step = mi_size_wide[cpi->weber_bsize];
100
0
  int mb_stride = cpi->frame_info.mi_cols;
101
0
  int mb_count = 0;
102
0
  int64_t satd = 0;
103
104
0
  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
105
0
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
106
0
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
107
0
        continue;
108
109
0
      satd += cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]
110
0
                  .satd;
111
0
      ++mb_count;
112
0
    }
113
0
  }
114
115
0
  if (mb_count) satd = (int)(satd / mb_count);
116
0
  satd = AOMMAX(1, satd);
117
118
0
  return (int)satd;
119
0
}
120
121
static int64_t get_sse(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
122
0
                       int mi_col) {
123
0
  AV1_COMMON *const cm = &cpi->common;
124
0
  const int mi_wide = mi_size_wide[bsize];
125
0
  const int mi_high = mi_size_high[bsize];
126
127
0
  const int mi_step = mi_size_wide[cpi->weber_bsize];
128
0
  int mb_stride = cpi->frame_info.mi_cols;
129
0
  int mb_count = 0;
130
0
  int64_t distortion = 0;
131
132
0
  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
133
0
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
134
0
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
135
0
        continue;
136
137
0
      distortion +=
138
0
          cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]
139
0
              .distortion;
140
0
      ++mb_count;
141
0
    }
142
0
  }
143
144
0
  if (mb_count) distortion = (int)(distortion / mb_count);
145
0
  distortion = AOMMAX(1, distortion);
146
147
0
  return (int)distortion;
148
0
}
149
150
static double get_max_scale(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
151
0
                            int mi_row, int mi_col) {
152
0
  const AV1_COMMON *const cm = &cpi->common;
153
0
  const int mi_wide = mi_size_wide[bsize];
154
0
  const int mi_high = mi_size_high[bsize];
155
0
  const int mi_step = mi_size_wide[cpi->weber_bsize];
156
0
  int mb_stride = cpi->frame_info.mi_cols;
157
0
  double min_max_scale = 10.0;
158
159
0
  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
160
0
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
161
0
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
162
0
        continue;
163
0
      const WeberStats *weber_stats =
164
0
          &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)];
165
0
      if (weber_stats->max_scale < 1.0) continue;
166
0
      if (weber_stats->max_scale < min_max_scale)
167
0
        min_max_scale = weber_stats->max_scale;
168
0
    }
169
0
  }
170
0
  return min_max_scale;
171
0
}
172
173
static int get_window_wiener_var(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
174
0
                                 int mi_row, int mi_col) {
175
0
  const AV1_COMMON *const cm = &cpi->common;
176
0
  const int mi_wide = mi_size_wide[bsize];
177
0
  const int mi_high = mi_size_high[bsize];
178
179
0
  const int mi_step = mi_size_wide[cpi->weber_bsize];
180
0
  int sb_wiener_var = 0;
181
0
  int mb_stride = cpi->frame_info.mi_cols;
182
0
  int mb_count = 0;
183
0
  double base_num = 1;
184
0
  double base_den = 1;
185
0
  double base_reg = 1;
186
187
0
  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
188
0
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
189
0
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
190
0
        continue;
191
192
0
      const WeberStats *weber_stats =
193
0
          &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)];
194
195
0
      base_num += ((double)weber_stats->distortion) *
196
0
                  sqrt((double)weber_stats->src_variance) *
197
0
                  weber_stats->rec_pix_max;
198
199
0
      base_den += fabs(
200
0
          weber_stats->rec_pix_max * sqrt((double)weber_stats->src_variance) -
201
0
          weber_stats->src_pix_max * sqrt((double)weber_stats->rec_variance));
202
203
0
      base_reg += sqrt((double)weber_stats->distortion) *
204
0
                  sqrt((double)weber_stats->src_pix_max) * 0.1;
205
0
      ++mb_count;
206
0
    }
207
0
  }
208
209
0
  sb_wiener_var =
210
0
      (int)(((base_num + base_reg) / (base_den + base_reg)) / mb_count);
211
0
  sb_wiener_var = AOMMAX(1, sb_wiener_var);
212
213
0
  return (int)sb_wiener_var;
214
0
}
215
216
static int get_var_perceptual_ai(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
217
0
                                 int mi_row, int mi_col) {
218
0
  const AV1_COMMON *const cm = &cpi->common;
219
0
  const int mi_wide = mi_size_wide[bsize];
220
0
  const int mi_high = mi_size_high[bsize];
221
222
0
  int sb_wiener_var = get_window_wiener_var(cpi, bsize, mi_row, mi_col);
223
224
0
  if (mi_row >= (mi_high / 2)) {
225
0
    sb_wiener_var =
226
0
        AOMMIN(sb_wiener_var,
227
0
               get_window_wiener_var(cpi, bsize, mi_row - mi_high / 2, mi_col));
228
0
  }
229
0
  if (mi_row <= (cm->mi_params.mi_rows - mi_high - (mi_high / 2))) {
230
0
    sb_wiener_var =
231
0
        AOMMIN(sb_wiener_var,
232
0
               get_window_wiener_var(cpi, bsize, mi_row + mi_high / 2, mi_col));
233
0
  }
234
0
  if (mi_col >= (mi_wide / 2)) {
235
0
    sb_wiener_var =
236
0
        AOMMIN(sb_wiener_var,
237
0
               get_window_wiener_var(cpi, bsize, mi_row, mi_col - mi_wide / 2));
238
0
  }
239
0
  if (mi_col <= (cm->mi_params.mi_cols - mi_wide - (mi_wide / 2))) {
240
0
    sb_wiener_var =
241
0
        AOMMIN(sb_wiener_var,
242
0
               get_window_wiener_var(cpi, bsize, mi_row, mi_col + mi_wide / 2));
243
0
  }
244
245
0
  return sb_wiener_var;
246
0
}
247
248
0
static int rate_estimator(const tran_low_t *qcoeff, int eob, TX_SIZE tx_size) {
249
0
  const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT];
250
251
0
  assert((1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]) >= eob);
252
0
  int rate_cost = 1;
253
254
0
  for (int idx = 0; idx < eob; ++idx) {
255
0
    int abs_level = abs(qcoeff[scan_order->scan[idx]]);
256
0
    rate_cost += (int)(log1p(abs_level) / log(2.0)) + 1 + (abs_level > 0);
257
0
  }
258
259
0
  return (rate_cost << AV1_PROB_COST_SHIFT);
260
0
}
261
262
void av1_calc_mb_wiener_var_row(AV1_COMP *const cpi, MACROBLOCK *x,
263
                                MACROBLOCKD *xd, const int mi_row,
264
                                int16_t *src_diff, tran_low_t *coeff,
265
                                tran_low_t *qcoeff, tran_low_t *dqcoeff,
266
                                double *sum_rec_distortion,
267
0
                                double *sum_est_rate, uint8_t *pred_buffer) {
268
0
  AV1_COMMON *const cm = &cpi->common;
269
0
  uint8_t *buffer = cpi->source->y_buffer;
270
0
  int buf_stride = cpi->source->y_stride;
271
0
  MB_MODE_INFO mbmi;
272
0
  memset(&mbmi, 0, sizeof(mbmi));
273
0
  MB_MODE_INFO *mbmi_ptr = &mbmi;
274
0
  xd->mi = &mbmi_ptr;
275
0
  const BLOCK_SIZE bsize = cpi->weber_bsize;
276
0
  const TX_SIZE tx_size = max_txsize_lookup[bsize];
277
0
  const int block_size = tx_size_wide[tx_size];
278
0
  const int coeff_count = block_size * block_size;
279
0
  const int mb_step = mi_size_wide[bsize];
280
0
  const BitDepthInfo bd_info = get_bit_depth_info(xd);
281
0
  const MultiThreadInfo *const mt_info = &cpi->mt_info;
282
0
  const AV1EncAllIntraMultiThreadInfo *const intra_mt = &mt_info->intra_mt;
283
0
  AV1EncRowMultiThreadSync *const intra_row_mt_sync =
284
0
      &cpi->ppi->intra_row_mt_sync;
285
0
  const int mi_cols = cm->mi_params.mi_cols;
286
0
  const int mt_thread_id = mi_row / mb_step;
287
  // TODO(chengchen): test different unit step size
288
0
  const int mt_unit_step = mi_size_wide[MB_WIENER_MT_UNIT_SIZE];
289
0
  const int mt_unit_cols = (mi_cols + (mt_unit_step >> 1)) / mt_unit_step;
290
0
  int mt_unit_col = 0;
291
0
  const int is_high_bitdepth = is_cur_buf_hbd(xd);
292
293
0
  uint8_t *dst_buffer = pred_buffer;
294
0
  const int dst_buffer_stride = MB_WIENER_PRED_BUF_STRIDE;
295
296
0
  if (is_high_bitdepth) {
297
0
    uint16_t *pred_buffer_16 = (uint16_t *)pred_buffer;
298
0
    dst_buffer = CONVERT_TO_BYTEPTR(pred_buffer_16);
299
0
  }
300
301
0
  for (int mi_col = 0; mi_col < mi_cols; mi_col += mb_step) {
302
0
    if (mi_col % mt_unit_step == 0) {
303
0
      intra_mt->intra_sync_read_ptr(intra_row_mt_sync, mt_thread_id,
304
0
                                    mt_unit_col);
305
0
#if CONFIG_MULTITHREAD
306
0
      const int num_workers =
307
0
          AOMMIN(mt_info->num_mod_workers[MOD_AI], mt_info->num_workers);
308
0
      if (num_workers > 1) {
309
0
        const AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
310
0
        pthread_mutex_lock(enc_row_mt->mutex_);
311
0
        const bool exit = enc_row_mt->mb_wiener_mt_exit;
312
0
        pthread_mutex_unlock(enc_row_mt->mutex_);
313
        // Stop further processing in case any worker has encountered an error.
314
0
        if (exit) break;
315
0
      }
316
0
#endif
317
0
    }
318
319
0
    PREDICTION_MODE best_mode = DC_PRED;
320
0
    int best_intra_cost = INT_MAX;
321
0
    const int mi_width = mi_size_wide[bsize];
322
0
    const int mi_height = mi_size_high[bsize];
323
0
    set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd,
324
0
                          mi_row, mi_col);
325
0
    set_mi_row_col(xd, &xd->tile, mi_row, mi_height, mi_col, mi_width,
326
0
                   AOMMIN(mi_row + mi_height, cm->mi_params.mi_rows),
327
0
                   AOMMIN(mi_col + mi_width, cm->mi_params.mi_cols));
328
0
    set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize],
329
0
                 av1_num_planes(cm));
330
0
    xd->mi[0]->bsize = bsize;
331
0
    xd->mi[0]->motion_mode = SIMPLE_TRANSLATION;
332
    // Set above and left mbmi to NULL as they are not available in the
333
    // preprocessing stage.
334
    // They are used to determine intra edge filter types in intra prediction.
335
0
    if (xd->up_available) {
336
0
      xd->above_mbmi = NULL;
337
0
    }
338
0
    if (xd->left_available) {
339
0
      xd->left_mbmi = NULL;
340
0
    }
341
0
    uint8_t *mb_buffer =
342
0
        buffer + mi_row * MI_SIZE * buf_stride + mi_col * MI_SIZE;
343
0
    for (PREDICTION_MODE mode = INTRA_MODE_START; mode < INTRA_MODE_END;
344
0
         ++mode) {
345
      // TODO(chengchen): Here we use src instead of reconstructed frame as
346
      // the intra predictor to make single and multithread version match.
347
      // Ideally we want to use the reconstructed.
348
0
      av1_predict_intra_block(
349
0
          xd, cm->seq_params->sb_size, cm->seq_params->enable_intra_edge_filter,
350
0
          block_size, block_size, tx_size, mode, 0, 0, FILTER_INTRA_MODES,
351
0
          mb_buffer, buf_stride, dst_buffer, dst_buffer_stride, 0, 0, 0);
352
0
      av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size,
353
0
                         mb_buffer, buf_stride, dst_buffer, dst_buffer_stride);
354
0
      av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);
355
0
      int intra_cost = aom_satd(coeff, coeff_count);
356
0
      if (intra_cost < best_intra_cost) {
357
0
        best_intra_cost = intra_cost;
358
0
        best_mode = mode;
359
0
      }
360
0
    }
361
362
0
    av1_predict_intra_block(
363
0
        xd, cm->seq_params->sb_size, cm->seq_params->enable_intra_edge_filter,
364
0
        block_size, block_size, tx_size, best_mode, 0, 0, FILTER_INTRA_MODES,
365
0
        mb_buffer, buf_stride, dst_buffer, dst_buffer_stride, 0, 0, 0);
366
0
    av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size,
367
0
                       mb_buffer, buf_stride, dst_buffer, dst_buffer_stride);
368
0
    av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);
369
370
0
    const struct macroblock_plane *const p = &x->plane[0];
371
0
    uint16_t eob;
372
0
    const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT];
373
0
    QUANT_PARAM quant_param;
374
0
    int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
375
0
    av1_setup_quant(tx_size, 0, AV1_XFORM_QUANT_FP, 0, &quant_param);
376
0
#if CONFIG_AV1_HIGHBITDEPTH
377
0
    if (is_cur_buf_hbd(xd)) {
378
0
      av1_highbd_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
379
0
                                    scan_order, &quant_param);
380
0
    } else {
381
0
      av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
382
0
                             scan_order, &quant_param);
383
0
    }
384
#else
385
    av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob, scan_order,
386
                           &quant_param);
387
#endif  // CONFIG_AV1_HIGHBITDEPTH
388
389
0
    if (cpi->oxcf.enable_rate_guide_deltaq) {
390
0
      const int rate_cost = rate_estimator(qcoeff, eob, tx_size);
391
0
      cpi->prep_rate_estimates[(mi_row / mb_step) * cpi->frame_info.mi_cols +
392
0
                               (mi_col / mb_step)] = rate_cost;
393
0
    }
394
395
0
    av1_inverse_transform_block(xd, dqcoeff, 0, DCT_DCT, tx_size, dst_buffer,
396
0
                                dst_buffer_stride, eob, 0);
397
0
    WeberStats *weber_stats =
398
0
        &cpi->mb_weber_stats[(mi_row / mb_step) * cpi->frame_info.mi_cols +
399
0
                             (mi_col / mb_step)];
400
401
0
    weber_stats->rec_pix_max = 1;
402
0
    weber_stats->rec_variance = 0;
403
0
    weber_stats->src_pix_max = 1;
404
0
    weber_stats->src_variance = 0;
405
0
    weber_stats->distortion = 0;
406
407
0
    int64_t src_mean = 0;
408
0
    int64_t rec_mean = 0;
409
0
    int64_t dist_mean = 0;
410
411
0
    for (int pix_row = 0; pix_row < block_size; ++pix_row) {
412
0
      for (int pix_col = 0; pix_col < block_size; ++pix_col) {
413
0
        int src_pix, rec_pix;
414
0
#if CONFIG_AV1_HIGHBITDEPTH
415
0
        if (is_cur_buf_hbd(xd)) {
416
0
          uint16_t *src = CONVERT_TO_SHORTPTR(mb_buffer);
417
0
          uint16_t *rec = CONVERT_TO_SHORTPTR(dst_buffer);
418
0
          src_pix = src[pix_row * buf_stride + pix_col];
419
0
          rec_pix = rec[pix_row * dst_buffer_stride + pix_col];
420
0
        } else {
421
0
          src_pix = mb_buffer[pix_row * buf_stride + pix_col];
422
0
          rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
423
0
        }
424
#else
425
        src_pix = mb_buffer[pix_row * buf_stride + pix_col];
426
        rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
427
#endif
428
0
        src_mean += src_pix;
429
0
        rec_mean += rec_pix;
430
0
        dist_mean += src_pix - rec_pix;
431
0
        weber_stats->src_variance += src_pix * src_pix;
432
0
        weber_stats->rec_variance += rec_pix * rec_pix;
433
0
        weber_stats->src_pix_max = AOMMAX(weber_stats->src_pix_max, src_pix);
434
0
        weber_stats->rec_pix_max = AOMMAX(weber_stats->rec_pix_max, rec_pix);
435
0
        weber_stats->distortion += (src_pix - rec_pix) * (src_pix - rec_pix);
436
0
      }
437
0
    }
438
439
0
    if (cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) {
440
0
      *sum_rec_distortion += weber_stats->distortion;
441
0
      int est_block_rate = 0;
442
0
      int64_t est_block_dist = 0;
443
0
      model_rd_sse_fn[MODELRD_LEGACY](cpi, x, bsize, 0, weber_stats->distortion,
444
0
                                      pix_num, &est_block_rate,
445
0
                                      &est_block_dist);
446
0
      *sum_est_rate += est_block_rate;
447
0
    }
448
449
0
    weber_stats->src_variance -= (src_mean * src_mean) / pix_num;
450
0
    weber_stats->rec_variance -= (rec_mean * rec_mean) / pix_num;
451
0
    weber_stats->distortion -= (dist_mean * dist_mean) / pix_num;
452
0
    weber_stats->satd = best_intra_cost;
453
454
0
    qcoeff[0] = 0;
455
0
    int max_scale = 0;
456
0
    for (int idx = 1; idx < coeff_count; ++idx) {
457
0
      const int abs_qcoeff = abs(qcoeff[idx]);
458
0
      max_scale = AOMMAX(max_scale, abs_qcoeff);
459
0
    }
460
0
    weber_stats->max_scale = max_scale;
461
462
0
    if ((mi_col + mb_step) % mt_unit_step == 0 ||
463
0
        (mi_col + mb_step) >= mi_cols) {
464
0
      intra_mt->intra_sync_write_ptr(intra_row_mt_sync, mt_thread_id,
465
0
                                     mt_unit_col, mt_unit_cols);
466
0
      ++mt_unit_col;
467
0
    }
468
0
  }
469
  // Set the pointer to null since mbmi is only allocated inside this function.
470
0
  xd->mi = NULL;
471
0
}
472
473
static void calc_mb_wiener_var(AV1_COMP *const cpi, double *sum_rec_distortion,
474
0
                               double *sum_est_rate) {
475
0
  MACROBLOCK *x = &cpi->td.mb;
476
0
  MACROBLOCKD *xd = &x->e_mbd;
477
0
  const BLOCK_SIZE bsize = cpi->weber_bsize;
478
0
  const int mb_step = mi_size_wide[bsize];
479
0
  DECLARE_ALIGNED(32, int16_t, src_diff[32 * 32]);
480
0
  DECLARE_ALIGNED(32, tran_low_t, coeff[32 * 32]);
481
0
  DECLARE_ALIGNED(32, tran_low_t, qcoeff[32 * 32]);
482
0
  DECLARE_ALIGNED(32, tran_low_t, dqcoeff[32 * 32]);
483
0
  for (int mi_row = 0; mi_row < cpi->frame_info.mi_rows; mi_row += mb_step) {
484
0
    av1_calc_mb_wiener_var_row(cpi, x, xd, mi_row, src_diff, coeff, qcoeff,
485
0
                               dqcoeff, sum_rec_distortion, sum_est_rate,
486
0
                               cpi->td.wiener_tmp_pred_buf);
487
0
  }
488
0
}
489
490
static int64_t estimate_wiener_var_norm(AV1_COMP *const cpi,
491
0
                                        const BLOCK_SIZE norm_block_size) {
492
0
  const AV1_COMMON *const cm = &cpi->common;
493
0
  int64_t norm_factor = 1;
494
0
  assert(norm_block_size >= BLOCK_16X16 && norm_block_size <= BLOCK_128X128);
495
0
  const int norm_step = mi_size_wide[norm_block_size];
496
0
  double sb_wiener_log = 0;
497
0
  double sb_count = 0;
498
0
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) {
499
0
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += norm_step) {
500
0
      const int sb_wiener_var =
501
0
          get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);
502
0
      const int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
503
0
      const int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
504
0
      const double scaled_satd = (double)satd / sqrt((double)sse);
505
0
      sb_wiener_log += scaled_satd * log(sb_wiener_var);
506
0
      sb_count += scaled_satd;
507
0
    }
508
0
  }
509
0
  if (sb_count > 0) norm_factor = (int64_t)(exp(sb_wiener_log / sb_count));
510
0
  norm_factor = AOMMAX(1, norm_factor);
511
512
0
  return norm_factor;
513
0
}
514
515
static void automatic_intra_tools_off(AV1_COMP *cpi,
516
                                      const double sum_rec_distortion,
517
0
                                      const double sum_est_rate) {
518
0
  if (!cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) return;
519
520
  // Thresholds
521
0
  const int high_quality_qindex = 128;
522
0
  const double high_quality_bpp = 2.0;
523
0
  const double high_quality_dist_per_pix = 4.0;
524
525
0
  AV1_COMMON *const cm = &cpi->common;
526
0
  const int qindex = cm->quant_params.base_qindex;
527
0
  const double dist_per_pix =
528
0
      (double)sum_rec_distortion / (cm->width * cm->height);
529
  // The estimated bpp is not accurate, so it is divided by an empirical
  // constant of 100.
530
0
  const double estimate_bpp = sum_est_rate / (cm->width * cm->height * 100);
531
532
0
  if (qindex < high_quality_qindex && estimate_bpp > high_quality_bpp &&
533
0
      dist_per_pix < high_quality_dist_per_pix) {
534
0
    cpi->oxcf.intra_mode_cfg.enable_smooth_intra = 0;
535
0
    cpi->oxcf.intra_mode_cfg.enable_paeth_intra = 0;
536
0
    cpi->oxcf.intra_mode_cfg.enable_cfl_intra = 0;
537
0
    cpi->oxcf.intra_mode_cfg.enable_diagonal_intra = 0;
538
0
  }
539
0
}
540
541
0
static void ext_rate_guided_quantization(AV1_COMP *cpi) {
542
  // Calculation uses 8x8.
543
0
  const int mb_step = mi_size_wide[cpi->weber_bsize];
544
  // Accumulate to 16x16, step size is in the unit of mi.
545
0
  const int block_step = 4;
546
547
0
  const char *filename = cpi->oxcf.rate_distribution_info;
548
0
  FILE *pfile = fopen(filename, "r");
549
0
  if (pfile == NULL) {
550
0
    assert(pfile != NULL);
551
0
    return;
552
0
  }
553
554
0
  double ext_rate_sum = 0.0;
555
0
  for (int row = 0; row < cpi->frame_info.mi_rows; row += block_step) {
556
0
    for (int col = 0; col < cpi->frame_info.mi_cols; col += block_step) {
557
0
      float val;
558
0
      const int fields_converted = fscanf(pfile, "%f", &val);
559
0
      if (fields_converted != 1) {
560
0
        assert(fields_converted == 1);
561
0
        fclose(pfile);
562
0
        return;
563
0
      }
564
0
      ext_rate_sum += val;
565
0
      cpi->ext_rate_distribution[(row / mb_step) * cpi->frame_info.mi_cols +
566
0
                                 (col / mb_step)] = val;
567
0
    }
568
0
  }
569
0
  fclose(pfile);
570
571
0
  int uniform_rate_sum = 0;
572
0
  for (int row = 0; row < cpi->frame_info.mi_rows; row += block_step) {
573
0
    for (int col = 0; col < cpi->frame_info.mi_cols; col += block_step) {
574
0
      int rate_sum = 0;
575
0
      for (int r = 0; r < block_step; r += mb_step) {
576
0
        for (int c = 0; c < block_step; c += mb_step) {
577
0
          const int mi_row = row + r;
578
0
          const int mi_col = col + c;
579
0
          rate_sum += cpi->prep_rate_estimates[(mi_row / mb_step) *
580
0
                                                   cpi->frame_info.mi_cols +
581
0
                                               (mi_col / mb_step)];
582
0
        }
583
0
      }
584
0
      uniform_rate_sum += rate_sum;
585
0
    }
586
0
  }
587
588
0
  const double scale = uniform_rate_sum / ext_rate_sum;
589
0
  cpi->ext_rate_scale = scale;
590
0
}
591
592
0
void av1_set_mb_wiener_variance(AV1_COMP *cpi) {
593
0
  AV1_COMMON *const cm = &cpi->common;
594
0
  const SequenceHeader *const seq_params = cm->seq_params;
595
0
  if (aom_realloc_frame_buffer(
596
0
          &cm->cur_frame->buf, cm->width, cm->height, seq_params->subsampling_x,
597
0
          seq_params->subsampling_y, seq_params->use_highbitdepth,
598
0
          cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL,
599
0
          NULL, cpi->alloc_pyramid, 0))
600
0
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
601
0
                       "Failed to allocate frame buffer");
602
0
  av1_alloc_mb_wiener_var_pred_buf(&cpi->common, &cpi->td);
603
0
  cpi->norm_wiener_variance = 0;
604
605
0
  MACROBLOCK *x = &cpi->td.mb;
606
0
  MACROBLOCKD *xd = &x->e_mbd;
607
  // xd->mi needs to be setup since it is used in av1_frame_init_quantizer.
608
0
  MB_MODE_INFO mbmi;
609
0
  memset(&mbmi, 0, sizeof(mbmi));
610
0
  MB_MODE_INFO *mbmi_ptr = &mbmi;
611
0
  xd->mi = &mbmi_ptr;
612
0
  cm->quant_params.base_qindex = cpi->oxcf.rc_cfg.cq_level;
613
0
  av1_frame_init_quantizer(cpi);
614
615
0
  double sum_rec_distortion = 0.0;
616
0
  double sum_est_rate = 0.0;
617
618
0
  MultiThreadInfo *const mt_info = &cpi->mt_info;
619
0
  const int num_workers =
620
0
      AOMMIN(mt_info->num_mod_workers[MOD_AI], mt_info->num_workers);
621
0
  AV1EncAllIntraMultiThreadInfo *const intra_mt = &mt_info->intra_mt;
622
0
  intra_mt->intra_sync_read_ptr = av1_row_mt_sync_read_dummy;
623
0
  intra_mt->intra_sync_write_ptr = av1_row_mt_sync_write_dummy;
624
  // Calculate differential contrast for each block for the entire image.
625
  // TODO(chengchen): properly accumulate the distortion and rate in
626
  // av1_calc_mb_wiener_var_mt(). Until then, call calc_mb_wiener_var() if
627
  // auto_intra_tools_off is true.
628
0
  if (num_workers > 1 && !cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) {
629
0
    intra_mt->intra_sync_read_ptr = av1_row_mt_sync_read;
630
0
    intra_mt->intra_sync_write_ptr = av1_row_mt_sync_write;
631
0
    av1_calc_mb_wiener_var_mt(cpi, num_workers, &sum_rec_distortion,
632
0
                              &sum_est_rate);
633
0
  } else {
634
0
    calc_mb_wiener_var(cpi, &sum_rec_distortion, &sum_est_rate);
635
0
  }
636
637
  // Determine whether to turn off several intra coding tools.
638
0
  automatic_intra_tools_off(cpi, sum_rec_distortion, sum_est_rate);
639
640
  // Read external rate distribution and use it to guide delta quantization
641
0
  if (cpi->oxcf.enable_rate_guide_deltaq) ext_rate_guided_quantization(cpi);
642
643
0
  const BLOCK_SIZE norm_block_size = cm->seq_params->sb_size;
644
0
  cpi->norm_wiener_variance = estimate_wiener_var_norm(cpi, norm_block_size);
645
0
  const int norm_step = mi_size_wide[norm_block_size];
646
647
0
  double sb_wiener_log = 0;
648
0
  double sb_count = 0;
649
0
  for (int its_cnt = 0; its_cnt < 2; ++its_cnt) {
650
0
    sb_wiener_log = 0;
651
0
    sb_count = 0;
652
0
    for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) {
653
0
      for (int mi_col = 0; mi_col < cm->mi_params.mi_cols;
654
0
           mi_col += norm_step) {
655
0
        int sb_wiener_var =
656
0
            get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);
657
658
0
        double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
659
0
        double min_max_scale = AOMMAX(
660
0
            1.0, get_max_scale(cpi, cm->seq_params->sb_size, mi_row, mi_col));
661
662
0
        beta = AOMMIN(beta, 4);
663
0
        beta = AOMMAX(beta, 0.25);
664
665
0
        if (beta < 1 / min_max_scale) continue;
666
667
0
        sb_wiener_var = (int)(cpi->norm_wiener_variance / beta);
668
669
0
        int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
670
0
        int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
671
0
        double scaled_satd = (double)satd / sqrt((double)sse);
672
0
        sb_wiener_log += scaled_satd * log(sb_wiener_var);
673
0
        sb_count += scaled_satd;
674
0
      }
675
0
    }
676
677
0
    if (sb_count > 0)
678
0
      cpi->norm_wiener_variance = (int64_t)(exp(sb_wiener_log / sb_count));
679
0
    cpi->norm_wiener_variance = AOMMAX(1, cpi->norm_wiener_variance);
680
0
  }
681
682
  // Set the pointer to null since mbmi is only allocated inside this function.
683
0
  xd->mi = NULL;
684
0
  aom_free_frame_buffer(&cm->cur_frame->buf);
685
0
  av1_dealloc_mb_wiener_var_pred_buf(&cpi->td);
686
0
}
687
688
static int get_rate_guided_quantizer(const AV1_COMP *const cpi,
689
0
                                     BLOCK_SIZE bsize, int mi_row, int mi_col) {
690
  // Calculation uses 8x8.
691
0
  const int mb_step = mi_size_wide[cpi->weber_bsize];
692
  // Accumulate to 16x16
693
0
  const int block_step = mi_size_wide[BLOCK_16X16];
694
0
  double sb_rate_hific = 0.0;
695
0
  double sb_rate_uniform = 0.0;
696
0
  for (int row = mi_row; row < mi_row + mi_size_wide[bsize];
697
0
       row += block_step) {
698
0
    for (int col = mi_col; col < mi_col + mi_size_high[bsize];
699
0
         col += block_step) {
700
0
      sb_rate_hific +=
701
0
          cpi->ext_rate_distribution[(row / mb_step) * cpi->frame_info.mi_cols +
702
0
                                     (col / mb_step)];
703
704
0
      for (int r = 0; r < block_step; r += mb_step) {
705
0
        for (int c = 0; c < block_step; c += mb_step) {
706
0
          const int this_row = row + r;
707
0
          const int this_col = col + c;
708
0
          sb_rate_uniform +=
709
0
              cpi->prep_rate_estimates[(this_row / mb_step) *
710
0
                                           cpi->frame_info.mi_cols +
711
0
                                       (this_col / mb_step)];
712
0
        }
713
0
      }
714
0
    }
715
0
  }
716
0
  sb_rate_hific *= cpi->ext_rate_scale;
717
718
0
  const double weight = 1.0;
719
0
  const double rate_diff =
720
0
      weight * (sb_rate_hific - sb_rate_uniform) / sb_rate_uniform;
721
0
  double scale = pow(2, rate_diff);
722
723
0
  scale = scale * scale;
724
0
  double min_max_scale = AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col));
725
0
  scale = 1.0 / AOMMIN(1.0 / scale, min_max_scale);
726
727
0
  const AV1_COMMON *const cm = &cpi->common;
728
0
  const int base_qindex = cm->quant_params.base_qindex;
729
0
  int offset =
730
0
      av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, scale);
731
0
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
732
0
  const int max_offset = delta_q_info->delta_q_res * 10;
733
0
  offset = AOMMIN(offset, max_offset - 1);
734
0
  offset = AOMMAX(offset, -max_offset + 1);
735
0
  int qindex = cm->quant_params.base_qindex + offset;
736
0
  qindex = AOMMIN(qindex, MAXQ);
737
0
  qindex = AOMMAX(qindex, MINQ);
738
0
  if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1);
739
740
0
  return qindex;
741
0
}
742
743
// Returns the superblock qindex for perceptual all-intra coding, derived from
// the ratio of the frame-level Wiener variance to this superblock's Wiener
// variance, clamped to a bounded delta-q range around the base qindex.
int av1_get_sbq_perceptual_ai(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                              int mi_row, int mi_col) {
  // When rate-guided delta-q is enabled, defer to that quantizer instead.
  if (cpi->oxcf.enable_rate_guide_deltaq) {
    return get_rate_guided_quantizer(cpi, bsize, mi_row, mi_col);
  }

  const AV1_COMMON *const cm = &cpi->common;
  const int base_qindex = cm->quant_params.base_qindex;

  // Beta > 1 means this superblock is smoother than the frame average and
  // deserves a finer quantizer; beta < 1 means the opposite.
  const int sb_wiener_var = get_var_perceptual_ai(cpi, bsize, mi_row, mi_col);
  double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
  const double min_max_scale =
      AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col));
  beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale);

  // Cap beta such that the delta q value is not much far away from the base q.
  beta = AOMMIN(beta, 4);
  beta = AOMMAX(beta, 0.25);

  int offset =
      av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, beta);
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  offset = AOMMIN(offset, delta_q_info->delta_q_res * 20 - 1);
  offset = AOMMAX(offset, -delta_q_info->delta_q_res * 20 + 1);

  int qindex = base_qindex + offset;
  qindex = AOMMIN(qindex, MAXQ);
  qindex = AOMMAX(qindex, MINQ);
  // Keep the superblock lossy whenever the frame itself is lossy.
  if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1);

  return qindex;
}
771
772
0
// Lazily allocates the per-macroblock delta-q buffer used by user-rating-based
// variance coding. Safe to call more than once; subsequent calls are no-ops.
void av1_init_mb_ur_var_buffer(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;

  // Already allocated on an earlier call.
  if (cpi->mb_delta_q != NULL) return;

  const int num_mbs = cpi->frame_info.mb_rows * cpi->frame_info.mb_cols;
  CHECK_MEM_ERROR(cm, cpi->mb_delta_q,
                  aom_calloc(num_mbs, sizeof(*cpi->mb_delta_q)));
}
#if CONFIG_TFLITE
// Runs the delta-q TFLite model over every |block_size| block of the luma
// plane and stores the two per-block model outputs in |predicts0| and
// |predicts1| (each of size num_rows * num_cols).
// Returns 0 on success, 1 on failure.
//
// Fixes vs. the previous version:
//  - |options| was used (SetNumThreads) before its NULL check, dereferencing
//    NULL on allocation failure.
//  - |input_data| was leaked on the Invoke-failure and output-tensor-NULL
//    error paths. All resources are now released through one cleanup path.
//  - The output tensor size is validated before copying into the fixed-size
//    local buffer.
static int model_predict(BLOCK_SIZE block_size, int num_cols, int num_rows,
                         int bit_depth, uint8_t *y_buffer, int y_stride,
                         float *predicts0, float *predicts1) {
  int ret = 1;
  TfLiteInterpreterOptions *options = NULL;
  TfLiteInterpreter *interpreter = NULL;
  float *input_data = NULL;

  // Create the model and interpreter options.
  TfLiteModel *model =
      TfLiteModelCreate(av1_deltaq4_model_file, av1_deltaq4_model_fsize);
  if (model == NULL) return 1;

  options = TfLiteInterpreterOptionsCreate();
  if (options == NULL) goto cleanup;
  TfLiteInterpreterOptionsSetNumThreads(options, 2);

  // Create the interpreter.
  interpreter = TfLiteInterpreterCreate(model, options);
  if (interpreter == NULL) goto cleanup;

  // Allocate tensors and populate the input tensor data.
  TfLiteInterpreterAllocateTensors(interpreter);
  TfLiteTensor *input_tensor = TfLiteInterpreterGetInputTensor(interpreter, 0);
  if (input_tensor == NULL) goto cleanup;

  const size_t input_size = TfLiteTensorByteSize(input_tensor);
  input_data = aom_calloc(input_size, 1);
  if (input_data == NULL) goto cleanup;

  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      // Each mi unit spans 4 luma pixels, hence the << 2.
      const int row_offset = (row * num_mi_h) << 2;
      const int col_offset = (col * num_mi_w) << 2;

      uint8_t *buf = y_buffer + row_offset * y_stride + col_offset;
      int r = row_offset, pos = 0;
      // Normalize pixels to [0, 1] for the model input.
      const float base = (float)((1 << bit_depth) - 1);
      while (r < row_offset + (num_mi_h << 2)) {
        for (int c = 0; c < (num_mi_w << 2); ++c) {
          input_data[pos++] = bit_depth > 8
                                  ? (float)*CONVERT_TO_SHORTPTR(buf + c) / base
                                  : (float)*(buf + c) / base;
        }
        buf += y_stride;
        ++r;
      }
      TfLiteTensorCopyFromBuffer(input_tensor, input_data, input_size);

      // Execute inference.
      if (TfLiteInterpreterInvoke(interpreter) != kTfLiteOk) goto cleanup;

      // Extract the output tensor data.
      const TfLiteTensor *output_tensor =
          TfLiteInterpreterGetOutputTensor(interpreter, 0);
      if (output_tensor == NULL) goto cleanup;

      const size_t output_size = TfLiteTensorByteSize(output_tensor);
      float output_data[2];
      // Guard against a model whose output does not fit the local buffer.
      if (output_size > sizeof(output_data)) goto cleanup;

      TfLiteTensorCopyToBuffer(output_tensor, output_data, output_size);
      predicts0[row * num_cols + col] = output_data[0];
      predicts1[row * num_cols + col] = output_data[1];
    }
  }

  ret = 0;

cleanup:
  // Dispose of all resources on both success and failure paths.
  aom_free(input_data);
  if (interpreter != NULL) TfLiteInterpreterDelete(interpreter);
  if (options != NULL) TfLiteInterpreterOptionsDelete(options);
  TfLiteModelDelete(model);
  return ret;
}
880
881
// Computes per-superblock delta-q values from the TFLite delta-q model and
// stores them in cpi->mb_delta_q, centered around the model's mean output and
// scaled by the configured cq_level and deltaq_strength.
void av1_set_mb_ur_variance(AV1_COMP *cpi) {
  const AV1_COMMON *cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  uint8_t *y_buffer = cpi->source->y_buffer;
  const int y_stride = cpi->source->y_stride;
  const int block_size = cpi->common.seq_params->sb_size;
  const uint32_t bit_depth = cpi->td.mb.e_mbd.bd;

  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;
  const int num_blocks = num_rows * num_cols;

  // TODO(sdeng): fit a better model_1; disable it at this time.
  float *model_out0, *model_out1;
  CHECK_MEM_ERROR(cm, model_out0, aom_calloc(num_blocks, sizeof(float)));
  CHECK_MEM_ERROR(cm, model_out1, aom_calloc(num_blocks, sizeof(float)));

  if (model_predict(block_size, num_cols, num_rows, bit_depth, y_buffer,
                    y_stride, model_out0, model_out1)) {
    aom_internal_error(cm->error, AOM_CODEC_ERROR,
                       "Failed to call TFlite functions.");
  }

  // Average the model output over all superblocks.
  float avg_out0 = 0.0f;
  for (int i = 0; i < num_blocks; ++i) avg_out0 += model_out0[i];
  avg_out0 /= (float)num_blocks;

  // Scale deltas so the frame average lands near the requested cq_level.
  const float cq_level = (float)cpi->oxcf.rc_cfg.cq_level / (float)MAXQ;
  float scaling_factor;
  if (cq_level < avg_out0) {
    scaling_factor = cq_level / avg_out0;
  } else {
    scaling_factor = 1.0f - (cq_level - avg_out0) / (1.0f - avg_out0);
  }

  // Convert each centered model output into a qindex delta.
  for (int i = 0; i < num_blocks; ++i) {
    cpi->mb_delta_q[i] =
        RINT((float)cpi->oxcf.q_cfg.deltaq_strength / 100.0f * (float)MAXQ *
             scaling_factor * (model_out0[i] - avg_out0));
  }

  aom_free(model_out0);
  aom_free(model_out1);
}
937
#else  // !CONFIG_TFLITE
// Computes per-superblock delta-q values from an analytic variance model
// (fallback when TFLite is unavailable) and stores them in cpi->mb_delta_q.
// Two fitted curves are evaluated per superblock; the final delta is selected
// or interpolated between them based on where cq_level falls relative to the
// two curves' frame averages.
void av1_set_mb_ur_variance(AV1_COMP *cpi) {
  const AV1_COMMON *cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  uint8_t *y_buffer = cpi->source->y_buffer;
  const int y_stride = cpi->source->y_stride;
  const int block_size = cpi->common.seq_params->sb_size;

  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;

  int *mb_delta_q[2];
  CHECK_MEM_ERROR(cm, mb_delta_q[0],
                  aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[0])));
  CHECK_MEM_ERROR(cm, mb_delta_q[1],
                  aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[1])));

  // Approximates the model change between the current version (Sept 2021) and
  // the baseline (July 2021).
  const double model_change[] = { 3.0, 3.0 };
  // The following parameters are fitted from user labeled data.
  const double a[] = { -24.50 * 4.0, -17.20 * 4.0 };
  const double b[] = { 0.004898, 0.003093 };
  const double c[] = { (29.932 + model_change[0]) * 4.0,
                       (42.100 + model_change[1]) * 4.0 };
  int delta_q_avg[2] = { 0, 0 };

  // Evaluate both fitted curves for every superblock.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      double log_var_sum = 0.0, num_8x8 = 0.0;

      // Accumulate log-variance over the superblock's 8x8 sub-blocks
      // (mi step of 2 == 8 luma pixels).
      for (int mi_row = row * num_mi_h;
           mi_row < mi_params->mi_rows && mi_row < (row + 1) * num_mi_h;
           mi_row += 2) {
        for (int mi_col = col * num_mi_w;
             mi_col < mi_params->mi_cols && mi_col < (col + 1) * num_mi_w;
             mi_col += 2) {
          struct buf_2d buf;
          const int row_offset_y = mi_row << 2;
          const int col_offset_y = mi_col << 2;

          buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
          buf.stride = y_stride;

          unsigned int block_variance = av1_get_perpixel_variance_facade(
              cpi, xd, &buf, BLOCK_8X8, AOM_PLANE_Y);

          // Clamp to 1 so log() stays finite for flat blocks.
          block_variance = AOMMAX(block_variance, 1);
          log_var_sum += log((double)block_variance);
          num_8x8 += 1.0;
        }
      }

      // Geometric mean of the 8x8 variances.
      const double geo_mean_var = exp(log_var_sum / num_8x8);
      mb_delta_q[0][index] = RINT(a[0] * exp(-b[0] * geo_mean_var) + c[0]);
      mb_delta_q[1][index] = RINT(a[1] * exp(-b[1] * geo_mean_var) + c[1]);
      delta_q_avg[0] += mb_delta_q[0][index];
      delta_q_avg[1] += mb_delta_q[1][index];
    }
  }

  delta_q_avg[0] = RINT((double)delta_q_avg[0] / (num_rows * num_cols));
  delta_q_avg[1] = RINT((double)delta_q_avg[1] / (num_rows * num_cols));

  // Pick which curve (or blend) to use based on where cq_level sits relative
  // to the two frame averages. model_idx == 2 interpolates between curves.
  int model_idx;
  double scaling_factor;
  const int cq_level = cpi->oxcf.rc_cfg.cq_level;
  if (cq_level < delta_q_avg[0]) {
    model_idx = 0;
    scaling_factor = (double)cq_level / delta_q_avg[0];
  } else if (cq_level < delta_q_avg[1]) {
    model_idx = 2;
    scaling_factor =
        (double)(cq_level - delta_q_avg[0]) / (delta_q_avg[1] - delta_q_avg[0]);
  } else {
    model_idx = 1;
    scaling_factor = (double)(MAXQ - cq_level) / (MAXQ - delta_q_avg[1]);
  }

  const double new_delta_q_avg =
      delta_q_avg[0] + scaling_factor * (delta_q_avg[1] - delta_q_avg[0]);

  // Emit the final per-superblock delta-q, centered around the frame mean and
  // scaled by deltaq_strength (a percentage).
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      if (model_idx == 2) {
        const double delta_q =
            mb_delta_q[0][index] +
            scaling_factor * (mb_delta_q[1][index] - mb_delta_q[0][index]);
        cpi->mb_delta_q[index] = RINT((double)cpi->oxcf.q_cfg.deltaq_strength /
                                      100.0 * (delta_q - new_delta_q_avg));
      } else {
        cpi->mb_delta_q[index] = RINT(
            (double)cpi->oxcf.q_cfg.deltaq_strength / 100.0 * scaling_factor *
            (mb_delta_q[model_idx][index] - delta_q_avg[model_idx]));
      }
    }
  }

  aom_free(mb_delta_q[0]);
  aom_free(mb_delta_q[1]);
}
#endif
int av1_get_sbq_user_rating_based(const AV1_COMP *const cpi, int mi_row,
1046
0
                                  int mi_col) {
1047
0
  const BLOCK_SIZE bsize = cpi->common.seq_params->sb_size;
1048
0
  const CommonModeInfoParams *const mi_params = &cpi->common.mi_params;
1049
0
  const AV1_COMMON *const cm = &cpi->common;
1050
0
  const int base_qindex = cm->quant_params.base_qindex;
1051
0
  if (base_qindex == MINQ || base_qindex == MAXQ) return base_qindex;
1052
1053
0
  const int num_mi_w = mi_size_wide[bsize];
1054
0
  const int num_mi_h = mi_size_high[bsize];
1055
0
  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
1056
0
  const int index = (mi_row / num_mi_h) * num_cols + (mi_col / num_mi_w);
1057
0
  const int delta_q = cpi->mb_delta_q[index];
1058
1059
0
  int qindex = base_qindex + delta_q;
1060
0
  qindex = AOMMIN(qindex, MAXQ);
1061
0
  qindex = AOMMAX(qindex, MINQ + 1);
1062
1063
0
  return qindex;
1064
0
}
1065
1066
#if !CONFIG_REALTIME_ONLY

// Variance Boost: a variance adaptive quantization implementation
// SVT-AV1 appendix with an overview and a graphical, step-by-step explanation
// of the implementation
// https://gitlab.com/AOMediaCodec/SVT-AV1/-/blob/master/Docs/Appendix-Variance-Boost.md
int av1_get_sbq_variance_boost(const AV1_COMP *cpi, const MACROBLOCK *x) {
1073
0
  const AV1_COMMON *cm = &cpi->common;
1074
0
  const int base_qindex = cm->quant_params.base_qindex;
1075
0
  const aom_bit_depth_t bit_depth = cm->seq_params->bit_depth;
1076
1077
  // Variance Boost only supports 64x64 SBs.
1078
0
  assert(cm->seq_params->sb_size == BLOCK_64X64);
1079
1080
  // Strength is currently hard-coded and optimized for still pictures. In the
1081
  // future, we might want to expose this as a parameter that can be fine-tuned
1082
  // by the caller.
1083
0
  const int strength = 3;
1084
0
  unsigned int variance = av1_get_variance_boost_block_variance(cpi, x);
1085
1086
  // Variance = 0 areas are either completely flat patches or have very fine
1087
  // gradients. Boost these blocks as if they have a variance of 1.
1088
0
  if (variance == 0) {
1089
0
    variance = 1;
1090
0
  }
1091
1092
  // Compute a boost based on a fast-growing formula.
1093
  // High and medium variance SBs essentially get no boost, while lower variance
1094
  // SBs get increasingly stronger boosts.
1095
0
  assert(strength >= 1 && strength <= 4);
1096
1097
  // Still picture curve, with variance crossover point at 1024.
1098
0
  double qstep_ratio = 0.15 * strength * (-log2((double)variance) + 10.0) + 1.0;
1099
0
  qstep_ratio = fclamp(qstep_ratio, 1.0, VAR_BOOST_MAX_BOOST);
1100
1101
0
  double base_q = av1_convert_qindex_to_q(base_qindex, bit_depth);
1102
0
  double target_q = base_q / qstep_ratio;
1103
0
  int target_qindex = av1_convert_q_to_qindex(target_q, bit_depth);
1104
1105
  // Determine the SB's delta_q boost by computing an (unscaled) delta_q from
1106
  // the base and target q values, then scale that delta_q according to the
1107
  // frame's base qindex.
1108
  // The scaling coefficients were chosen empirically to maximize SSIMULACRA 2
1109
  // scores, 10th percentile scores, and subjective quality. Boosts become
1110
  // smaller (for a given variance) the lower the base qindex.
1111
0
  int boost = (int)round((base_qindex + 544.0) * (base_qindex - target_qindex) /
1112
0
                         1279.0);
1113
0
  boost = AOMMIN(VAR_BOOST_MAX_DELTAQ_RANGE, boost);
1114
1115
  // Variance Boost was designed to always operate in the lossy domain, so MINQ
1116
  // is excluded.
1117
0
  int sb_qindex = AOMMAX(base_qindex - boost, MINQ + 1);
1118
1119
0
  return sb_qindex;
1120
0
}
#endif