Coverage Report

Created: 2022-08-24 06:17

/src/aom/av1/encoder/allintra_vis.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2021, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include "config/aom_config.h"
13
14
#if CONFIG_TFLITE
15
#include "tensorflow/lite/c/c_api.h"
16
#include "av1/encoder/deltaq4_model.c"
17
#endif
18
19
#include "av1/common/common_data.h"
20
#include "av1/common/enums.h"
21
#include "av1/common/idct.h"
22
#include "av1/common/reconinter.h"
23
#include "av1/encoder/allintra_vis.h"
24
#include "av1/encoder/encoder.h"
25
#include "av1/encoder/hybrid_fwd_txfm.h"
26
#include "av1/encoder/model_rd.h"
27
#include "av1/encoder/rdopt_utils.h"
28
29
// Process the wiener variance on a 16x16 block basis.
30
0
static int qsort_comp(const void *elem1, const void *elem2) {
31
0
  int a = *((const int *)elem1);
32
0
  int b = *((const int *)elem2);
33
0
  if (a > b) return 1;
34
0
  if (a < b) return -1;
35
0
  return 0;
36
0
}
37
38
0
void av1_init_mb_wiener_var_buffer(AV1_COMP *cpi) {
39
0
  AV1_COMMON *cm = &cpi->common;
40
41
0
  cpi->weber_bsize = BLOCK_8X8;
42
43
0
  if (cpi->mb_weber_stats) return;
44
45
0
  CHECK_MEM_ERROR(cm, cpi->mb_weber_stats,
46
0
                  aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols,
47
0
                             sizeof(*cpi->mb_weber_stats)));
48
0
}
49
50
static int64_t get_satd(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
51
0
                        int mi_col) {
52
0
  AV1_COMMON *const cm = &cpi->common;
53
0
  const int mi_wide = mi_size_wide[bsize];
54
0
  const int mi_high = mi_size_high[bsize];
55
56
0
  const int mi_step = mi_size_wide[cpi->weber_bsize];
57
0
  int mb_stride = cpi->frame_info.mi_cols;
58
0
  int mb_count = 0;
59
0
  int64_t satd = 0;
60
61
0
  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
62
0
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
63
0
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
64
0
        continue;
65
66
0
      satd += cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]
67
0
                  .satd;
68
0
      ++mb_count;
69
0
    }
70
0
  }
71
72
0
  if (mb_count) satd = (int)(satd / mb_count);
73
0
  satd = AOMMAX(1, satd);
74
75
0
  return (int)satd;
76
0
}
77
78
static int64_t get_sse(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
79
0
                       int mi_col) {
80
0
  AV1_COMMON *const cm = &cpi->common;
81
0
  const int mi_wide = mi_size_wide[bsize];
82
0
  const int mi_high = mi_size_high[bsize];
83
84
0
  const int mi_step = mi_size_wide[cpi->weber_bsize];
85
0
  int mb_stride = cpi->frame_info.mi_cols;
86
0
  int mb_count = 0;
87
0
  int64_t distortion = 0;
88
89
0
  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
90
0
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
91
0
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
92
0
        continue;
93
94
0
      distortion +=
95
0
          cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]
96
0
              .distortion;
97
0
      ++mb_count;
98
0
    }
99
0
  }
100
101
0
  if (mb_count) distortion = (int)(distortion / mb_count);
102
0
  distortion = AOMMAX(1, distortion);
103
104
0
  return (int)distortion;
105
0
}
106
107
static double get_max_scale(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
108
0
                            int mi_col) {
109
0
  AV1_COMMON *const cm = &cpi->common;
110
0
  const int mi_wide = mi_size_wide[bsize];
111
0
  const int mi_high = mi_size_high[bsize];
112
0
  const int mi_step = mi_size_wide[cpi->weber_bsize];
113
0
  int mb_stride = cpi->frame_info.mi_cols;
114
0
  double min_max_scale = 10.0;
115
116
0
  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
117
0
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
118
0
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
119
0
        continue;
120
0
      WeberStats *weber_stats =
121
0
          &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)];
122
0
      if (weber_stats->max_scale < 1.0) continue;
123
0
      if (weber_stats->max_scale < min_max_scale)
124
0
        min_max_scale = weber_stats->max_scale;
125
0
    }
126
0
  }
127
0
  return min_max_scale;
128
0
}
129
130
static int get_window_wiener_var(AV1_COMP *const cpi, BLOCK_SIZE bsize,
131
0
                                 int mi_row, int mi_col) {
132
0
  AV1_COMMON *const cm = &cpi->common;
133
0
  const int mi_wide = mi_size_wide[bsize];
134
0
  const int mi_high = mi_size_high[bsize];
135
136
0
  const int mi_step = mi_size_wide[cpi->weber_bsize];
137
0
  int sb_wiener_var = 0;
138
0
  int mb_stride = cpi->frame_info.mi_cols;
139
0
  int mb_count = 0;
140
0
  double base_num = 1;
141
0
  double base_den = 1;
142
0
  double base_reg = 1;
143
144
0
  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
145
0
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
146
0
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
147
0
        continue;
148
149
0
      WeberStats *weber_stats =
150
0
          &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)];
151
152
0
      base_num += ((double)weber_stats->distortion) *
153
0
                  sqrt((double)weber_stats->src_variance) *
154
0
                  weber_stats->rec_pix_max;
155
156
0
      base_den += fabs(
157
0
          weber_stats->rec_pix_max * sqrt((double)weber_stats->src_variance) -
158
0
          weber_stats->src_pix_max * sqrt((double)weber_stats->rec_variance));
159
160
0
      base_reg += sqrt((double)weber_stats->distortion) *
161
0
                  sqrt((double)weber_stats->src_pix_max) * 0.1;
162
0
      ++mb_count;
163
0
    }
164
0
  }
165
166
0
  sb_wiener_var =
167
0
      (int)(((base_num + base_reg) / (base_den + base_reg)) / mb_count);
168
0
  sb_wiener_var = AOMMAX(1, sb_wiener_var);
169
170
0
  return (int)sb_wiener_var;
171
0
}
172
173
static int get_var_perceptual_ai(AV1_COMP *const cpi, BLOCK_SIZE bsize,
174
0
                                 int mi_row, int mi_col) {
175
0
  AV1_COMMON *const cm = &cpi->common;
176
0
  const int mi_wide = mi_size_wide[bsize];
177
0
  const int mi_high = mi_size_high[bsize];
178
179
0
  int sb_wiener_var = get_window_wiener_var(cpi, bsize, mi_row, mi_col);
180
181
0
  if (mi_row >= (mi_high / 2)) {
182
0
    sb_wiener_var =
183
0
        AOMMIN(sb_wiener_var,
184
0
               get_window_wiener_var(cpi, bsize, mi_row - mi_high / 2, mi_col));
185
0
  }
186
0
  if (mi_row <= (cm->mi_params.mi_rows - mi_high - (mi_high / 2))) {
187
0
    sb_wiener_var =
188
0
        AOMMIN(sb_wiener_var,
189
0
               get_window_wiener_var(cpi, bsize, mi_row + mi_high / 2, mi_col));
190
0
  }
191
0
  if (mi_col >= (mi_wide / 2)) {
192
0
    sb_wiener_var =
193
0
        AOMMIN(sb_wiener_var,
194
0
               get_window_wiener_var(cpi, bsize, mi_row, mi_col - mi_wide / 2));
195
0
  }
196
0
  if (mi_col <= (cm->mi_params.mi_cols - mi_wide - (mi_wide / 2))) {
197
0
    sb_wiener_var =
198
0
        AOMMIN(sb_wiener_var,
199
0
               get_window_wiener_var(cpi, bsize, mi_row, mi_col + mi_wide / 2));
200
0
  }
201
202
0
  return sb_wiener_var;
203
0
}
204
205
static double calc_src_mean_var(const uint8_t *const src_buffer,
206
                                const int buf_stride, const int block_size,
207
0
                                const int use_hbd, double *mean) {
208
0
  double src_mean = 0.0;
209
0
  double src_variance = 0.0;
210
0
  for (int pix_row = 0; pix_row < block_size; ++pix_row) {
211
0
    for (int pix_col = 0; pix_col < block_size; ++pix_col) {
212
0
      int src_pix;
213
0
      if (use_hbd) {
214
0
        const uint16_t *src = CONVERT_TO_SHORTPTR(src_buffer);
215
0
        src_pix = src[pix_row * buf_stride + pix_col];
216
0
      } else {
217
0
        src_pix = src_buffer[pix_row * buf_stride + pix_col];
218
0
      }
219
0
      src_mean += src_pix;
220
0
      src_variance += src_pix * src_pix;
221
0
    }
222
0
  }
223
0
  const int pix_num = block_size * block_size;
224
0
  src_variance -= (src_mean * src_mean) / pix_num;
225
0
  src_variance /= pix_num;
226
0
  *mean = src_mean / pix_num;
227
0
  return src_variance;
228
0
}
229
230
static BLOCK_SIZE pick_block_size(AV1_COMP *cpi,
231
0
                                  const BLOCK_SIZE orig_block_size) {
232
0
  const BLOCK_SIZE sub_block_size =
233
0
      get_partition_subsize(orig_block_size, PARTITION_SPLIT);
234
0
  const int mb_step = mi_size_wide[orig_block_size];
235
0
  const int sub_step = mb_step >> 1;
236
0
  const TX_SIZE tx_size = max_txsize_lookup[orig_block_size];
237
0
  const int block_size = tx_size_wide[tx_size];
238
0
  const int split_block_size = block_size >> 1;
239
0
  assert(split_block_size >= 8);
240
0
  const uint8_t *const buffer = cpi->source->y_buffer;
241
0
  const int buf_stride = cpi->source->y_stride;
242
0
  const int use_hbd = cpi->source->flags & YV12_FLAG_HIGHBITDEPTH;
243
244
0
  double vote = 0.0;
245
0
  int sb_count = 0;
246
0
  for (int mi_row = 0; mi_row < cpi->frame_info.mi_rows; mi_row += mb_step) {
247
0
    for (int mi_col = 0; mi_col < cpi->frame_info.mi_cols; mi_col += mb_step) {
248
0
      const uint8_t *mb_buffer =
249
0
          buffer + mi_row * MI_SIZE * buf_stride + mi_col * MI_SIZE;
250
      // (1). Calculate mean and var using the original block size
251
0
      double mean = 0.0;
252
0
      const double orig_var =
253
0
          calc_src_mean_var(mb_buffer, buf_stride, block_size, use_hbd, &mean);
254
      // (2). Calculate mean and var using the split block size
255
0
      double split_var[4] = { 0 };
256
0
      double split_mean[4] = { 0 };
257
0
      int sub_idx = 0;
258
0
      for (int row = mi_row; row < mi_row + mb_step; row += sub_step) {
259
0
        for (int col = mi_col; col < mi_col + mb_step; col += sub_step) {
260
0
          mb_buffer = buffer + row * MI_SIZE * buf_stride + col * MI_SIZE;
261
0
          split_var[sub_idx] =
262
0
              calc_src_mean_var(mb_buffer, buf_stride, split_block_size,
263
0
                                use_hbd, &split_mean[sub_idx]);
264
0
          ++sub_idx;
265
0
        }
266
0
      }
267
      // (3). Determine whether to use the original or the split block size.
268
      // If use original, vote += 1.0.
269
      // If use split, vote -= 1.0.
270
0
      double max_split_mean = 0.0;
271
0
      double max_split_var = 0.0;
272
0
      double geo_split_var = 0.0;
273
0
      for (int i = 0; i < 4; ++i) {
274
0
        max_split_mean = AOMMAX(max_split_mean, split_mean[i]);
275
0
        max_split_var = AOMMAX(max_split_var, split_var[i]);
276
0
        geo_split_var += log(split_var[i]);
277
0
      }
278
0
      geo_split_var = exp(geo_split_var / 4);
279
0
      const double param_1 = 1.5;
280
0
      const double param_2 = 1.0;
281
      // If the variance of the large block size is considerably larger than the
282
      // geometric mean of vars of small blocks;
283
      // Or if the variance of the large block size is larger than the local
284
      // variance;
285
      // Or if the variance of the large block size is considerably larger
286
      // than the mean.
287
      // It indicates that the source block is not a flat area, therefore we
288
      // might want to split into smaller block sizes to capture the
289
      // local characteristics.
290
0
      if (orig_var > param_1 * geo_split_var || orig_var > max_split_var ||
291
0
          sqrt(orig_var) > param_2 * mean) {
292
0
        vote -= 1.0;
293
0
      } else {
294
0
        vote += 1.0;
295
0
      }
296
0
      ++sb_count;
297
0
    }
298
0
  }
299
300
0
  return vote > 0.0 ? orig_block_size : sub_block_size;
301
0
}
302
303
static int64_t pick_norm_factor_and_block_size(AV1_COMP *const cpi,
304
0
                                               BLOCK_SIZE *best_block_size) {
305
0
  const AV1_COMMON *const cm = &cpi->common;
306
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
307
0
  BLOCK_SIZE last_block_size;
308
0
  BLOCK_SIZE this_block_size = sb_size;
309
0
  *best_block_size = sb_size;
310
  // Pick from block size 64x64, 32x32 and 16x16.
311
0
  do {
312
0
    last_block_size = this_block_size;
313
0
    assert(this_block_size >= BLOCK_16X16 && this_block_size <= BLOCK_128X128);
314
0
    const int block_size = block_size_wide[this_block_size];
315
0
    if (block_size < 32) break;
316
0
    this_block_size = pick_block_size(cpi, last_block_size);
317
0
  } while (this_block_size != last_block_size);
318
0
  *best_block_size = this_block_size;
319
320
0
  int64_t norm_factor = 1;
321
0
  const BLOCK_SIZE norm_block_size = this_block_size;
322
0
  assert(norm_block_size >= BLOCK_16X16 && norm_block_size <= BLOCK_64X64);
323
0
  const int norm_step = mi_size_wide[norm_block_size];
324
0
  double sb_wiener_log = 0;
325
0
  double sb_count = 0;
326
0
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) {
327
0
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += norm_step) {
328
0
      const int sb_wiener_var =
329
0
          get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);
330
0
      const int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
331
0
      const int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
332
0
      const double scaled_satd = (double)satd / sqrt((double)sse);
333
0
      sb_wiener_log += scaled_satd * log(sb_wiener_var);
334
0
      sb_count += scaled_satd;
335
0
    }
336
0
  }
337
0
  if (sb_count > 0) norm_factor = (int64_t)(exp(sb_wiener_log / sb_count));
338
0
  norm_factor = AOMMAX(1, norm_factor);
339
340
0
  return norm_factor;
341
0
}
342
343
static void automatic_intra_tools_off(AV1_COMP *cpi,
344
                                      const double sum_rec_distortion,
345
0
                                      const double sum_est_rate) {
346
0
  if (!cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) return;
347
348
  // Thresholds
349
0
  const int high_quality_qindex = 128;
350
0
  const double high_quality_bpp = 2.0;
351
0
  const double high_quality_dist_per_pix = 4.0;
352
353
0
  AV1_COMMON *const cm = &cpi->common;
354
0
  const int qindex = cm->quant_params.base_qindex;
355
0
  const double dist_per_pix =
356
0
      (double)sum_rec_distortion / (cm->width * cm->height);
357
  // The estimated bpp is not accurate, so it is divided by an empirical constant of 100.
358
0
  const double estimate_bpp = sum_est_rate / (cm->width * cm->height * 100);
359
360
0
  if (qindex < high_quality_qindex && estimate_bpp > high_quality_bpp &&
361
0
      dist_per_pix < high_quality_dist_per_pix) {
362
0
    cpi->oxcf.intra_mode_cfg.enable_smooth_intra = 0;
363
0
    cpi->oxcf.intra_mode_cfg.enable_paeth_intra = 0;
364
0
    cpi->oxcf.intra_mode_cfg.enable_cfl_intra = 0;
365
0
    cpi->oxcf.intra_mode_cfg.enable_diagonal_intra = 0;
366
0
  }
367
0
}
368
369
0
void av1_set_mb_wiener_variance(AV1_COMP *cpi) {
370
0
  AV1_COMMON *const cm = &cpi->common;
371
0
  uint8_t *buffer = cpi->source->y_buffer;
372
0
  int buf_stride = cpi->source->y_stride;
373
0
  ThreadData *td = &cpi->td;
374
0
  MACROBLOCK *x = &td->mb;
375
0
  MACROBLOCKD *xd = &x->e_mbd;
376
0
  MB_MODE_INFO mbmi;
377
0
  memset(&mbmi, 0, sizeof(mbmi));
378
0
  MB_MODE_INFO *mbmi_ptr = &mbmi;
379
0
  xd->mi = &mbmi_ptr;
380
0
  xd->cur_buf = cpi->source;
381
382
0
  const SequenceHeader *const seq_params = cm->seq_params;
383
0
  if (aom_realloc_frame_buffer(
384
0
          &cm->cur_frame->buf, cm->width, cm->height, seq_params->subsampling_x,
385
0
          seq_params->subsampling_y, seq_params->use_highbitdepth,
386
0
          cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL,
387
0
          NULL, cpi->oxcf.tool_cfg.enable_global_motion))
388
0
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
389
0
                       "Failed to allocate frame buffer");
390
391
0
  cm->quant_params.base_qindex = cpi->oxcf.rc_cfg.cq_level;
392
0
  av1_frame_init_quantizer(cpi);
393
394
0
  DECLARE_ALIGNED(32, int16_t, src_diff[32 * 32]);
395
0
  DECLARE_ALIGNED(32, tran_low_t, coeff[32 * 32]);
396
0
  DECLARE_ALIGNED(32, tran_low_t, qcoeff[32 * 32]);
397
0
  DECLARE_ALIGNED(32, tran_low_t, dqcoeff[32 * 32]);
398
399
0
  int mi_row, mi_col;
400
401
0
  BLOCK_SIZE bsize = cpi->weber_bsize;
402
0
  const TX_SIZE tx_size = max_txsize_lookup[bsize];
403
0
  const int block_size = tx_size_wide[tx_size];
404
0
  const int coeff_count = block_size * block_size;
405
406
0
  const BitDepthInfo bd_info = get_bit_depth_info(xd);
407
0
  cpi->norm_wiener_variance = 0;
408
0
  int mb_step = mi_size_wide[bsize];
409
410
0
  double sum_rec_distortion = 0.0;
411
0
  double sum_est_rate = 0.0;
412
0
  for (mi_row = 0; mi_row < cpi->frame_info.mi_rows; mi_row += mb_step) {
413
0
    for (mi_col = 0; mi_col < cpi->frame_info.mi_cols; mi_col += mb_step) {
414
0
      PREDICTION_MODE best_mode = DC_PRED;
415
0
      int best_intra_cost = INT_MAX;
416
417
0
      xd->up_available = mi_row > 0;
418
0
      xd->left_available = mi_col > 0;
419
420
0
      const int mi_width = mi_size_wide[bsize];
421
0
      const int mi_height = mi_size_high[bsize];
422
0
      set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd,
423
0
                            mi_row, mi_col);
424
0
      set_mi_row_col(xd, &xd->tile, mi_row, mi_height, mi_col, mi_width,
425
0
                     cm->mi_params.mi_rows, cm->mi_params.mi_cols);
426
0
      set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize],
427
0
                   av1_num_planes(cm));
428
0
      xd->mi[0]->bsize = bsize;
429
0
      xd->mi[0]->motion_mode = SIMPLE_TRANSLATION;
430
431
0
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
432
0
                           mi_col, 0, av1_num_planes(cm));
433
434
0
      int dst_buffer_stride = xd->plane[0].dst.stride;
435
0
      uint8_t *dst_buffer = xd->plane[0].dst.buf;
436
0
      uint8_t *mb_buffer =
437
0
          buffer + mi_row * MI_SIZE * buf_stride + mi_col * MI_SIZE;
438
439
0
      for (PREDICTION_MODE mode = INTRA_MODE_START; mode < INTRA_MODE_END;
440
0
           ++mode) {
441
0
        av1_predict_intra_block(
442
0
            xd, cm->seq_params->sb_size,
443
0
            cm->seq_params->enable_intra_edge_filter, block_size, block_size,
444
0
            tx_size, mode, 0, 0, FILTER_INTRA_MODES, dst_buffer,
445
0
            dst_buffer_stride, dst_buffer, dst_buffer_stride, 0, 0, 0);
446
447
0
        av1_subtract_block(bd_info, block_size, block_size, src_diff,
448
0
                           block_size, mb_buffer, buf_stride, dst_buffer,
449
0
                           dst_buffer_stride);
450
0
        av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);
451
0
        int intra_cost = aom_satd(coeff, coeff_count);
452
0
        if (intra_cost < best_intra_cost) {
453
0
          best_intra_cost = intra_cost;
454
0
          best_mode = mode;
455
0
        }
456
0
      }
457
458
0
      int idx;
459
0
      av1_predict_intra_block(xd, cm->seq_params->sb_size,
460
0
                              cm->seq_params->enable_intra_edge_filter,
461
0
                              block_size, block_size, tx_size, best_mode, 0, 0,
462
0
                              FILTER_INTRA_MODES, dst_buffer, dst_buffer_stride,
463
0
                              dst_buffer, dst_buffer_stride, 0, 0, 0);
464
0
      av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size,
465
0
                         mb_buffer, buf_stride, dst_buffer, dst_buffer_stride);
466
0
      av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);
467
468
0
      const struct macroblock_plane *const p = &x->plane[0];
469
0
      uint16_t eob;
470
0
      const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT];
471
0
      QUANT_PARAM quant_param;
472
0
      int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
473
0
      av1_setup_quant(tx_size, 0, AV1_XFORM_QUANT_FP, 0, &quant_param);
474
0
#if CONFIG_AV1_HIGHBITDEPTH
475
0
      if (is_cur_buf_hbd(xd)) {
476
0
        av1_highbd_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
477
0
                                      scan_order, &quant_param);
478
0
      } else {
479
0
        av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
480
0
                               scan_order, &quant_param);
481
0
      }
482
#else
483
      av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
484
                             scan_order, &quant_param);
485
#endif  // CONFIG_AV1_HIGHBITDEPTH
486
0
      av1_inverse_transform_block(xd, dqcoeff, 0, DCT_DCT, tx_size, dst_buffer,
487
0
                                  dst_buffer_stride, eob, 0);
488
0
      WeberStats *weber_stats =
489
0
          &cpi->mb_weber_stats[(mi_row / mb_step) * cpi->frame_info.mi_cols +
490
0
                               (mi_col / mb_step)];
491
492
0
      weber_stats->rec_pix_max = 1;
493
0
      weber_stats->rec_variance = 0;
494
0
      weber_stats->src_pix_max = 1;
495
0
      weber_stats->src_variance = 0;
496
0
      weber_stats->distortion = 0;
497
498
0
      int64_t src_mean = 0;
499
0
      int64_t rec_mean = 0;
500
0
      int64_t dist_mean = 0;
501
502
0
      for (int pix_row = 0; pix_row < block_size; ++pix_row) {
503
0
        for (int pix_col = 0; pix_col < block_size; ++pix_col) {
504
0
          int src_pix, rec_pix;
505
0
#if CONFIG_AV1_HIGHBITDEPTH
506
0
          if (is_cur_buf_hbd(xd)) {
507
0
            uint16_t *src = CONVERT_TO_SHORTPTR(mb_buffer);
508
0
            uint16_t *rec = CONVERT_TO_SHORTPTR(dst_buffer);
509
0
            src_pix = src[pix_row * buf_stride + pix_col];
510
0
            rec_pix = rec[pix_row * dst_buffer_stride + pix_col];
511
0
          } else {
512
0
            src_pix = mb_buffer[pix_row * buf_stride + pix_col];
513
0
            rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
514
0
          }
515
#else
516
          src_pix = mb_buffer[pix_row * buf_stride + pix_col];
517
          rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
518
#endif
519
0
          src_mean += src_pix;
520
0
          rec_mean += rec_pix;
521
0
          dist_mean += src_pix - rec_pix;
522
0
          weber_stats->src_variance += src_pix * src_pix;
523
0
          weber_stats->rec_variance += rec_pix * rec_pix;
524
0
          weber_stats->src_pix_max = AOMMAX(weber_stats->src_pix_max, src_pix);
525
0
          weber_stats->rec_pix_max = AOMMAX(weber_stats->rec_pix_max, rec_pix);
526
0
          weber_stats->distortion += (src_pix - rec_pix) * (src_pix - rec_pix);
527
0
        }
528
0
      }
529
530
0
      sum_rec_distortion += weber_stats->distortion;
531
0
      int est_block_rate = 0;
532
0
      int64_t est_block_dist = 0;
533
0
      model_rd_sse_fn[MODELRD_LEGACY](cpi, x, bsize, 0, weber_stats->distortion,
534
0
                                      pix_num, &est_block_rate,
535
0
                                      &est_block_dist);
536
0
      sum_est_rate += est_block_rate;
537
538
0
      weber_stats->src_variance -= (src_mean * src_mean) / pix_num;
539
0
      weber_stats->rec_variance -= (rec_mean * rec_mean) / pix_num;
540
0
      weber_stats->distortion -= (dist_mean * dist_mean) / pix_num;
541
0
      weber_stats->satd = best_intra_cost;
542
543
0
      qcoeff[0] = 0;
544
0
      for (idx = 1; idx < coeff_count; ++idx) qcoeff[idx] = abs(qcoeff[idx]);
545
0
      qsort(qcoeff, coeff_count, sizeof(*coeff), qsort_comp);
546
547
0
      weber_stats->max_scale = (double)qcoeff[coeff_count - 1];
548
0
    }
549
0
  }
550
551
  // Determine whether to turn off several intra coding tools.
552
0
  automatic_intra_tools_off(cpi, sum_rec_distortion, sum_est_rate);
553
554
0
  BLOCK_SIZE norm_block_size = BLOCK_16X16;
555
0
  cpi->norm_wiener_variance =
556
0
      pick_norm_factor_and_block_size(cpi, &norm_block_size);
557
0
  const int norm_step = mi_size_wide[norm_block_size];
558
559
0
  double sb_wiener_log = 0;
560
0
  double sb_count = 0;
561
0
  for (int its_cnt = 0; its_cnt < 2; ++its_cnt) {
562
0
    sb_wiener_log = 0;
563
0
    sb_count = 0;
564
0
    for (mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) {
565
0
      for (mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += norm_step) {
566
0
        int sb_wiener_var =
567
0
            get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);
568
569
0
        double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
570
0
        double min_max_scale = AOMMAX(
571
0
            1.0, get_max_scale(cpi, cm->seq_params->sb_size, mi_row, mi_col));
572
0
        beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale);
573
0
        beta = AOMMIN(beta, 4);
574
0
        beta = AOMMAX(beta, 0.25);
575
576
0
        sb_wiener_var = (int)(cpi->norm_wiener_variance / beta);
577
578
0
        int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
579
0
        int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
580
0
        double scaled_satd = (double)satd / sqrt((double)sse);
581
0
        sb_wiener_log += scaled_satd * log(sb_wiener_var);
582
0
        sb_count += scaled_satd;
583
0
      }
584
0
    }
585
586
0
    if (sb_count > 0)
587
0
      cpi->norm_wiener_variance = (int64_t)(exp(sb_wiener_log / sb_count));
588
0
    cpi->norm_wiener_variance = AOMMAX(1, cpi->norm_wiener_variance);
589
0
  }
590
591
0
  aom_free_frame_buffer(&cm->cur_frame->buf);
592
0
}
593
594
int av1_get_sbq_perceptual_ai(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
595
0
                              int mi_col) {
596
0
  AV1_COMMON *const cm = &cpi->common;
597
0
  const int base_qindex = cm->quant_params.base_qindex;
598
0
  int sb_wiener_var = get_var_perceptual_ai(cpi, bsize, mi_row, mi_col);
599
0
  int offset = 0;
600
0
  double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
601
0
  double min_max_scale = AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col));
602
0
  beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale);
603
604
  // Cap beta such that the delta q value is not much far away from the base q.
605
0
  beta = AOMMIN(beta, 4);
606
0
  beta = AOMMAX(beta, 0.25);
607
0
  offset = av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, beta);
608
0
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
609
0
  offset = AOMMIN(offset, delta_q_info->delta_q_res * 20 - 1);
610
0
  offset = AOMMAX(offset, -delta_q_info->delta_q_res * 20 + 1);
611
0
  int qindex = cm->quant_params.base_qindex + offset;
612
0
  qindex = AOMMIN(qindex, MAXQ);
613
0
  qindex = AOMMAX(qindex, MINQ);
614
0
  if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1);
615
616
0
  return qindex;
617
0
}
618
619
0
void av1_init_mb_ur_var_buffer(AV1_COMP *cpi) {
620
0
  AV1_COMMON *cm = &cpi->common;
621
622
0
  if (cpi->mb_delta_q) return;
623
624
0
  CHECK_MEM_ERROR(cm, cpi->mb_delta_q,
625
0
                  aom_calloc(cpi->frame_info.mb_rows * cpi->frame_info.mb_cols,
626
0
                             sizeof(*cpi->mb_delta_q)));
627
0
}
628
629
#if CONFIG_TFLITE
630
static int model_predict(BLOCK_SIZE block_size, int num_cols, int num_rows,
631
                         uint8_t *y_buffer, int y_stride, float *predicts) {
632
  // Create the model and interpreter options.
633
  TfLiteModel *model =
634
      TfLiteModelCreate(av1_deltaq4_model_file, av1_deltaq4_model_fsize);
635
  if (model == NULL) return 1;
636
637
  TfLiteInterpreterOptions *options = TfLiteInterpreterOptionsCreate();
638
  TfLiteInterpreterOptionsSetNumThreads(options, 2);
639
  if (options == NULL) {
640
    TfLiteModelDelete(model);
641
    return 1;
642
  }
643
644
  // Create the interpreter.
645
  TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options);
646
  if (interpreter == NULL) {
647
    TfLiteInterpreterOptionsDelete(options);
648
    TfLiteModelDelete(model);
649
    return 1;
650
  }
651
652
  // Allocate tensors and populate the input tensor data.
653
  TfLiteInterpreterAllocateTensors(interpreter);
654
  TfLiteTensor *input_tensor = TfLiteInterpreterGetInputTensor(interpreter, 0);
655
  if (input_tensor == NULL) {
656
    TfLiteInterpreterDelete(interpreter);
657
    TfLiteInterpreterOptionsDelete(options);
658
    TfLiteModelDelete(model);
659
    return 1;
660
  }
661
662
  struct aom_internal_error_info error;
663
  size_t input_size = TfLiteTensorByteSize(input_tensor);
664
  float *input_data;
665
  AOM_CHECK_MEM_ERROR(&error, input_data, aom_calloc(input_size, 1));
666
667
  const int num_mi_w = mi_size_wide[block_size];
668
  const int num_mi_h = mi_size_high[block_size];
669
  for (int row = 0; row < num_rows; ++row) {
670
    for (int col = 0; col < num_cols; ++col) {
671
      const int row_offset = (row * num_mi_h) << 2;
672
      const int col_offset = (col * num_mi_w) << 2;
673
674
      uint8_t *buf = y_buffer + row_offset * y_stride + col_offset;
675
      int r = row_offset, pos = 0;
676
      while (r < row_offset + (num_mi_h << 2)) {
677
        for (int c = 0; c < (num_mi_w << 2); ++c) {
678
          input_data[pos++] = (float)*(buf + c) / 255.0f;
679
        }
680
        buf += y_stride;
681
        ++r;
682
      }
683
      TfLiteTensorCopyFromBuffer(input_tensor, input_data, input_size);
684
685
      // Execute inference.
686
      if (TfLiteInterpreterInvoke(interpreter) != kTfLiteOk) {
687
        TfLiteInterpreterDelete(interpreter);
688
        TfLiteInterpreterOptionsDelete(options);
689
        TfLiteModelDelete(model);
690
        return 1;
691
      }
692
693
      // Extract the output tensor data.
694
      const TfLiteTensor *output_tensor =
695
          TfLiteInterpreterGetOutputTensor(interpreter, 0);
696
      if (output_tensor == NULL) {
697
        TfLiteInterpreterDelete(interpreter);
698
        TfLiteInterpreterOptionsDelete(options);
699
        TfLiteModelDelete(model);
700
        return 1;
701
      }
702
703
      size_t output_size = TfLiteTensorByteSize(output_tensor);
704
      float output_data;
705
706
      TfLiteTensorCopyToBuffer(output_tensor, &output_data, output_size);
707
      predicts[row * num_cols + col] = output_data;
708
    }
709
  }
710
711
  // Dispose of the model and interpreter objects.
712
  TfLiteInterpreterDelete(interpreter);
713
  TfLiteInterpreterOptionsDelete(options);
714
  TfLiteModelDelete(model);
715
  aom_free(input_data);
716
  return 0;
717
}
718
719
// Computes a per-superblock delta-q map from a TFLite perceptual model and
// stores it in cpi->mb_delta_q.  The raw model output is normalized around
// its frame average and rescaled so the frame-average quality stays anchored
// at the configured cq_level.
void av1_set_mb_ur_variance(AV1_COMP *cpi) {
  const AV1_COMMON *cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  uint8_t *y_buffer = cpi->source->y_buffer;
  const int y_stride = cpi->source->y_stride;
  const int block_size = cpi->common.seq_params->sb_size;

  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  // Number of superblock columns/rows, rounding partial SBs up.
  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;
  const int use_hbd = cpi->source->flags & YV12_FLAG_HIGHBITDEPTH;

  // TODO(sdeng): add highbitdepth support.
  (void)use_hbd;

  float *mb_delta_q, delta_q_avg = 0.0f;
  CHECK_MEM_ERROR(cm, mb_delta_q,
                  aom_calloc(num_rows * num_cols, sizeof(float)));

  // TODO(sdeng): train the model at a different quality level.
  if (model_predict(block_size, num_cols, num_rows, y_buffer, y_stride,
                    mb_delta_q)) {
    // aom_internal_error() longjmps out of this function; free the map first
    // to avoid leaking it on the error path.
    aom_free(mb_delta_q);
    aom_internal_error(cm->error, AOM_CODEC_ERROR,
                       "Failed to call TFlite functions.");
  }

  // Average the model's per-SB prediction over the frame.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      delta_q_avg += mb_delta_q[index];
    }
  }

  delta_q_avg /= (float)(num_rows * num_cols);

  // Approximates the model change between current version (Sept 2021) and the
  // baseline (July 2021).
  const float model_change = 3.0f * 4.0f / (float)MAXQ;
  delta_q_avg += model_change;

  // Scale deltas so the frame average stays pinned at cq_level: compress
  // toward 0 when cq_level is below the average, toward 1 otherwise.
  float scaling_factor;
  const float cq_level = (float)cpi->oxcf.rc_cfg.cq_level / (float)MAXQ;
  if (cq_level < delta_q_avg) {
    scaling_factor = cq_level / delta_q_avg;
  } else {
    scaling_factor = 1.0f - (cq_level - delta_q_avg) / (1.0f - delta_q_avg);
  }
  delta_q_avg -= model_change;

  // Emit the mean-centered, strength- and range-scaled integer delta-q map.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      cpi->mb_delta_q[index] =
          RINT((float)cpi->oxcf.q_cfg.deltaq_strength / 100.0 * (float)MAXQ *
               scaling_factor * (mb_delta_q[index] - delta_q_avg));
    }
  }

  aom_free(mb_delta_q);
}
781
#else   // !CONFIG_TFLITE
782
0
// Non-TFLite fallback: derives a per-superblock delta-q map from per-8x8
// luma variance using two exponential models fitted to user-rating data,
// then interpolates between them based on the configured cq_level.
// Results are written to cpi->mb_delta_q (one entry per superblock).
void av1_set_mb_ur_variance(AV1_COMP *cpi) {
  const AV1_COMMON *cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  ThreadData *td = &cpi->td;
  MACROBLOCK *x = &td->mb;
  MACROBLOCKD *xd = &x->e_mbd;
  uint8_t *y_buffer = cpi->source->y_buffer;
  const int y_stride = cpi->source->y_stride;
  const int block_size = cpi->common.seq_params->sb_size;

  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  // Superblock grid dimensions, rounding partial SBs up.
  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;
  const int use_hbd = cpi->source->flags & YV12_FLAG_HIGHBITDEPTH;

  // Two candidate delta-q maps: index 0 = low-q model, index 1 = high-q model.
  int *mb_delta_q[2];
  CHECK_MEM_ERROR(cm, mb_delta_q[0],
                  aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[0])));
  CHECK_MEM_ERROR(cm, mb_delta_q[1],
                  aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[1])));

  // Approximates the model change between current version (Sept 2021) and the
  // baseline (July 2021).
  const double model_change[] = { 3.0, 3.0 };
  // The following parameters are fitted from user labeled data.
  const double a[] = { -24.50 * 4.0, -17.20 * 4.0 };
  const double b[] = { 0.004898, 0.003093 };
  const double c[] = { (29.932 + model_change[0]) * 4.0,
                       (42.100 + model_change[1]) * 4.0 };
  int delta_q_avg[2] = { 0, 0 };
  // Loop through each SB block.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      double var = 0.0, num_of_var = 0.0;
      const int index = row * num_cols + col;

      // Loop through each 8x8 block (mi step of 2 = 8 luma pixels).
      for (int mi_row = row * num_mi_h;
           mi_row < mi_params->mi_rows && mi_row < (row + 1) * num_mi_h;
           mi_row += 2) {
        for (int mi_col = col * num_mi_w;
             mi_col < mi_params->mi_cols && mi_col < (col + 1) * num_mi_w;
             mi_col += 2) {
          struct buf_2d buf;
          // Pixel offsets: each mi unit is 4 luma pixels.
          const int row_offset_y = mi_row << 2;
          const int col_offset_y = mi_col << 2;

          buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
          buf.stride = y_stride;

          unsigned int block_variance;
          if (use_hbd) {
            block_variance = av1_high_get_sby_perpixel_variance(
                cpi, &buf, BLOCK_8X8, xd->bd);
          } else {
            block_variance =
                av1_get_sby_perpixel_variance(cpi, &buf, BLOCK_8X8);
          }

          // Clamp to 1 so log() below is always defined.
          block_variance = AOMMAX(block_variance, 1);
          var += log((double)block_variance);
          num_of_var += 1.0;
        }
      }
      // Geometric mean of the 8x8 variances within this superblock.
      var = exp(var / num_of_var);
      // Evaluate both fitted models: delta_q = a * exp(-b * var) + c.
      mb_delta_q[0][index] = RINT(a[0] * exp(-b[0] * var) + c[0]);
      mb_delta_q[1][index] = RINT(a[1] * exp(-b[1] * var) + c[1]);
      delta_q_avg[0] += mb_delta_q[0][index];
      delta_q_avg[1] += mb_delta_q[1][index];
    }
  }

  // Frame-average delta-q for each model.
  delta_q_avg[0] = RINT((double)delta_q_avg[0] / (num_rows * num_cols));
  delta_q_avg[1] = RINT((double)delta_q_avg[1] / (num_rows * num_cols));

  // Pick how to map the maps onto cq_level:
  //   model_idx 0/1 -> scale a single model's deltas;
  //   model_idx 2   -> cq_level lies between the two averages, so
  //                    linearly interpolate between the two maps.
  int model_idx;
  double scaling_factor;
  const int cq_level = cpi->oxcf.rc_cfg.cq_level;
  if (cq_level < delta_q_avg[0]) {
    model_idx = 0;
    scaling_factor = (double)cq_level / delta_q_avg[0];
  } else if (cq_level < delta_q_avg[1]) {
    model_idx = 2;
    scaling_factor =
        (double)(cq_level - delta_q_avg[0]) / (delta_q_avg[1] - delta_q_avg[0]);
  } else {
    model_idx = 1;
    scaling_factor = (double)(MAXQ - cq_level) / (MAXQ - delta_q_avg[1]);
  }

  // Interpolated frame average (only meaningful for model_idx == 2).
  const double new_delta_q_avg =
      delta_q_avg[0] + scaling_factor * (delta_q_avg[1] - delta_q_avg[0]);
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      if (model_idx == 2) {
        // Blend the two maps, then mean-center on the blended average.
        const double delta_q =
            mb_delta_q[0][index] +
            scaling_factor * (mb_delta_q[1][index] - mb_delta_q[0][index]);
        cpi->mb_delta_q[index] = RINT((double)cpi->oxcf.q_cfg.deltaq_strength /
                                      100.0 * (delta_q - new_delta_q_avg));
      } else {
        // Single-model path: mean-center, then scale by the cq_level factor
        // and the user-configured deltaq strength (percent).
        cpi->mb_delta_q[index] = RINT(
            (double)cpi->oxcf.q_cfg.deltaq_strength / 100.0 * scaling_factor *
            (mb_delta_q[model_idx][index] - delta_q_avg[model_idx]));
      }
    }
  }

  aom_free(mb_delta_q[0]);
  aom_free(mb_delta_q[1]);
}
895
#endif  // CONFIG_TFLITE
896
897
0
// Returns the qindex for the superblock containing (mi_row, mi_col) by
// applying the precomputed per-SB delta-q (cpi->mb_delta_q) to the frame's
// base qindex.  The result is kept strictly above MINQ and at most MAXQ.
int av1_get_sbq_user_rating_based(AV1_COMP *const cpi, int mi_row, int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int base_q = cm->quant_params.base_qindex;
  // At the frame extremes, leave the superblock at the frame-level qindex.
  if (base_q == MINQ || base_q == MAXQ) return base_q;

  // Locate this superblock's slot in the row-major delta-q map.
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int sb_mi_w = mi_size_wide[sb_size];
  const int sb_mi_h = mi_size_high[sb_size];
  const int sb_cols = (cm->mi_params.mi_cols + sb_mi_w - 1) / sb_mi_w;
  const int sb_index = (mi_row / sb_mi_h) * sb_cols + (mi_col / sb_mi_w);

  const int adjusted = base_q + cpi->mb_delta_q[sb_index];
  // Clamp to (MINQ, MAXQ] so lossless (MINQ) is never chosen per-SB.
  return AOMMAX(AOMMIN(adjusted, MAXQ), MINQ + 1);
}