Coverage Report

Created: 2026-06-16 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/encoder/encodeframe.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <limits.h>
13
#include <float.h>
14
#include <math.h>
15
#include <stdbool.h>
16
#include <stdio.h>
17
18
#include "config/aom_config.h"
19
#include "config/aom_dsp_rtcd.h"
20
#include "config/av1_rtcd.h"
21
22
#include "aom_dsp/aom_dsp_common.h"
23
#include "aom_dsp/binary_codes_writer.h"
24
#include "aom_ports/mem.h"
25
#include "aom_ports/aom_timer.h"
26
#include "aom_util/aom_pthread.h"
27
#if CONFIG_MISMATCH_DEBUG
28
#include "aom_util/debug_util.h"
29
#endif  // CONFIG_MISMATCH_DEBUG
30
31
#include "av1/common/cfl.h"
32
#include "av1/common/common.h"
33
#include "av1/common/common_data.h"
34
#include "av1/common/entropy.h"
35
#include "av1/common/entropymode.h"
36
#include "av1/common/idct.h"
37
#include "av1/common/mv.h"
38
#include "av1/common/mvref_common.h"
39
#include "av1/common/pred_common.h"
40
#include "av1/common/quant_common.h"
41
#include "av1/common/reconintra.h"
42
#include "av1/common/reconinter.h"
43
#include "av1/common/seg_common.h"
44
#include "av1/common/tile_common.h"
45
#include "av1/common/warped_motion.h"
46
47
#include "av1/encoder/allintra_vis.h"
48
#include "av1/encoder/aq_complexity.h"
49
#include "av1/encoder/aq_cyclicrefresh.h"
50
#include "av1/encoder/aq_variance.h"
51
#include "av1/encoder/av1_quantize.h"
52
#include "av1/encoder/global_motion_facade.h"
53
#include "av1/encoder/encodeframe.h"
54
#include "av1/encoder/encodeframe_utils.h"
55
#include "av1/encoder/encodemb.h"
56
#include "av1/encoder/encodemv.h"
57
#include "av1/encoder/encodetxb.h"
58
#include "av1/encoder/ethread.h"
59
#include "av1/encoder/extend.h"
60
#include "av1/encoder/intra_mode_search_utils.h"
61
#include "av1/encoder/ml.h"
62
#include "av1/encoder/motion_search_facade.h"
63
#include "av1/encoder/partition_strategy.h"
64
#if !CONFIG_REALTIME_ONLY
65
#include "av1/encoder/partition_model_weights.h"
66
#endif
67
#include "av1/encoder/partition_search.h"
68
#include "av1/encoder/rd.h"
69
#include "av1/encoder/rdopt.h"
70
#include "av1/encoder/reconinter_enc.h"
71
#include "av1/encoder/segmentation.h"
72
#include "av1/encoder/tokenize.h"
73
#include "av1/encoder/tpl_model.h"
74
#include "av1/encoder/var_based_part.h"
75
76
#if CONFIG_TUNE_VMAF
77
#include "av1/encoder/tune_vmaf.h"
78
#endif
79
80
/*!\cond */
81
// This is used as a reference when computing the source variance for the
82
//  purposes of activity masking.
83
// Eventually this should be replaced by custom no-reference routines,
84
//  which will be faster.
85
// Flat 8-bit reference row: every initializer below is 128. Returned by
// get_var_offs() on the non-high-bitdepth path.
static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
86
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
87
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
88
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
89
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
90
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
91
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
92
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
93
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
94
  128, 128, 128, 128, 128, 128, 128, 128
95
};
96
97
#if CONFIG_AV1_HIGHBITDEPTH
98
// 16-bit-sample counterpart of AV1_VAR_OFFS: every initializer is 128.
// Selected by get_var_offs() when use_hbd is set and bd == 8.
static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
99
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
100
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
101
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
102
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
103
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
104
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
105
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
106
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
107
  128, 128, 128, 128, 128, 128, 128, 128
108
};
109
110
// Every initializer is 128 * 4 (= 512), i.e. the 8-bit value scaled by
// 2^(10 - 8). Selected by get_var_offs() when use_hbd is set and bd == 10.
static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
111
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
112
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
113
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
114
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
115
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
116
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
117
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
118
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
119
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
120
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
121
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
122
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
123
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
124
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
125
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
126
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
127
};
128
129
// Every initializer is 128 * 16 (= 2048), i.e. the 8-bit value scaled by
// 2^(12 - 8). Selected by get_var_offs() when use_hbd is set and bd == 12.
static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
130
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
131
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
132
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
133
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
134
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
135
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
136
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
137
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
138
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
139
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
140
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
141
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
142
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
143
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
144
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
145
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
146
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
147
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
148
  128 * 16, 128 * 16
149
};
150
#endif  // CONFIG_AV1_HIGHBITDEPTH
151
/*!\endcond */
152
153
// For the given bit depth, returns a constant array used to assist the
154
// calculation of source block variance, which will then be used to decide
155
// adaptive quantizers.
156
0
static const uint8_t *get_var_offs(int use_hbd, int bd) {
157
0
#if CONFIG_AV1_HIGHBITDEPTH
158
0
  if (use_hbd) {
159
0
    assert(bd == 8 || bd == 10 || bd == 12);
160
0
    const int off_index = (bd - 8) >> 1;
161
0
    static const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
162
0
                                                AV1_HIGH_VAR_OFFS_10,
163
0
                                                AV1_HIGH_VAR_OFFS_12 };
164
0
    return CONVERT_TO_BYTEPTR(high_var_offs[off_index]);
165
0
  }
166
#else
167
  (void)use_hbd;
168
  (void)bd;
169
  assert(!use_hbd);
170
#endif
171
0
  assert(bd == 8);
172
0
  return AV1_VAR_OFFS;
173
0
}
174
175
0
// Resets the per-macroblock counters used by the real-time path: the cyclic
// refresh counters (delegated to av1_init_cyclic_refresh_counters()) and the
// cnt_zeromv, sb_col_scroll and sb_row_scroll fields of x.
void av1_init_rtc_counters(MACROBLOCK *const x) {
  av1_init_cyclic_refresh_counters(x);

  x->sb_row_scroll = 0;
  x->sb_col_scroll = 0;
  x->cnt_zeromv = 0;
}
181
182
0
// Adds the counters gathered in macroblock x to the encoder-level totals in
// cpi->rc. The cyclic refresh counters are only accumulated when the
// configured AQ mode is CYCLIC_REFRESH_AQ.
void av1_accumulate_rtc_counters(AV1_COMP *cpi, const MACROBLOCK *const x) {
  const int cyclic_refresh_on =
      (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ);
  if (cyclic_refresh_on) {
    av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh, x);
  }

  cpi->rc.cnt_zeromv += x->cnt_zeromv;
  cpi->rc.num_col_blscroll_last_tl0 += x->sb_col_scroll;
  cpi->rc.num_row_blscroll_last_tl0 += x->sb_row_scroll;
}
189
190
unsigned int av1_get_perpixel_variance(const AV1_COMP *cpi,
191
                                       const MACROBLOCKD *xd,
192
                                       const struct buf_2d *ref,
193
                                       BLOCK_SIZE bsize, int plane,
194
0
                                       int use_hbd) {
195
0
  const int subsampling_x = xd->plane[plane].subsampling_x;
196
0
  const int subsampling_y = xd->plane[plane].subsampling_y;
197
0
  const BLOCK_SIZE plane_bsize =
198
0
      get_plane_block_size(bsize, subsampling_x, subsampling_y);
199
0
  unsigned int sse;
200
0
  const unsigned int var = cpi->ppi->fn_ptr[plane_bsize].vf(
201
0
      ref->buf, ref->stride, get_var_offs(use_hbd, xd->bd), 0, &sse);
202
0
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[plane_bsize]);
203
0
}
204
205
unsigned int av1_get_perpixel_variance_facade(const AV1_COMP *cpi,
206
                                              const MACROBLOCKD *xd,
207
                                              const struct buf_2d *ref,
208
0
                                              BLOCK_SIZE bsize, int plane) {
209
0
  const int use_hbd = is_cur_buf_hbd(xd);
210
0
  return av1_get_perpixel_variance(cpi, xd, ref, bsize, plane, use_hbd);
211
0
}
212
213
void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
214
                          int mi_row, int mi_col, const int num_planes,
215
0
                          BLOCK_SIZE bsize) {
216
  // Set current frame pointer.
217
0
  x->e_mbd.cur_buf = src;
218
219
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
220
  // the static analysis warnings.
221
0
  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
222
0
    const int is_uv = i > 0;
223
0
    setup_pred_plane(
224
0
        &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv],
225
0
        src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL,
226
0
        x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y);
227
0
  }
228
0
}
229
230
#if !CONFIG_REALTIME_ONLY
231
/*!\brief Assigns different quantization parameters to each superblock
232
 * based on statistics relevant to the selected delta-q mode (variance).
233
 * This is the non-rd version.
234
 *
235
 * \param[in]     cpi         Top level encoder instance structure
236
 * \param[in,out] td          Thread data structure
237
 * \param[in,out] x           Superblock level data for this block.
238
 * \param[in]     tile_info   Tile information / identification
239
 * \param[in]     mi_row      Block row (in "MI_SIZE" units) index
240
 * \param[in]     mi_col      Block column (in "MI_SIZE" units) index
241
 * \param[out]    num_planes  Number of image planes (e.g. Y,U,V)
242
 *
243
 * \remark No return value but updates superblock and thread data
244
 * related to the q / q delta to be used.
245
 */
246
static inline void setup_delta_q_nonrd(AV1_COMP *const cpi, ThreadData *td,
247
                                       MACROBLOCK *const x,
248
                                       const TileInfo *const tile_info,
249
0
                                       int mi_row, int mi_col, int num_planes) {
250
0
  AV1_COMMON *const cm = &cpi->common;
251
0
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
252
0
  assert(delta_q_info->delta_q_present_flag);
253
254
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
255
0
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
256
257
0
  const int delta_q_res = delta_q_info->delta_q_res;
258
0
  int current_qindex = cm->quant_params.base_qindex;
259
260
0
  if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_VARIANCE_BOOST) {
261
0
    current_qindex = av1_get_sbq_variance_boost(cpi, x);
262
0
  }
263
264
0
  x->rdmult_cur_qindex = current_qindex;
265
0
  MACROBLOCKD *const xd = &x->e_mbd;
266
0
  current_qindex = av1_adjust_q_from_delta_q_res(
267
0
      delta_q_res, xd->current_base_qindex, current_qindex);
268
269
0
  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
270
0
  x->rdmult_delta_qindex = x->delta_qindex;
271
272
0
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
273
0
  xd->mi[0]->current_qindex = current_qindex;
274
0
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);
275
276
  // keep track of any non-zero delta-q used
277
0
  td->deltaq_used |= (x->delta_qindex != 0);
278
0
}
279
280
/*!\brief Assigns different quantization parameters to each superblock
281
 * based on statistics relevant to the selected delta-q mode (TPL weight,
282
 * variance, HDR, etc).
283
 *
284
 * \ingroup tpl_modelling
285
 *
286
 * \param[in]     cpi         Top level encoder instance structure
287
 * \param[in,out] td          Thread data structure
288
 * \param[in,out] x           Superblock level data for this block.
289
 * \param[in]     tile_info   Tile information / identification
290
 * \param[in]     mi_row      Block row (in "MI_SIZE" units) index
291
 * \param[in]     mi_col      Block column (in "MI_SIZE" units) index
292
 * \param[out]    num_planes  Number of image planes (e.g. Y,U,V)
293
 *
294
 * \remark No return value but updates superblock and thread data
295
 * related to the q / q delta to be used.
296
 */
297
static inline void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
298
                                 MACROBLOCK *const x,
299
                                 const TileInfo *const tile_info, int mi_row,
300
0
                                 int mi_col, int num_planes) {
301
0
  AV1_COMMON *const cm = &cpi->common;
302
0
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
303
0
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
304
305
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
306
0
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
307
308
0
  const int delta_q_res = delta_q_info->delta_q_res;
309
0
  int current_qindex = cm->quant_params.base_qindex;
310
0
  const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
311
0
  const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
312
0
  const int sb_cols =
313
0
      CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, cm->seq_params->mib_size_log2);
314
0
  const int sb_index = sb_row * sb_cols + sb_col;
315
0
  if (cpi->use_ducky_encode && cpi->ducky_encode_info.frame_info.qp_mode ==
316
0
                                   DUCKY_ENCODE_FRAME_MODE_QINDEX) {
317
0
    current_qindex =
318
0
        cpi->ducky_encode_info.frame_info.superblock_encode_qindex[sb_index];
319
0
  } else if (cpi->ext_ratectrl.ready &&
320
0
             (cpi->ext_ratectrl.funcs.rc_type & AOM_RC_QP) != 0 &&
321
0
             cpi->ext_ratectrl.funcs.get_encodeframe_decision != NULL &&
322
0
             cpi->ext_ratectrl.sb_params_list != NULL) {
323
0
    if (cpi->ext_ratectrl.use_delta_q) {
324
      // Only used for per-coding-block RD mult calculation later.
325
0
      current_qindex = av1_get_q_for_deltaq_objective(cpi, td, NULL, sb_size,
326
0
                                                      mi_row, mi_col);
327
328
0
      const int q_index = cpi->ext_ratectrl.sb_params_list[sb_index].q_index;
329
0
      if (q_index != AOM_DEFAULT_Q) {
330
0
        current_qindex = q_index;
331
0
      }
332
0
    }
333
0
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL) {
334
0
    if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
335
0
      const int block_wavelet_energy_level =
336
0
          av1_block_wavelet_energy_level(cpi, x, sb_size);
337
0
      x->sb_energy_level = block_wavelet_energy_level;
338
0
      current_qindex = av1_compute_q_from_energy_level_deltaq_mode(
339
0
          cpi, block_wavelet_energy_level);
340
0
    } else {
341
0
      const int block_var_level = av1_log_block_var(cpi, x, sb_size);
342
0
      x->sb_energy_level = block_var_level;
343
0
      current_qindex =
344
0
          av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level);
345
0
    }
346
0
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE &&
347
0
             cpi->oxcf.algo_cfg.enable_tpl_model) {
348
    // Setup deltaq based on tpl stats
349
0
    current_qindex =
350
0
        av1_get_q_for_deltaq_objective(cpi, td, NULL, sb_size, mi_row, mi_col);
351
0
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL_AI) {
352
0
    current_qindex = av1_get_sbq_perceptual_ai(cpi, sb_size, mi_row, mi_col);
353
0
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) {
354
0
    current_qindex = av1_get_sbq_user_rating_based(cpi, mi_row, mi_col);
355
0
  } else if (cpi->oxcf.q_cfg.enable_hdr_deltaq) {
356
0
    current_qindex = av1_get_q_for_hdr(cpi, x, sb_size, mi_row, mi_col);
357
0
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_VARIANCE_BOOST) {
358
0
    current_qindex = av1_get_sbq_variance_boost(cpi, x);
359
0
  }
360
361
0
  x->rdmult_cur_qindex = current_qindex;
362
0
  MACROBLOCKD *const xd = &x->e_mbd;
363
0
  const int adjusted_qindex = av1_adjust_q_from_delta_q_res(
364
0
      delta_q_res, xd->current_base_qindex, current_qindex);
365
0
  if (cpi->use_ducky_encode) {
366
0
    assert(adjusted_qindex == current_qindex);
367
0
  }
368
0
  current_qindex = adjusted_qindex;
369
370
0
  x->delta_qindex = cm->delta_q_info.delta_q_present_flag
371
0
                        ? current_qindex - cm->quant_params.base_qindex
372
0
                        : 0;
373
0
  x->rdmult_delta_qindex = current_qindex - cm->quant_params.base_qindex;
374
375
0
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
376
0
  xd->mi[0]->current_qindex = cm->delta_q_info.delta_q_present_flag
377
0
                                  ? current_qindex
378
0
                                  : cm->quant_params.base_qindex;
379
0
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);
380
381
  // keep track of any non-zero delta-q used
382
0
  td->deltaq_used |= (x->delta_qindex != 0);
383
384
0
  if (cpi->oxcf.tool_cfg.enable_deltalf_mode &&
385
0
      cm->delta_q_info.delta_q_present_flag) {
386
0
    const int delta_lf_res = delta_q_info->delta_lf_res;
387
0
    const int lfmask = ~(delta_lf_res - 1);
388
0
    const int delta_lf_from_base =
389
0
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
390
0
    const int8_t delta_lf =
391
0
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
392
0
    const int frame_lf_count =
393
0
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
394
0
    const int mib_size = cm->seq_params->mib_size;
395
396
    // pre-set the delta lf for loop filter. Note that this value is set
397
    // before mi is assigned for each block in current superblock
398
0
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
399
0
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
400
0
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
401
0
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
402
0
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
403
0
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
404
0
        }
405
0
      }
406
0
    }
407
0
  }
408
0
}
409
410
// Fills x->tpl_keep_ref_frame for the superblock at (mi_row, mi_col) from the
// TPL statistics of the current frame.
//
// The array is zeroed first and left that way when TPL stats are not ready,
// the frame is not TPL eligible, or an AQ mode is active. Overlay frames set
// every entry to 1. Otherwise, for each TPL block in the superblock the
// reference with the smallest non-zero prediction error is found and its
// error reduction relative to LAST_FRAME is accumulated per reference. The
// references other than LAST_FRAME are then ranked by accumulated value;
// INTRA_FRAME, LAST_FRAME and the first three ranked references are always
// kept, and the remaining ones are dropped from the first reference whose
// accumulated value is zero or less than 1/8 (in magnitude) of the previous
// one.
static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
                                 int mi_col) {
  const AV1_COMMON *cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  MACROBLOCK *x = &td->mb;
  const int frame_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;

  av1_zero(x->tpl_keep_ref_frame);

  if (!av1_tpl_stats_ready(tpl_data, frame_idx)) return;
  if (!is_frame_tpl_eligible(gf_group, frame_idx)) return;
  if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;

  if (gf_group->update_type[frame_idx] == OVERLAY_UPDATE) {
    memset(x->tpl_keep_ref_frame, 1, sizeof(x->tpl_keep_ref_frame));
    return;
  }

  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int tpl_stride = tpl_frame->stride;
  int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
  const int step = 1 << block_mis_log2;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;

  // Rows are walked in coded mi units, columns in superres-upscaled mi units.
  const int mi_row_end =
      AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      AOMMIN(coded_to_superres_mi(mi_col + mi_size_wide[sb_size],
                                  cm->superres_scale_denominator),
             mi_cols_sr);
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);

  for (int row = mi_row; row < mi_row_end; row += step) {
    for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
      const TplDepStats *const blk_stats =
          &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];

      // Find the winner ref frame idx for the current block
      int64_t lowest_error = blk_stats->pred_error[0];
      int winner = 0;
      for (int ref = 1; ref < INTER_REFS_PER_FRAME; ++ref) {
        const int64_t ref_error = blk_stats->pred_error[ref];
        if (ref_error < lowest_error && ref_error != 0) {
          lowest_error = ref_error;
          winner = ref;
        }
      }

      // Only the winner contributes: its pred_error reduction w.r.t.
      // LAST_FRAME. Index 0 is never accumulated.
      if (winner > 0) {
        inter_cost[winner] += blk_stats->pred_error[winner] -
                              blk_stats->pred_error[LAST_FRAME - 1];
      }
    }
  }

  // Insertion-sort reference indices 1..INTER_REFS_PER_FRAME-1 by ascending
  // accumulated cost.
  int ranked_refs[INTER_REFS_PER_FRAME - 1];
  for (int n = 0; n < INTER_REFS_PER_FRAME - 1; ++n) {
    ranked_refs[n] = n + 1;
    for (int pos = n; pos > 0; --pos) {
      if (inter_cost[ranked_refs[pos - 1]] > inter_cost[ranked_refs[pos]]) {
        const int swapped = ranked_refs[pos - 1];
        ranked_refs[pos - 1] = ranked_refs[pos];
        ranked_refs[pos] = swapped;
      }
    }
  }

  x->tpl_keep_ref_frame[INTRA_FRAME] = 1;
  x->tpl_keep_ref_frame[LAST_FRAME] = 1;

  int cutoff_ref = 0;
  for (int n = 0; n < INTER_REFS_PER_FRAME - 1; ++n) {
    const int64_t this_cost = inter_cost[ranked_refs[n]];
    // If the predictive coding gains are smaller than the previous more
    // relevant frame over certain amount, discard this frame and all the
    // frames afterwards. The first three ranked frames are never tested.
    if (n > 2 && !cutoff_ref &&
        (llabs(this_cost) < llabs(inter_cost[ranked_refs[n - 1]]) / 8 ||
         this_cost == 0)) {
      cutoff_ref = 1;
    }
    x->tpl_keep_ref_frame[ranked_refs[n] + LAST_FRAME] = cutoff_ref ? 0 : 1;
  }
}
510
511
// Replaces x->rdmult with the value from av1_get_rdmult_delta() for the
// superblock at (mi_row, mi_col). This only happens when the TPL model is
// enabled, no AQ or delta-q mode is active, and the current frame is an
// ARF_UPDATE frame at a non-zero GF group index; otherwise x is untouched.
static inline void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
                                           int mi_row, int mi_col) {
  const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
  const int orig_rdmult = cpi->rd.RDMULT;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int gf_group_index = cpi->gf_frame_index;

  if (!cpi->oxcf.algo_cfg.enable_tpl_model ||
      cpi->oxcf.q_cfg.aq_mode != NO_AQ ||
      cpi->oxcf.q_cfg.deltaq_mode != NO_DELTA_Q || gf_group_index <= 0 ||
      cpi->ppi->gf_group.update_type[gf_group_index] != ARF_UPDATE) {
    return;
  }

  x->rdmult = av1_get_rdmult_delta(cpi, sb_size, mi_row, mi_col, orig_rdmult);
}
527
#endif  // !CONFIG_REALTIME_ONLY
528
529
#if CONFIG_RT_ML_PARTITIONING
530
// Get a prediction (stored in x->est_pred) for the whole superblock.
//
// Only 64x64 superblocks are supported (asserted below). For frames that are
// not intra-only, the prediction is a zero-motion, bilinear-filtered inter
// prediction from LAST_FRAME written to est_pred with stride 64. For
// intra-only frames est_pred is filled with a constant via memset().
531
static void get_estimated_pred(AV1_COMP *cpi, const TileInfo *const tile,
532
                               MACROBLOCK *x, int mi_row, int mi_col) {
533
  AV1_COMMON *const cm = &cpi->common;
534
  const int is_key_frame = frame_is_intra_only(cm);
535
  MACROBLOCKD *xd = &x->e_mbd;
536
537
  // TODO(kyslov) Extend to 128x128
538
  assert(cm->seq_params->sb_size == BLOCK_64X64);
539
540
  av1_set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
541
542
  if (!is_key_frame) {
543
    MB_MODE_INFO *mi = xd->mi[0];
544
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
545
546
    assert(yv12 != NULL);
547
548
    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
549
                         get_ref_scale_factors(cm, LAST_FRAME), 1);
    // Configure a single-reference, zero-mv 64x64 block predicted from
    // LAST_FRAME with the bilinear interpolation filter.
550
    mi->ref_frame[0] = LAST_FRAME;
551
    mi->ref_frame[1] = NONE;
552
    mi->bsize = BLOCK_64X64;
553
    mi->mv[0].as_int = 0;
554
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
555
556
    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
557
    // Redirect the luma destination so the predictor is written into
    // x->est_pred.
558
    xd->plane[0].dst.buf = x->est_pred;
559
    xd->plane[0].dst.stride = 64;
560
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
561
  } else {
562
#if CONFIG_AV1_HIGHBITDEPTH
    // NOTE(review): memset() converts its fill value to unsigned char, so
    // 128 * 4 (512) and 128 * 16 (2048) below both store the byte 0x00, not
    // the apparent 10/12-bit mid-level. The switch also has no default case.
    // The element type of est_pred is not visible here - confirm the intended
    // fill before changing this.
563
    switch (xd->bd) {
564
      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
565
      case 10:
566
        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
567
        break;
568
      case 12:
569
        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
570
        break;
571
    }
572
#else
573
    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
574
#endif  // CONFIG_AV1_HIGHBITDEPTH
575
  }
576
}
577
#endif  // CONFIG_RT_ML_PARTITIONING
578
579
0
#define AVG_CDF_WEIGHT_LEFT 3
580
0
#define AVG_CDF_WEIGHT_TOP_RIGHT 1
581
582
/*!\brief Encode a superblock (minimal RD search involved)
583
 *
584
 * \ingroup partition_search
585
 * Encodes the superblock by a pre-determined partition pattern, only minor
586
 * rd-based searches are allowed to adjust the initial pattern. It is only used
587
 * by realtime encoding.
588
 */
589
static inline void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
590
                                   TileDataEnc *tile_data, TokenExtra **tp,
591
                                   const int mi_row, const int mi_col,
592
0
                                   const int seg_skip) {
593
0
  AV1_COMMON *const cm = &cpi->common;
594
0
  MACROBLOCK *const x = &td->mb;
595
0
  const SPEED_FEATURES *const sf = &cpi->sf;
596
0
  const TileInfo *const tile_info = &tile_data->tile_info;
597
0
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
598
0
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
599
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
600
0
  PC_TREE *const pc_root = td->pc_root;
601
602
0
#if !CONFIG_REALTIME_ONLY
603
0
  if (cm->delta_q_info.delta_q_present_flag) {
604
0
    const int num_planes = av1_num_planes(cm);
605
606
0
    setup_delta_q_nonrd(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
607
0
  }
608
0
#endif
609
#if CONFIG_RT_ML_PARTITIONING
610
  if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) {
611
    RD_STATS dummy_rdc;
612
    get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
613
    av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
614
                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, pc_root);
615
    return;
616
  }
617
#endif
618
  // Set the partition
619
0
  if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
620
0
      (sf->rt_sf.use_fast_fixed_part && x->sb_force_fixed_part == 1 &&
621
0
       (!frame_is_intra_only(cm) &&
622
0
        (!cpi->ppi->use_svc ||
623
0
         !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)))) {
624
    // set a fixed-size partition
625
0
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
626
0
    BLOCK_SIZE bsize_select = sf->part_sf.fixed_partition_size;
627
0
    if (sf->rt_sf.use_fast_fixed_part &&
628
0
        x->content_state_sb.source_sad_nonrd < kLowSad) {
629
0
      bsize_select = cm->seq_params->sb_size;
630
0
    }
631
0
    if (cpi->sf.rt_sf.skip_encoding_non_reference_slide_change &&
632
0
        cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) {
633
0
      bsize_select = cm->seq_params->sb_size;
634
0
      x->force_zeromv_skip_for_sb = 1;
635
0
    }
636
0
    const BLOCK_SIZE bsize = seg_skip ? sb_size : bsize_select;
637
0
    if (x->content_state_sb.source_sad_nonrd > kZeroSad)
638
0
      x->force_color_check_block_level = 1;
639
0
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
640
0
  } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
641
    // set a variance-based partition
642
0
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
643
0
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
644
0
  }
645
0
  assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
646
0
         sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
647
0
  set_cb_offsets(td->mb.cb_offset, 0, 0);
648
649
  // Initialize the flag to skip cdef to 1.
650
0
  if (sf->rt_sf.skip_cdef_sb) {
651
0
    const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
652
    // If 128x128 block is used, we need to set the flag for all 4 64x64 sub
653
    // "blocks".
654
0
    for (int r = 0; r < block64_in_sb; ++r) {
655
0
      for (int c = 0; c < block64_in_sb; ++c) {
656
0
        const int idx_in_sb =
657
0
            r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
658
0
        if (mi[idx_in_sb]) mi[idx_in_sb]->cdef_strength = 1;
659
0
      }
660
0
    }
661
0
  }
662
663
#if CONFIG_COLLECT_COMPONENT_TIMING
664
  start_timing(cpi, nonrd_use_partition_time);
665
#endif
666
0
  av1_nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
667
0
                          pc_root);
668
#if CONFIG_COLLECT_COMPONENT_TIMING
669
  end_timing(cpi, nonrd_use_partition_time);
670
#endif
671
0
}
672
673
// This function initializes the stats for encode_rd_sb.
674
static inline void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
675
                                     const TileDataEnc *tile_data,
676
                                     SIMPLE_MOTION_DATA_TREE *sms_root,
677
                                     RD_STATS *rd_cost, int mi_row, int mi_col,
678
                                     int gather_tpl_data) {
679
  const AV1_COMMON *cm = &cpi->common;
680
  const TileInfo *tile_info = &tile_data->tile_info;
681
  MACROBLOCK *x = &td->mb;
682
683
  const SPEED_FEATURES *sf = &cpi->sf;
684
  const int use_simple_motion_search =
685
      (sf->part_sf.simple_motion_search_split ||
686
       sf->part_sf.simple_motion_search_prune_rect ||
687
       sf->part_sf.simple_motion_search_early_term_none ||
688
       sf->part_sf.ml_early_term_after_part_split_level) &&
689
      !frame_is_intra_only(cm);
690
  if (use_simple_motion_search) {
691
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_root,
692
                                             mi_row, mi_col);
693
  }
694
695
#if !CONFIG_REALTIME_ONLY
696
  if (!(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
697
        cpi->oxcf.gf_cfg.lag_in_frames == 0)) {
698
    init_ref_frame_space(cpi, td, mi_row, mi_col);
699
    x->sb_energy_level = 0;
700
    x->part_search_info.cnn_output_valid = 0;
701
    if (gather_tpl_data) {
702
      if (cpi->cb_delta_rdmult_enabled) {
703
        const int num_planes = av1_num_planes(cm);
704
        const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
705
        setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
706
        av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
707
      }
708
709
      // TODO(jingning): revisit this function.
710
      if (cpi->oxcf.algo_cfg.enable_tpl_model && (0)) {
711
        adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
712
      }
713
    }
714
  }
715
#else
716
  (void)tile_info;
717
  (void)mi_row;
718
  (void)mi_col;
719
  (void)gather_tpl_data;
720
#endif
721
722
  x->reuse_inter_pred = false;
723
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
724
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
725
  av1_zero(x->picked_ref_frames_mask);
726
  av1_invalid_rd_stats(rd_cost);
727
}
728
729
#if !CONFIG_REALTIME_ONLY
730
static void sb_qp_sweep_init_quantizers(AV1_COMP *cpi, ThreadData *td,
731
                                        const TileDataEnc *tile_data,
732
                                        SIMPLE_MOTION_DATA_TREE *sms_tree,
733
                                        RD_STATS *rd_cost, int mi_row,
734
0
                                        int mi_col, int delta_qp_ofs) {
735
0
  AV1_COMMON *const cm = &cpi->common;
736
0
  MACROBLOCK *const x = &td->mb;
737
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
738
0
  const TileInfo *tile_info = &tile_data->tile_info;
739
0
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
740
0
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
741
0
  assert(delta_q_info->delta_q_present_flag);
742
0
  const int delta_q_res = delta_q_info->delta_q_res;
743
744
0
  const SPEED_FEATURES *sf = &cpi->sf;
745
0
  const int use_simple_motion_search =
746
0
      (sf->part_sf.simple_motion_search_split ||
747
0
       sf->part_sf.simple_motion_search_prune_rect ||
748
0
       sf->part_sf.simple_motion_search_early_term_none ||
749
0
       sf->part_sf.ml_early_term_after_part_split_level) &&
750
0
      !frame_is_intra_only(cm);
751
0
  if (use_simple_motion_search) {
752
0
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_tree,
753
0
                                             mi_row, mi_col);
754
0
  }
755
756
0
  int current_qindex = x->rdmult_cur_qindex + delta_qp_ofs;
757
758
0
  MACROBLOCKD *const xd = &x->e_mbd;
759
0
  current_qindex = av1_adjust_q_from_delta_q_res(
760
0
      delta_q_res, xd->current_base_qindex, current_qindex);
761
762
0
  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
763
764
0
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
765
0
  xd->mi[0]->current_qindex = current_qindex;
766
0
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);
767
768
  // keep track of any non-zero delta-q used
769
0
  td->deltaq_used |= (x->delta_qindex != 0);
770
771
0
  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
772
0
    const int delta_lf_res = delta_q_info->delta_lf_res;
773
0
    const int lfmask = ~(delta_lf_res - 1);
774
0
    const int delta_lf_from_base =
775
0
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
776
0
    const int8_t delta_lf =
777
0
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
778
0
    const int frame_lf_count =
779
0
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
780
0
    const int mib_size = cm->seq_params->mib_size;
781
782
    // pre-set the delta lf for loop filter. Note that this value is set
783
    // before mi is assigned for each block in current superblock
784
0
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
785
0
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
786
0
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
787
0
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
788
0
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
789
0
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
790
0
        }
791
0
      }
792
0
    }
793
0
  }
794
795
0
  x->reuse_inter_pred = false;
796
0
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
797
0
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
798
0
  av1_zero(x->picked_ref_frames_mask);
799
0
  av1_invalid_rd_stats(rd_cost);
800
0
}
801
802
static int sb_qp_sweep(AV1_COMP *const cpi, ThreadData *td,
803
                       TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
804
                       int mi_col, BLOCK_SIZE bsize,
805
                       SIMPLE_MOTION_DATA_TREE *sms_tree,
806
0
                       SB_FIRST_PASS_STATS *sb_org_stats) {
807
0
  AV1_COMMON *const cm = &cpi->common;
808
0
  MACROBLOCK *const x = &td->mb;
809
0
  RD_STATS rdc_winner, cur_rdc;
810
0
  av1_invalid_rd_stats(&rdc_winner);
811
812
0
  int best_qindex = td->mb.rdmult_delta_qindex;
813
0
  const int start = cm->current_frame.frame_type == KEY_FRAME ? -20 : -12;
814
0
  const int end = cm->current_frame.frame_type == KEY_FRAME ? 20 : 12;
815
0
  const int step = cm->delta_q_info.delta_q_res;
816
817
0
  for (int sweep_qp_delta = start; sweep_qp_delta <= end;
818
0
       sweep_qp_delta += step) {
819
0
    sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_tree, &cur_rdc, mi_row,
820
0
                                mi_col, sweep_qp_delta);
821
822
0
    const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
823
0
    const int backup_current_qindex =
824
0
        cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;
825
826
0
    av1_reset_mbmi(&cm->mi_params, bsize, mi_row, mi_col);
827
0
    av1_restore_sb_state(sb_org_stats, cpi, td, tile_data, mi_row, mi_col);
828
0
    cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex = backup_current_qindex;
829
830
0
    td->pc_root = av1_alloc_pc_tree_node(bsize);
831
0
    if (!td->pc_root)
832
0
      aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
833
0
                         "Failed to allocate PC_TREE");
834
0
    av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize,
835
0
                          &cur_rdc, cur_rdc, td->pc_root, sms_tree, NULL,
836
0
                          SB_DRY_PASS, NULL);
837
838
0
    if ((rdc_winner.rdcost > cur_rdc.rdcost) ||
839
0
        (abs(sweep_qp_delta) < abs(best_qindex - x->rdmult_delta_qindex) &&
840
0
         rdc_winner.rdcost == cur_rdc.rdcost)) {
841
0
      rdc_winner = cur_rdc;
842
0
      best_qindex = x->rdmult_delta_qindex + sweep_qp_delta;
843
0
    }
844
0
  }
845
846
0
  return best_qindex;
847
0
}
848
#endif  //! CONFIG_REALTIME_ONLY
849
850
/*!\brief Encode a superblock (RD-search-based)
851
 *
852
 * \ingroup partition_search
853
 * Conducts partition search for a superblock, based on rate-distortion costs,
854
 * from scratch or adjusting from a pre-calculated partition pattern.
855
 */
856
static inline void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
857
                                TileDataEnc *tile_data, TokenExtra **tp,
858
                                const int mi_row, const int mi_col,
859
0
                                const int seg_skip) {
860
0
  AV1_COMMON *const cm = &cpi->common;
861
0
  MACROBLOCK *const x = &td->mb;
862
0
  MACROBLOCKD *const xd = &x->e_mbd;
863
0
  const SPEED_FEATURES *const sf = &cpi->sf;
864
0
  const TileInfo *const tile_info = &tile_data->tile_info;
865
0
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
866
0
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
867
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
868
0
  const int num_planes = av1_num_planes(cm);
869
0
  int dummy_rate;
870
0
  int64_t dummy_dist;
871
0
  RD_STATS dummy_rdc;
872
0
  SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root;
873
874
#if CONFIG_REALTIME_ONLY
875
  (void)seg_skip;
876
#endif  // CONFIG_REALTIME_ONLY
877
878
0
  init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
879
0
                    1);
880
881
  // Encode the superblock
882
0
  if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
883
    // partition search starting from a variance-based partition
884
0
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
885
0
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
886
887
#if CONFIG_COLLECT_COMPONENT_TIMING
888
    start_timing(cpi, rd_use_partition_time);
889
#endif
890
0
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
891
0
    if (!td->pc_root)
892
0
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
893
0
                         "Failed to allocate PC_TREE");
894
0
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
895
0
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
896
0
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
897
0
                               sf->part_sf.partition_search_type);
898
0
    td->pc_root = NULL;
899
#if CONFIG_COLLECT_COMPONENT_TIMING
900
    end_timing(cpi, rd_use_partition_time);
901
#endif
902
0
  }
903
0
#if !CONFIG_REALTIME_ONLY
904
0
  else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
905
    // partition search by adjusting a fixed-size partition
906
0
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
907
0
    const BLOCK_SIZE bsize =
908
0
        seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
909
0
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
910
0
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
911
0
    if (!td->pc_root)
912
0
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
913
0
                         "Failed to allocate PC_TREE");
914
0
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
915
0
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
916
0
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
917
0
                               sf->part_sf.partition_search_type);
918
0
    td->pc_root = NULL;
919
0
  } else {
920
    // The most exhaustive recursive partition search
921
0
    SuperBlockEnc *sb_enc = &x->sb_enc;
922
    // No stats for overlay frames. Exclude key frame.
923
0
    av1_get_tpl_stats_sb(cpi, sb_size, mi_row, mi_col, sb_enc);
924
925
    // Reset the tree for simple motion search data
926
0
    av1_reset_simple_motion_tree_partition(sms_root, sb_size);
927
928
#if CONFIG_COLLECT_COMPONENT_TIMING
929
    start_timing(cpi, rd_pick_partition_time);
930
#endif
931
932
    // Estimate the maximum square partition block size, which will be used
933
    // as the starting block size for partitioning the sb
934
0
    set_max_min_partition_size(sb_enc, cpi, x, sf, sb_size, mi_row, mi_col);
935
936
    // The superblock can be searched only once, or twice consecutively for
937
    // better quality. Note that the meaning of passes here is different from
938
    // the general concept of 1-pass/2-pass encoders.
939
0
    const int num_passes =
940
0
        cpi->oxcf.unit_test_cfg.sb_multipass_unit_test ? 2 : 1;
941
942
0
    if (cpi->oxcf.sb_qp_sweep &&
943
0
        !(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
944
0
          cpi->oxcf.gf_cfg.lag_in_frames == 0) &&
945
0
        cm->delta_q_info.delta_q_present_flag) {
946
0
      AOM_CHECK_MEM_ERROR(
947
0
          x->e_mbd.error_info, td->mb.sb_stats_cache,
948
0
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_stats_cache)));
949
0
      av1_backup_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
950
0
                          mi_col);
951
0
      assert(x->rdmult_delta_qindex == x->delta_qindex);
952
953
0
      const int best_qp_diff =
954
0
          sb_qp_sweep(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, sms_root,
955
0
                      td->mb.sb_stats_cache) -
956
0
          x->rdmult_delta_qindex;
957
958
0
      sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_root, &dummy_rdc,
959
0
                                  mi_row, mi_col, best_qp_diff);
960
961
0
      const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
962
0
      const int backup_current_qindex =
963
0
          cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;
964
965
0
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
966
0
      av1_restore_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
967
0
                           mi_col);
968
969
0
      cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex =
970
0
          backup_current_qindex;
971
0
      aom_free(td->mb.sb_stats_cache);
972
0
      td->mb.sb_stats_cache = NULL;
973
0
    }
974
0
    if (num_passes == 1) {
975
#if CONFIG_PARTITION_SEARCH_ORDER
976
      if (cpi->ext_part_controller.ready && !frame_is_intra_only(cm)) {
977
        av1_reset_part_sf(&cpi->sf.part_sf);
978
        av1_reset_sf_for_ext_part(cpi);
979
        RD_STATS this_rdc;
980
        av1_rd_partition_search(cpi, td, tile_data, tp, sms_root, mi_row,
981
                                mi_col, sb_size, &this_rdc);
982
      } else {
983
        td->pc_root = av1_alloc_pc_tree_node(sb_size);
984
        if (!td->pc_root)
985
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
986
                             "Failed to allocate PC_TREE");
987
        av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
988
                              &dummy_rdc, dummy_rdc, td->pc_root, sms_root,
989
                              NULL, SB_SINGLE_PASS, NULL);
990
      }
991
#else
992
0
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
993
0
      if (!td->pc_root)
994
0
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
995
0
                           "Failed to allocate PC_TREE");
996
0
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
997
0
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
998
0
                            SB_SINGLE_PASS, NULL);
999
0
#endif  // CONFIG_PARTITION_SEARCH_ORDER
1000
0
    } else {
1001
      // First pass
1002
0
      AOM_CHECK_MEM_ERROR(
1003
0
          x->e_mbd.error_info, td->mb.sb_fp_stats,
1004
0
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_fp_stats)));
1005
0
      av1_backup_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
1006
0
                          mi_col);
1007
0
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
1008
0
      if (!td->pc_root)
1009
0
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
1010
0
                           "Failed to allocate PC_TREE");
1011
0
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
1012
0
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
1013
0
                            SB_DRY_PASS, NULL);
1014
1015
      // Second pass
1016
0
      init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
1017
0
                        mi_col, 0);
1018
0
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
1019
0
      av1_reset_simple_motion_tree_partition(sms_root, sb_size);
1020
1021
0
      av1_restore_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
1022
0
                           mi_col);
1023
1024
0
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
1025
0
      if (!td->pc_root)
1026
0
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
1027
0
                           "Failed to allocate PC_TREE");
1028
0
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
1029
0
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
1030
0
                            SB_WET_PASS, NULL);
1031
0
      aom_free(td->mb.sb_fp_stats);
1032
0
      td->mb.sb_fp_stats = NULL;
1033
0
    }
1034
1035
    // Reset to 0 so that it wouldn't be used elsewhere mistakenly.
1036
0
    sb_enc->tpl_data_count = 0;
1037
#if CONFIG_COLLECT_COMPONENT_TIMING
1038
    end_timing(cpi, rd_pick_partition_time);
1039
#endif
1040
0
  }
1041
0
#endif  // !CONFIG_REALTIME_ONLY
1042
1043
  // Update the inter rd model
1044
  // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
1045
0
  if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
1046
0
      cm->tiles.cols == 1 && cm->tiles.rows == 1) {
1047
0
    av1_inter_mode_data_fit(tile_data, x->rdmult);
1048
0
  }
1049
0
}
1050
1051
// Check if the cost update of symbols mode, coeff and dv are tile or off.
1052
static inline int is_mode_coeff_dv_upd_freq_tile_or_off(
1053
0
    const AV1_COMP *const cpi) {
1054
0
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;
1055
1056
0
  return (inter_sf->coeff_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
1057
0
          inter_sf->mode_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
1058
0
          cpi->sf.intra_sf.dv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
1059
0
}
1060
1061
// When row-mt is enabled and cost update frequencies are set to off/tile,
1062
// processing of current SB can start even before processing of top-right SB
1063
// is finished. This function checks if it is sufficient to wait for top SB
1064
// to finish processing before current SB starts processing.
1065
0
static inline int delay_wait_for_top_right_sb(const AV1_COMP *const cpi) {
1066
0
  const MODE mode = cpi->oxcf.mode;
1067
0
  if (mode == GOOD) return 0;
1068
1069
0
  if (mode == ALLINTRA)
1070
0
    return is_mode_coeff_dv_upd_freq_tile_or_off(cpi);
1071
0
  else if (mode == REALTIME)
1072
0
    return (is_mode_coeff_dv_upd_freq_tile_or_off(cpi) &&
1073
0
            cpi->sf.inter_sf.mv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
1074
0
  else
1075
0
    return 0;
1076
0
}
1077
1078
/*!\brief Calculate source SAD at superblock level using 64x64 block source SAD
1079
 *
1080
 * \ingroup partition_search
1081
 * \callgraph
1082
 * \callergraph
1083
 */
1084
static inline uint64_t get_sb_source_sad(const AV1_COMP *cpi, int mi_row,
1085
0
                                         int mi_col) {
1086
0
  if (cpi->src_sad_blk_64x64 == NULL) return UINT64_MAX;
1087
1088
0
  const AV1_COMMON *const cm = &cpi->common;
1089
0
  const int blk_64x64_in_mis = (cm->seq_params->sb_size == BLOCK_128X128)
1090
0
                                   ? (cm->seq_params->mib_size >> 1)
1091
0
                                   : cm->seq_params->mib_size;
1092
0
  const int num_blk_64x64_cols =
1093
0
      (cm->mi_params.mi_cols + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
1094
0
  const int num_blk_64x64_rows =
1095
0
      (cm->mi_params.mi_rows + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
1096
0
  const int blk_64x64_col_index = mi_col / blk_64x64_in_mis;
1097
0
  const int blk_64x64_row_index = mi_row / blk_64x64_in_mis;
1098
0
  uint64_t curr_sb_sad = UINT64_MAX;
1099
  // Avoid the border as sad_blk_64x64 may not be set for the border
1100
  // in the scene detection.
1101
0
  if ((blk_64x64_row_index >= num_blk_64x64_rows - 1) ||
1102
0
      (blk_64x64_col_index >= num_blk_64x64_cols - 1)) {
1103
0
    return curr_sb_sad;
1104
0
  }
1105
0
  const uint64_t *const src_sad_blk_64x64_data =
1106
0
      &cpi->src_sad_blk_64x64[blk_64x64_col_index +
1107
0
                              blk_64x64_row_index * num_blk_64x64_cols];
1108
0
  if (cm->seq_params->sb_size == BLOCK_128X128) {
1109
    // Calculate SB source SAD by accumulating source SAD of 64x64 blocks in the
1110
    // superblock
1111
0
    curr_sb_sad = src_sad_blk_64x64_data[0] + src_sad_blk_64x64_data[1] +
1112
0
                  src_sad_blk_64x64_data[num_blk_64x64_cols] +
1113
0
                  src_sad_blk_64x64_data[num_blk_64x64_cols + 1];
1114
0
  } else if (cm->seq_params->sb_size == BLOCK_64X64) {
1115
0
    curr_sb_sad = src_sad_blk_64x64_data[0];
1116
0
  }
1117
0
  return curr_sb_sad;
1118
0
}
1119
1120
/*!\brief Determine whether grading content can be skipped based on sad stat
1121
 *
1122
 * \ingroup partition_search
1123
 * \callgraph
1124
 * \callergraph
1125
 */
1126
static inline bool is_calc_src_content_needed(AV1_COMP *cpi,
1127
                                              MACROBLOCK *const x, int mi_row,
1128
0
                                              int mi_col) {
1129
0
  if (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
1130
0
    return true;
1131
0
  const uint64_t curr_sb_sad = get_sb_source_sad(cpi, mi_row, mi_col);
1132
0
  if (curr_sb_sad == UINT64_MAX) return true;
1133
0
  if (curr_sb_sad == 0) {
1134
0
    x->content_state_sb.source_sad_nonrd = kZeroSad;
1135
0
    return false;
1136
0
  }
1137
0
  AV1_COMMON *const cm = &cpi->common;
1138
0
  bool do_calc_src_content = true;
1139
1140
0
  if (cpi->oxcf.speed < 9) return do_calc_src_content;
1141
1142
  // TODO(yunqing): Tune/validate the thresholds for 128x128 SB size.
1143
0
  if (AOMMIN(cm->width, cm->height) < 360) {
1144
    // Derive Average 64x64 block source SAD from SB source SAD
1145
0
    const uint64_t avg_64x64_blk_sad =
1146
0
        (cm->seq_params->sb_size == BLOCK_128X128) ? ((curr_sb_sad + 2) >> 2)
1147
0
                                                   : curr_sb_sad;
1148
1149
    // The threshold is determined based on kLowSad and kHighSad threshold and
1150
    // test results.
1151
0
    uint64_t thresh_low = 15000;
1152
0
    uint64_t thresh_high = 40000;
1153
1154
0
    if (cpi->sf.rt_sf.increase_source_sad_thresh) {
1155
0
      thresh_low = thresh_low << 1;
1156
0
      thresh_high = thresh_high << 1;
1157
0
    }
1158
1159
0
    if (avg_64x64_blk_sad > thresh_low && avg_64x64_blk_sad < thresh_high) {
1160
0
      do_calc_src_content = false;
1161
      // Note: set x->content_state_sb.source_sad_rd as well if this is extended
1162
      // to RTC rd path.
1163
0
      x->content_state_sb.source_sad_nonrd = kMedSad;
1164
0
    }
1165
0
  }
1166
1167
0
  return do_calc_src_content;
1168
0
}
1169
1170
/*!\brief Determine whether grading content is needed based on sf and frame stat
1171
 *
1172
 * \ingroup partition_search
1173
 * \callgraph
1174
 * \callergraph
1175
 */
1176
// TODO(any): consolidate sfs to make interface cleaner
1177
static inline void grade_source_content_sb(AV1_COMP *cpi, MACROBLOCK *const x,
1178
                                           TileDataEnc *tile_data, int mi_row,
1179
0
                                           int mi_col) {
1180
0
  AV1_COMMON *const cm = &cpi->common;
1181
0
  if (cm->current_frame.frame_type == KEY_FRAME ||
1182
0
      (cpi->ppi->use_svc &&
1183
0
       cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
1184
0
    assert(x->content_state_sb.source_sad_nonrd == kMedSad);
1185
0
    assert(x->content_state_sb.source_sad_rd == kMedSad);
1186
0
    return;
1187
0
  }
1188
0
  bool calc_src_content = false;
1189
1190
0
  if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
1191
0
    if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0) {
1192
0
      calc_src_content = is_calc_src_content_needed(cpi, x, mi_row, mi_col);
1193
0
    } else {
1194
0
      x->content_state_sb.source_sad_nonrd = kZeroSad;
1195
0
    }
1196
0
  } else if ((cpi->sf.rt_sf.var_part_based_on_qidx >= 1) &&
1197
0
             (cm->width * cm->height <= 352 * 288)) {
1198
0
    if (cpi->rc.frame_source_sad > 0)
1199
0
      calc_src_content = true;
1200
0
    else
1201
0
      x->content_state_sb.source_sad_rd = kZeroSad;
1202
0
  }
1203
0
  if (calc_src_content)
1204
0
    av1_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
1205
0
}
1206
1207
/*!\brief Encode a superblock row by breaking it into superblocks
1208
 *
1209
 * \ingroup partition_search
1210
 * \callgraph
1211
 * \callergraph
1212
 * Do partition and mode search for an sb row: one row of superblocks filling up
1213
 * the width of the current tile.
1214
 */
1215
static inline void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
1216
                                 TileDataEnc *tile_data, int mi_row,
1217
0
                                 TokenExtra **tp) {
1218
0
  AV1_COMMON *const cm = &cpi->common;
1219
0
  const TileInfo *const tile_info = &tile_data->tile_info;
1220
0
  MultiThreadInfo *const mt_info = &cpi->mt_info;
1221
0
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
1222
0
  AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
1223
0
  bool row_mt_enabled = mt_info->row_mt_enabled;
1224
0
  MACROBLOCK *const x = &td->mb;
1225
0
  MACROBLOCKD *const xd = &x->e_mbd;
1226
0
  const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info);
1227
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
1228
0
  const int mib_size = cm->seq_params->mib_size;
1229
0
  const int mib_size_log2 = cm->seq_params->mib_size_log2;
1230
0
  const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
1231
0
  const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
1232
1233
#if CONFIG_COLLECT_COMPONENT_TIMING
1234
  start_timing(cpi, encode_sb_row_time);
1235
#endif
1236
1237
  // Initialize the left context for the new SB row
1238
0
  av1_zero_left_context(xd);
1239
1240
  // Reset delta for quantizer and loof filters at the beginning of every tile
1241
0
  if (mi_row == tile_info->mi_row_start || row_mt_enabled) {
1242
0
    if (cpi->cb_delta_rdmult_enabled)
1243
0
      xd->current_base_qindex = cm->quant_params.base_qindex;
1244
0
    if (cm->delta_q_info.delta_lf_present_flag) {
1245
0
      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
1246
0
    }
1247
0
  }
1248
1249
0
  reset_thresh_freq_fact(x);
1250
1251
  // Code each SB in the row
1252
0
  for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
1253
0
       mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
1254
    // In realtime/allintra mode and when frequency of cost updates is off/tile,
1255
    // wait for the top superblock to finish encoding. Otherwise, wait for the
1256
    // top-right superblock to finish encoding.
1257
0
    enc_row_mt->sync_read_ptr(
1258
0
        row_mt_sync, sb_row, sb_col_in_tile - delay_wait_for_top_right_sb(cpi));
1259
1260
0
#if CONFIG_MULTITHREAD
1261
0
    if (row_mt_enabled) {
1262
0
      pthread_mutex_lock(enc_row_mt->mutex_);
1263
0
      const bool row_mt_exit = enc_row_mt->row_mt_exit;
1264
0
      pthread_mutex_unlock(enc_row_mt->mutex_);
1265
      // Exit in case any worker has encountered an error.
1266
0
      if (row_mt_exit) return;
1267
0
    }
1268
0
#endif
1269
1270
0
    const int update_cdf = tile_data->allow_update_cdf && row_mt_enabled;
1271
0
    if (update_cdf && (tile_info->mi_row_start != mi_row)) {
1272
0
      if ((tile_info->mi_col_start == mi_col)) {
1273
        // restore frame context at the 1st column sb
1274
0
        *xd->tile_ctx = *x->row_ctx;
1275
0
      } else {
1276
        // update context
1277
0
        int wt_left = AVG_CDF_WEIGHT_LEFT;
1278
0
        int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
1279
0
        if (tile_info->mi_col_end > (mi_col + mib_size))
1280
0
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile,
1281
0
                              wt_left, wt_tr);
1282
0
        else
1283
0
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
1284
0
                              wt_left, wt_tr);
1285
0
      }
1286
0
    }
1287
1288
    // Update the rate cost tables for some symbols
1289
0
    av1_set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);
1290
1291
    // Reset color coding related parameters
1292
0
    av1_zero(x->color_sensitivity_sb);
1293
0
    av1_zero(x->color_sensitivity_sb_g);
1294
0
    av1_zero(x->color_sensitivity_sb_alt);
1295
0
    av1_zero(x->color_sensitivity);
1296
0
    x->content_state_sb.source_sad_nonrd = kMedSad;
1297
0
    x->content_state_sb.source_sad_rd = kMedSad;
1298
0
    x->content_state_sb.lighting_change = 0;
1299
0
    x->content_state_sb.low_sumdiff = 0;
1300
0
    x->force_zeromv_skip_for_sb = 0;
1301
0
    x->sb_me_block = 0;
1302
0
    x->sb_me_partition = 0;
1303
0
    x->sb_me_mv.as_int = 0;
1304
0
    x->sb_force_fixed_part = 1;
1305
0
    x->color_palette_thresh = 64;
1306
0
    x->force_color_check_block_level = 0;
1307
0
    x->nonrd_prune_ref_frame_search =
1308
0
        cpi->sf.rt_sf.nonrd_prune_ref_frame_search;
1309
1310
0
    if (cpi->oxcf.mode == ALLINTRA) {
1311
0
      x->intra_sb_rdmult_modifier = 128;
1312
0
    }
1313
1314
0
    xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
1315
0
    x->source_variance = UINT_MAX;
1316
0
    td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);
1317
1318
    // Get segment id and skip flag
1319
0
    const struct segmentation *const seg = &cm->seg;
1320
0
    int seg_skip = 0;
1321
0
    if (seg->enabled) {
1322
0
      const uint8_t *const map =
1323
0
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
1324
0
      const uint8_t segment_id =
1325
0
          map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
1326
0
              : 0;
1327
0
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
1328
0
    }
1329
1330
0
    produce_gradients_for_sb(cpi, x, sb_size, mi_row, mi_col);
1331
1332
0
    init_src_var_info_of_4x4_sub_blocks(cpi, x->src_var_info_of_4x4_sub_blocks,
1333
0
                                        sb_size);
1334
1335
    // Grade the temporal variation of the sb, the grade will be used to decide
1336
    // fast mode search strategy for coding blocks
1337
0
    if (!seg_skip) grade_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
1338
1339
    // encode the superblock
1340
0
    if (use_nonrd_mode) {
1341
0
      encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
1342
0
    } else {
1343
0
      encode_rd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
1344
0
    }
1345
1346
    // Update the top-right context in row_mt coding
1347
0
    if (update_cdf && (tile_info->mi_row_end > (mi_row + mib_size))) {
1348
0
      if (sb_cols_in_tile == 1)
1349
0
        x->row_ctx[0] = *xd->tile_ctx;
1350
0
      else if (sb_col_in_tile >= 1)
1351
0
        x->row_ctx[sb_col_in_tile - 1] = *xd->tile_ctx;
1352
0
    }
1353
0
    enc_row_mt->sync_write_ptr(row_mt_sync, sb_row, sb_col_in_tile,
1354
0
                               sb_cols_in_tile);
1355
0
  }
1356
1357
#if CONFIG_COLLECT_COMPONENT_TIMING
1358
  end_timing(cpi, encode_sb_row_time);
1359
#endif
1360
0
}
1361
1362
0
// Sets up the frame-level macroblock context held in cpi->td.mb: the source
// planes are pointed at position (0, 0) of cpi->source, and the block planes
// are configured with the sequence's chroma subsampling factors.
static inline void init_encode_frame_mb_context(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const mb = &cpi->td.mb;
  const int plane_count = av1_num_planes(cm);

  // Copy data over into macro block data structures.
  av1_setup_src_planes(mb, cpi->source, 0, 0, plane_count,
                       cm->seq_params->sb_size);
  av1_setup_block_planes(&mb->e_mbd, cm->seq_params->subsampling_x,
                         cm->seq_params->subsampling_y, plane_count);
}
1375
1376
0
// (Re)allocates cpi->tile_data for the current tile grid
// (cm->tiles.rows x cm->tiles.cols). Any previous array is freed first, and
// every new entry starts with a zeroed row_mt_sync and a NULL row_ctx.
void av1_alloc_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int tile_cols = cm->tiles.cols;
  const int tile_rows = cm->tiles.rows;

  // av1_row_mt_mem_dealloc() runs while the old tile array is still alive.
  av1_row_mt_mem_dealloc(cpi);
  aom_free(cpi->tile_data);
  // allocated_tiles only reflects the new size once CHECK_MEM_ERROR has
  // returned.
  cpi->allocated_tiles = 0;

  CHECK_MEM_ERROR(
      cm, cpi->tile_data,
      aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));
  cpi->allocated_tiles = tile_cols * tile_rows;

  for (int row = 0; row < tile_rows; ++row) {
    TileDataEnc *const row_tiles = &cpi->tile_data[row * tile_cols];
    for (int col = 0; col < tile_cols; ++col) {
      av1_zero(row_tiles[col].row_mt_sync);
      row_tiles[col].row_ctx = NULL;
    }
  }
}
1400
1401
0
// Initializes every entry of cpi->tile_data for the current frame: tile
// geometry, per-tile counters, the per-tile start pointers into the token and
// token-list storage (when token info is allocated), the CDF-update permission
// and the starting entropy context (a copy of *cm->fc).
void av1_init_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  TokenInfo *const token_info = &cpi->token_info;
  const int num_planes = av1_num_planes(cm);
  const int tile_cols = cm->tiles.cols;
  const int tile_rows = cm->tiles.rows;

  if (!is_stat_generation_stage(cpi) &&
      cm->features.allow_screen_content_tools) {
    // Number of tokens for which token info needs to be allocated.
    const unsigned int tokens_required =
        get_token_alloc(cm->mi_params.mb_rows, cm->mi_params.mb_cols,
                        MAX_SB_SIZE_LOG2, num_planes);
    // Allocate/reallocate memory for token related info if the number of
    // tokens required is more than the number of tokens already allocated.
    // This could occur in case of the following:
    // 1) If the memory is not yet allocated
    // 2) If the frame dimensions have changed
    if (tokens_required > token_info->tokens_allocated) {
      free_token_info(token_info);
      alloc_token_info(cm, token_info, tokens_required);
    }
  }

  // Running cursors into the token / token-list storage. They are read after
  // the (re)allocation step above so they always refer to the current buffers.
  TokenExtra *tok_cursor = token_info->tile_tok[0][0];
  TokenList *tplist_cursor = token_info->tplist[0][0];
  // Sizes reserved for the previously visited tile; each tile's slice starts
  // right after the previous tile's slice.
  unsigned int prev_tile_tokens = 0;
  int prev_tile_sb_rows = 0;

  for (int row = 0; row < tile_rows; ++row) {
    for (int col = 0; col < tile_cols; ++col) {
      TileDataEnc *const tile_data = &cpi->tile_data[row * tile_cols + col];
      TileInfo *const tile_info = &tile_data->tile_info;

      av1_tile_init(tile_info, cm, row, col);
      tile_data->firstpass_top_mv = kZeroMv;
      tile_data->abs_sum_level = 0;

      if (is_token_info_allocated(token_info)) {
        tok_cursor += prev_tile_tokens;
        token_info->tile_tok[row][col] = tok_cursor;
        prev_tile_tokens = allocated_tokens(
            tile_info, cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
            num_planes);

        tplist_cursor += prev_tile_sb_rows;
        token_info->tplist[row][col] = tplist_cursor;
        prev_tile_sb_rows = av1_get_sb_rows_in_tile(cm, tile_info);
      }

      // CDF updates are allowed only when the tile layout is not large-scale,
      // the frame does not disable CDF updates, and
      // delay_wait_for_top_right_sb() reports 0.
      tile_data->allow_update_cdf = !cm->tiles.large_scale &&
                                    !cm->features.disable_cdf_update &&
                                    !delay_wait_for_top_right_sb(cpi);
      tile_data->tctx = *cm->fc;
    }
  }
}
1460
1461
// Populate the start palette token info prior to encoding an SB row.
1462
static inline void get_token_start(AV1_COMP *cpi, const TileInfo *tile_info,
1463
                                   int tile_row, int tile_col, int mi_row,
1464
0
                                   TokenExtra **tp) {
1465
0
  const TokenInfo *token_info = &cpi->token_info;
1466
0
  if (!is_token_info_allocated(token_info)) return;
1467
1468
0
  const AV1_COMMON *cm = &cpi->common;
1469
0
  const int num_planes = av1_num_planes(cm);
1470
0
  TokenList *const tplist = cpi->token_info.tplist[tile_row][tile_col];
1471
0
  const int sb_row_in_tile =
1472
0
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1473
1474
0
  get_start_tok(cpi, tile_row, tile_col, mi_row, tp,
1475
0
                cm->seq_params->mib_size_log2 + MI_SIZE_LOG2, num_planes);
1476
0
  assert(tplist != NULL);
1477
0
  tplist[sb_row_in_tile].start = *tp;
1478
0
}
1479
1480
// Populate the token count after encoding an SB row.
1481
static inline void populate_token_count(AV1_COMP *cpi,
1482
                                        const TileInfo *tile_info, int tile_row,
1483
                                        int tile_col, int mi_row,
1484
0
                                        TokenExtra *tok) {
1485
0
  const TokenInfo *token_info = &cpi->token_info;
1486
0
  if (!is_token_info_allocated(token_info)) return;
1487
1488
0
  const AV1_COMMON *cm = &cpi->common;
1489
0
  const int num_planes = av1_num_planes(cm);
1490
0
  TokenList *const tplist = token_info->tplist[tile_row][tile_col];
1491
0
  const int sb_row_in_tile =
1492
0
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1493
0
  const int tile_mb_cols =
1494
0
      (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
1495
0
  const int num_mb_rows_in_sb =
1496
0
      ((1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
1497
0
  tplist[sb_row_in_tile].count =
1498
0
      (unsigned int)(tok - tplist[sb_row_in_tile].start);
1499
1500
0
  assert((unsigned int)(tok - tplist[sb_row_in_tile].start) <=
1501
0
         get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
1502
0
                         cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1503
0
                         num_planes));
1504
1505
0
  (void)num_planes;
1506
0
  (void)tile_mb_cols;
1507
0
  (void)num_mb_rows_in_sb;
1508
0
}
1509
1510
/*!\brief Encode a superblock row
1511
 *
1512
 * \ingroup partition_search
1513
 */
1514
void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
1515
0
                       int tile_col, int mi_row) {
1516
0
  AV1_COMMON *const cm = &cpi->common;
1517
0
  const int tile_cols = cm->tiles.cols;
1518
0
  TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
1519
0
  const TileInfo *const tile_info = &this_tile->tile_info;
1520
0
  TokenExtra *tok = NULL;
1521
1522
0
  get_token_start(cpi, tile_info, tile_row, tile_col, mi_row, &tok);
1523
1524
0
  encode_sb_row(cpi, td, this_tile, mi_row, &tok);
1525
1526
0
  populate_token_count(cpi, tile_info, tile_row, tile_col, mi_row, tok);
1527
0
}
1528
1529
/*!\brief Encode a tile
1530
 *
1531
 * \ingroup partition_search
1532
 */
1533
void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
1534
0
                     int tile_col) {
1535
0
  AV1_COMMON *const cm = &cpi->common;
1536
0
  TileDataEnc *const this_tile =
1537
0
      &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1538
0
  const TileInfo *const tile_info = &this_tile->tile_info;
1539
1540
0
  if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile);
1541
1542
0
  av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
1543
0
                         tile_info->mi_col_end, tile_row);
1544
0
  av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
1545
0
                         &td->mb.e_mbd);
1546
1547
0
#if !CONFIG_REALTIME_ONLY
1548
0
  if (cpi->oxcf.intra_mode_cfg.enable_cfl_intra)
1549
0
    cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);
1550
0
#endif
1551
1552
0
  if (td->mb.txfm_search_info.mb_rd_record != NULL) {
1553
0
    av1_crc32c_calculator_init(
1554
0
        &td->mb.txfm_search_info.mb_rd_record->crc_calculator);
1555
0
  }
1556
1557
0
  for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
1558
0
       mi_row += cm->seq_params->mib_size) {
1559
0
    av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
1560
0
  }
1561
0
  this_tile->abs_sum_level = td->abs_sum_level;
1562
0
}
1563
1564
/*!\brief Break one frame into tiles and encode the tiles
1565
 *
1566
 * \ingroup partition_search
1567
 *
1568
 * \param[in]    cpi    Top-level encoder structure
1569
 */
1570
0
static inline void encode_tiles(AV1_COMP *cpi) {
1571
0
  AV1_COMMON *const cm = &cpi->common;
1572
0
  const int tile_cols = cm->tiles.cols;
1573
0
  const int tile_rows = cm->tiles.rows;
1574
0
  int tile_col, tile_row;
1575
1576
0
  MACROBLOCK *const mb = &cpi->td.mb;
1577
0
  assert(IMPLIES(cpi->tile_data == NULL, cpi->allocated_tiles == 0));
1578
0
  if (cpi->allocated_tiles != tile_cols * tile_rows) av1_alloc_tile_data(cpi);
1579
1580
0
  av1_init_tile_data(cpi);
1581
0
  av1_alloc_mb_data(cpi, mb);
1582
1583
0
  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1584
0
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1585
0
      TileDataEnc *const this_tile =
1586
0
          &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1587
0
      cpi->td.intrabc_used = 0;
1588
0
      cpi->td.deltaq_used = 0;
1589
0
      cpi->td.abs_sum_level = 0;
1590
0
      cpi->td.rd_counts.seg_tmp_pred_cost[0] = 0;
1591
0
      cpi->td.rd_counts.seg_tmp_pred_cost[1] = 0;
1592
0
      cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
1593
0
      cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
1594
0
      av1_init_rtc_counters(&cpi->td.mb);
1595
0
      cpi->td.mb.palette_pixels = 0;
1596
0
      av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
1597
0
      if (!frame_is_intra_only(&cpi->common))
1598
0
        av1_accumulate_rtc_counters(cpi, &cpi->td.mb);
1599
0
      cpi->palette_pixel_num += cpi->td.mb.palette_pixels;
1600
0
      cpi->intrabc_used |= cpi->td.intrabc_used;
1601
0
      cpi->deltaq_used |= cpi->td.deltaq_used;
1602
0
    }
1603
0
  }
1604
1605
0
  av1_dealloc_mb_data(mb, av1_num_planes(cm));
1606
0
}
1607
1608
// Set the relative distance of a reference frame w.r.t. current frame
1609
static inline void set_rel_frame_dist(
1610
    const AV1_COMMON *const cm, RefFrameDistanceInfo *const ref_frame_dist_info,
1611
0
    const int ref_frame_flags) {
1612
0
  MV_REFERENCE_FRAME ref_frame;
1613
0
  int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX;
1614
0
  ref_frame_dist_info->nearest_past_ref = NONE_FRAME;
1615
0
  ref_frame_dist_info->nearest_future_ref = NONE_FRAME;
1616
0
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
1617
0
    ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = 0;
1618
0
    if (ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
1619
0
      int dist = av1_encoder_get_relative_dist(
1620
0
          cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME],
1621
0
          cm->current_frame.display_order_hint);
1622
0
      ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = dist;
1623
      // Get the nearest ref_frame in the past
1624
0
      if (abs(dist) < min_past_dist && dist < 0) {
1625
0
        ref_frame_dist_info->nearest_past_ref = ref_frame;
1626
0
        min_past_dist = abs(dist);
1627
0
      }
1628
      // Get the nearest ref_frame in the future
1629
0
      if (dist < min_future_dist && dist > 0) {
1630
0
        ref_frame_dist_info->nearest_future_ref = ref_frame;
1631
0
        min_future_dist = dist;
1632
0
      }
1633
0
    }
1634
0
  }
1635
0
}
1636
1637
0
static inline int refs_are_one_sided(const AV1_COMMON *cm) {
1638
0
  assert(!frame_is_intra_only(cm));
1639
1640
0
  int one_sided_refs = 1;
1641
0
  const int cur_display_order_hint = cm->current_frame.display_order_hint;
1642
0
  for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
1643
0
    const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
1644
0
    if (buf == NULL) continue;
1645
0
    if (av1_encoder_get_relative_dist(buf->display_order_hint,
1646
0
                                      cur_display_order_hint) > 0) {
1647
0
      one_sided_refs = 0;  // bwd reference
1648
0
      break;
1649
0
    }
1650
0
  }
1651
0
  return one_sided_refs;
1652
0
}
1653
1654
static inline void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
1655
0
                                             int ref_order_hint[2]) {
1656
0
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
1657
0
  ref_order_hint[0] = ref_order_hint[1] = 0;
1658
0
  if (!skip_mode_info->skip_mode_allowed) return;
1659
1660
0
  const RefCntBuffer *const buf_0 =
1661
0
      get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
1662
0
  const RefCntBuffer *const buf_1 =
1663
0
      get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
1664
0
  assert(buf_0 != NULL && buf_1 != NULL);
1665
1666
0
  ref_order_hint[0] = buf_0->order_hint;
1667
0
  ref_order_hint[1] = buf_1->order_hint;
1668
0
}
1669
1670
0
// Decides whether skip mode is used for the current frame. Returns 1 when it
// is, 0 otherwise. As a side effect, av1_setup_skip_mode_allowed() is run on
// the common state first.
static int check_skip_mode_enabled(AV1_COMP *const cpi) {
  AV1_COMMON *const cm = &cpi->common;

  av1_setup_skip_mode_allowed(cm);
  const SkipModeInfo *const info = &cm->current_frame.skip_mode_info;
  if (!info->skip_mode_allowed) return 0;

  // Turn off skip mode if the temporal distances of the reference pair to the
  // current frame are different by more than 1 frame.
  int ref_offset[2];
  get_skip_mode_ref_offsets(cm, ref_offset);
  const int cur_offset = (int)cm->current_frame.order_hint;
  const int dist_to_ref0 = get_relative_dist(&cm->seq_params->order_hint_info,
                                             cur_offset, ref_offset[0]);
  // Only the second distance is taken as an absolute value.
  const int dist_to_ref1 = abs(get_relative_dist(
      &cm->seq_params->order_hint_info, cur_offset, ref_offset[1]));
  if (abs(dist_to_ref0 - dist_to_ref1) > 1) return 0;

  // High Latency: Turn off skip mode if all refs are fwd.
  if (cpi->all_one_sided_refs && cpi->oxcf.gf_cfg.lag_in_frames > 0) return 0;

  // Both skip-mode references must be enabled in ref_frame_flags.
  const int first_ref = info->ref_frame_idx_0 + LAST_FRAME;
  const int second_ref = info->ref_frame_idx_1 + LAST_FRAME;
  return (cpi->ref_frame_flags & av1_ref_frame_flag_list[first_ref]) != 0 &&
         (cpi->ref_frame_flags & av1_ref_frame_flag_list[second_ref]) != 0;
}
1700
1701
static inline void set_default_interp_skip_flags(
1702
0
    const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
1703
0
  const int num_planes = av1_num_planes(cm);
1704
0
  interp_search_flags->default_interp_skip_flags =
1705
0
      (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
1706
0
                        : INTERP_SKIP_LUMA_SKIP_CHROMA;
1707
0
}
1708
1709
/*!\cond */
typedef struct {
  // Scoring function for usefulness of references (the lower score, the more
  // useful)
  int score;
  // Index in the reference buffer
  int index;
} RefScoreData;
/*!\endcond */

// Comparison function to sort reference frames in ascending score order.
// Entries with equal scores are ordered by ascending index, which makes the
// ordering total and the qsort() result deterministic.
//
// Returns a negative value, zero, or a positive value when *a sorts before,
// equal to, or after *b. The result is computed with comparisons rather than
// subtraction so that it cannot overflow for any pair of int values.
static int compare_score_data_asc(const void *a, const void *b) {
  const RefScoreData *ra = (const RefScoreData *)a;
  const RefScoreData *rb = (const RefScoreData *)b;

  if (ra->score != rb->score) return (ra->score > rb->score) ? 1 : -1;

  return (ra->index > rb->index) - (ra->index < rb->index);
}
1729
1730
// Determines whether a given reference frame is "good" based on temporal
1731
// distance and base_qindex. The "good" reference frames are not allowed to be
1732
// pruned by the speed feature "prune_single_ref" and "prune_comp_ref_frames"
1733
// at block level.
1734
0
static inline void setup_keep_ref_frame_mask(AV1_COMP *cpi) {
1735
0
  const int prune_single_ref = cpi->sf.inter_sf.prune_single_ref;
1736
0
  const int prune_comp_ref_frames = cpi->sf.inter_sf.prune_comp_ref_frames;
1737
0
  const AV1_COMMON *const cm = &cpi->common;
1738
0
  cpi->keep_single_ref_frame_mask = 0;
1739
0
  cpi->keep_comp_ref_frame_mask = 0;
1740
0
  if (frame_is_intra_only(cm)) return;
1741
1742
0
  RefScoreData ref_score_data[INTER_REFS_PER_FRAME];
1743
0
  for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
1744
0
    ref_score_data[i].score = INT_MAX;
1745
0
    ref_score_data[i].index = i;
1746
0
  }
1747
1748
  // Calculate score for each reference frame based on relative distance to
1749
  // the current frame and its base_qindex. A lower score means that the
1750
  // reference is potentially more useful.
1751
0
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
1752
0
       ++ref_frame) {
1753
0
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
1754
0
      const RefFrameDistanceInfo *const ref_frame_dist_info =
1755
0
          &cpi->ref_frame_dist_info;
1756
0
      const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
1757
0
      ref_score_data[ref_frame - LAST_FRAME].score =
1758
0
          abs(ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME]) +
1759
0
          buf->base_qindex;
1760
0
    }
1761
0
  }
1762
1763
0
  qsort(ref_score_data, INTER_REFS_PER_FRAME, sizeof(ref_score_data[0]),
1764
0
        compare_score_data_asc);
1765
1766
  // Decide the number of reference frames for which pruning via the speed
1767
  // feature prune_single_ref is disallowed.
1768
  // prune_single_ref = 0 => None of the 7 reference frames are pruned.
1769
  // prune_single_ref = 1 => The best 5 reference frames are not pruned.
1770
  // prune_single_ref = 2 => The best 3 reference frames are not pruned.
1771
  // prune_single_ref = 3, 4 => All the 7 references are allowed to be pruned.
1772
0
  static const int num_single_ref_to_keep_lookup[5] = { INTER_REFS_PER_FRAME, 5,
1773
0
                                                        3, 0, 0 };
1774
0
  assert(prune_single_ref >= 0 && prune_single_ref <= 4);
1775
0
  const int num_single_ref_to_keep =
1776
0
      num_single_ref_to_keep_lookup[prune_single_ref];
1777
0
  for (int i = 0; i < num_single_ref_to_keep; ++i) {
1778
0
    const int idx = ref_score_data[i].index;
1779
0
    cpi->keep_single_ref_frame_mask |= 1 << idx;
1780
0
  }
1781
1782
  // Decide the number of reference frame pairs for which pruning via the speed
1783
  // feature "prune_comp_ref_frames" is disallowed.
1784
  // prune_comp_ref_frames = 0    => None of the allowed reference frame pairs
1785
  //                                 are pruned.
1786
  // prune_comp_ref_frames = 1    => The best 3 reference frame pairs are not
1787
  //                                 allowed to be pruned, i.e, reference frame
1788
  //                                 pairs with rank (1, 2), (1, 3), (2, 3) are
1789
  //                                 not  pruned.
1790
  // prune_comp_ref_frames = 2, 3 => All the reference frame pairs are allowed
1791
  //                                 to be pruned.
1792
0
  static const int num_comp_ref_to_keep_lookup[4] = { INTER_REFS_PER_FRAME, 3,
1793
0
                                                      0, 0 };
1794
0
  assert(prune_comp_ref_frames >= 0 && prune_comp_ref_frames <= 3);
1795
0
  const int num_comp_ref_to_keep =
1796
0
      num_comp_ref_to_keep_lookup[prune_comp_ref_frames];
1797
0
  for (int i = 0; i < num_comp_ref_to_keep; ++i) {
1798
0
    const int idx = ref_score_data[i].index;
1799
0
    cpi->keep_comp_ref_frame_mask |= 1 << idx;
1800
0
  }
1801
0
}
1802
1803
0
// Updates cpi->prune_ref_frame_mask, whose bits REF_FRAMES and above identify
// compound reference pairs (indices REF_FRAMES..MODE_CTX_REF_FRAMES-1).
//  - When one-sided compound is unavailable (disabled by configuration or by
//    speed feature) and all references are one-sided, the mask is overwritten
//    so that every compound pair is pruned.
//  - Otherwise, when the RD path is in use and selective_ref_frame >= 1,
//    individual pairs are added to the existing mask by the rules below.
//  - In all other cases the mask is left untouched.
static inline void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
  const int onesided_comp_off = !cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
                                cpi->sf.inter_sf.disable_onesided_comp;
  if (onesided_comp_off && cpi->all_one_sided_refs) {
    // Disable all compound references
    cpi->prune_ref_frame_mask = (1 << MODE_CTX_REF_FRAMES) - (1 << REF_FRAMES);
    return;
  }

  if (cpi->sf.rt_sf.use_nonrd_pick_mode ||
      cpi->sf.inter_sf.selective_ref_frame < 1) {
    return;
  }

  AV1_COMMON *const cm = &cpi->common;
  const int cur_hint = cm->current_frame.display_order_hint;
  unsigned int *ref_hints = cm->cur_frame->ref_display_order_hint;
  const int arf2_dist = av1_encoder_get_relative_dist(
      ref_hints[ALTREF2_FRAME - LAST_FRAME], cur_hint);
  const int bwd_dist = av1_encoder_get_relative_dist(
      ref_hints[BWDREF_FRAME - LAST_FRAME], cur_hint);

  // Loop-invariant part of the ALTREF2 rule: BWDREF_FRAME is enabled, both
  // ALTREF2_FRAME and BWDREF_FRAME are future references, and BWDREF_FRAME is
  // at least as close to the current frame as ALTREF2_FRAME.
  const int drop_arf2_pairs =
      cpi->sf.inter_sf.selective_ref_frame >= 4 &&
      (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME]) &&
      arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist;

  for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
    MV_REFERENCE_FRAME rf[2];
    av1_set_ref_frame(rf, ref_idx);

    // Pairs with a disabled member are not considered here.
    const int first_enabled =
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) != 0;
    if (!first_enabled ||
        !(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]])) {
      continue;
    }

    if (!cpi->all_one_sided_refs) {
      const int dist0 = av1_encoder_get_relative_dist(
          ref_hints[rf[0] - LAST_FRAME], cur_hint);
      const int dist1 = av1_encoder_get_relative_dist(
          ref_hints[rf[1] - LAST_FRAME], cur_hint);

      // One-sided compound is used only when all reference frames are
      // one-sided.
      if ((dist0 > 0) == (dist1 > 0)) {
        cpi->prune_ref_frame_mask |= 1 << ref_idx;
      }
    }

    // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
    // reference to the current frame than ALTREF2_FRAME
    if (drop_arf2_pairs &&
        (rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME)) {
      cpi->prune_ref_frame_mask |= 1 << ref_idx;
    }
  }
}
1859
1860
0
// Returns 1 when delta-q should be allowed for the current frame.
// In non-realtime builds, the delta RD cost reported by
// av1_get_q_for_deltaq_objective() is summed over every superblock of the
// frame, and delta-q is allowed only if the total is negative. Realtime-only
// builds always return 1.
static int allow_deltaq_mode(AV1_COMP *cpi) {
#if !CONFIG_REALTIME_ONLY
  AV1_COMMON *const cm = &cpi->common;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  // Superblock dimensions in mi units; used as the scan step.
  const int sb_mi_wide = mi_size_wide[sb_size];
  const int sb_mi_high = mi_size_high[sb_size];

  int64_t total_delta_rdcost = 0;
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sb_mi_high) {
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols;
         mi_col += sb_mi_wide) {
      int64_t sb_delta_rdcost = 0;
      av1_get_q_for_deltaq_objective(cpi, &cpi->td, &sb_delta_rdcost, sb_size,
                                     mi_row, mi_col);
      total_delta_rdcost += sb_delta_rdcost;
    }
  }
  return total_delta_rdcost < 0;
#else
  (void)cpi;
  return 1;
#endif  // !CONFIG_REALTIME_ONLY
}
1882
1883
0
static inline int disable_deltaq_for_intl_arfs(const AV1_COMP *cpi) {
1884
0
  if (cpi->oxcf.mode == GOOD && is_stat_consumption_stage_twopass(cpi) &&
1885
0
      cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE &&
1886
0
      cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ &&
1887
0
      !cpi->common.seg.enabled && !cpi->roi.enabled && !cpi->oxcf.sb_qp_sweep &&
1888
0
      !cpi->use_ducky_encode) {
1889
0
    return 1;
1890
0
  }
1891
0
  return 0;
1892
0
}
1893
1894
0
static inline int enable_delta_rdmult(const AV1_COMP *cpi) {
1895
0
  if (!disable_deltaq_for_intl_arfs(cpi))
1896
0
    return cpi->common.delta_q_info.delta_q_present_flag;
1897
1898
0
  const GF_GROUP *gf_group = &cpi->ppi->gf_group;
1899
0
  return gf_group->update_type[cpi->gf_frame_index] != LF_UPDATE;
1900
0
}
1901
1902
0
static inline int enable_delta_q(const AV1_COMP *cpi) {
1903
0
  const GF_GROUP *gf_group = &cpi->ppi->gf_group;
1904
0
  if (!disable_deltaq_for_intl_arfs(cpi))
1905
0
    return gf_group->update_type[cpi->gf_frame_index] != LF_UPDATE;
1906
1907
0
  return cpi->common.current_frame.pyramid_level <= 1;
1908
0
}
1909
1910
0
#define FORCE_ZMV_SKIP_128X128_BLK_DIFF 10000
1911
#define FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF 4
1912
1913
// Populates block level thresholds for force zeromv-skip decision
1914
0
static void populate_thresh_to_force_zeromv_skip(AV1_COMP *cpi) {
1915
0
  if (cpi->sf.rt_sf.part_early_exit_zeromv == 0) return;
1916
1917
  // Threshold for forcing zeromv-skip decision is as below:
1918
  // For 128x128 blocks, threshold is 10000 and per pixel threshold is 0.6103.
1919
  // For 64x64 blocks, threshold is 5000 and per pixel threshold is 1.221
1920
  // allowing slightly higher error for smaller blocks.
1921
  // Per Pixel Threshold of 64x64 block        Area of 64x64 block         1  1
1922
  // ------------------------------------=sqrt(---------------------)=sqrt(-)=-
1923
  // Per Pixel Threshold of 128x128 block      Area of 128x128 block       4  2
1924
  // Thus, per pixel thresholds for blocks of size 32x32, 16x16,...  can be
1925
  // chosen as 2.442, 4.884,.... As the per pixel error tends to be higher for
1926
  // small blocks, the same is clipped to 4.
1927
0
  const unsigned int thresh_exit_128x128_part = FORCE_ZMV_SKIP_128X128_BLK_DIFF;
1928
0
  const int num_128x128_pix =
1929
0
      block_size_wide[BLOCK_128X128] * block_size_high[BLOCK_128X128];
1930
1931
0
  for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
1932
0
    const int num_block_pix = block_size_wide[bsize] * block_size_high[bsize];
1933
1934
    // Calculate the threshold for zeromv-skip decision based on area of the
1935
    // partition
1936
0
    unsigned int thresh_exit_part_blk =
1937
0
        (unsigned int)(thresh_exit_128x128_part *
1938
0
                           sqrt((double)num_block_pix / num_128x128_pix) +
1939
0
                       0.5);
1940
0
    thresh_exit_part_blk = AOMMIN(
1941
0
        thresh_exit_part_blk,
1942
0
        (unsigned int)(FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF * num_block_pix));
1943
0
    cpi->zeromv_skip_thresh_exit_part[bsize] = thresh_exit_part_blk;
1944
0
  }
1945
0
}
1946
1947
0
// Frees both buffers referenced by block_hash_values[0] and
// block_hash_values[1] with aom_free(). The pointers themselves are left
// unchanged.
static void free_block_hash_buffers(uint32_t *block_hash_values[2]) {
  aom_free(block_hash_values[0]);
  aom_free(block_hash_values[1]);
}
1952
1953
/*!\brief Determines delta_q_res value for Variance Boost modulation.
 *
 * Maps the base qindex to a delta_q resolution:
 * qindex >= 160 -> 8, >= 120 -> 4, >= 80 -> 2, otherwise 1.
 */
static int aom_get_variance_boost_delta_q_res(int qindex) {
  // Signaling delta_q changes across superblocks comes with inherent syntax
  // element overhead, which adds up to total payload size. This overhead
  // becomes proportionally bigger the higher the base qindex (i.e. lower
  // quality, smaller file size), so a balance needs to be struck.
  // - Smaller delta_q_res: more granular delta_q control, more bits spent
  // signaling deltas.
  // - Larger delta_q_res: coarser delta_q control, less bits spent signaling
  // deltas.
  //
  // At the same time, SB qindex fluctuations become larger the higher
  // the base qindex (between lowest and highest-variance regions):
  // - For QP 5: up to 8 qindexes
  // - For QP 60: up to 52 qindexes
  //
  // With these factors in mind, it was found that the best strategy that
  // maximizes quality per bitrate is by having very finely-grained delta_q
  // values for the lowest picture qindexes (to preserve tiny qindex SB deltas),
  // and progressively making them coarser as base qindex increases (to reduce
  // total signaling overhead).
  if (qindex >= 160) return 8;
  if (qindex >= 120) return 4;
  if (qindex >= 80) return 2;
  return 1;
}
1989
1990
#if !CONFIG_REALTIME_ONLY
1991
0
static float get_thresh_based_on_q(int qindex, int speed) {
1992
0
  const float min_threshold_arr[3] = { 0.084f, 0.087f, 0.126f };
1993
0
  const float max_threshold_arr[3] = { 0.140f, 0.150f, 0.182f };
1994
0
  const int idx = (speed >= 3) ? 2 : (speed - 1);
1995
0
  const float min_thresh = min_threshold_arr[idx];
1996
0
  const float max_thresh = max_threshold_arr[idx];
1997
0
  const float thresh = min_thresh + (max_thresh - min_thresh) *
1998
0
                                        ((float)MAXQ - (float)qindex) /
1999
0
                                        (float)(MAXQ - MINQ);
2000
0
  return thresh;
2001
0
}
2002
2003
0
// Returns the sum of the absolute row difference and the absolute column
// difference between two motion vectors. The differences and their absolute
// values are stored in MV fields before being summed.
static int get_mv_err(MV cur_mv, MV ref_mv) {
  const MV delta = { cur_mv.row - ref_mv.row, cur_mv.col - ref_mv.col };
  const MV magnitude = { abs(delta.row), abs(delta.col) };
  return magnitude.row + magnitude.col;
}
2009
2010
0
// Lowers *best_mv_err to the error between cur_mv and ref_mv when that error
// is smaller than the current best; otherwise leaves it as is.
static void check_mv_err_and_update(MV cur_mv, MV ref_mv, int *best_mv_err) {
  const int candidate_err = get_mv_err(cur_mv, ref_mv);
  if (candidate_err < *best_mv_err) {
    *best_mv_err = candidate_err;
  } else {
    *best_mv_err = *best_mv_err;
  }
}
2014
2015
// Returns 1 when the position (mi_row + row_offset, mi_col + col_offset) lies
// within [0, num_mi_rows) x [0, num_mi_cols), and 0 otherwise.
static int is_inside_frame_border(int mi_row, int mi_col, int row_offset,
                                  int col_offset, int num_mi_rows,
                                  int num_mi_cols) {
  const int row = mi_row + row_offset;
  const int col = mi_col + col_offset;
  const int row_ok = row >= 0 && row < num_mi_rows;
  const int col_ok = col >= 0 && col < num_mi_cols;
  return row_ok && col_ok;
}
2024
2025
// Compute the minimum MV error between current MV and spatial MV predictors.
2026
static int get_spatial_mvpred_err(AV1_COMMON *cm, TplParams *const tpl_data,
2027
                                  int tpl_idx, int mi_row, int mi_col,
2028
                                  int ref_idx, int_mv cur_mv, int allow_hp,
2029
0
                                  int is_integer) {
2030
0
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
2031
0
  TplDepStats *tpl_ptr = tpl_frame->tpl_stats_ptr;
2032
0
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
2033
2034
0
  int mv_err = INT32_MAX;
2035
0
  const int step = 1 << block_mis_log2;
2036
0
  const int mv_pred_pos_in_mis[8][2] = {
2037
0
    { -step, 0 },     { 0, -step },     { -step, step },  { -step, -step },
2038
0
    { -2 * step, 0 }, { 0, -2 * step }, { -3 * step, 0 }, { 0, -3 * step },
2039
0
  };
2040
2041
0
  for (int i = 0; i < 8; i++) {
2042
0
    int row_offset = mv_pred_pos_in_mis[i][0];
2043
0
    int col_offset = mv_pred_pos_in_mis[i][1];
2044
0
    if (!is_inside_frame_border(mi_row, mi_col, row_offset, col_offset,
2045
0
                                tpl_frame->mi_rows, tpl_frame->mi_cols)) {
2046
0
      continue;
2047
0
    }
2048
2049
0
    const TplDepStats *tpl_stats =
2050
0
        &tpl_ptr[av1_tpl_ptr_pos(mi_row + row_offset, mi_col + col_offset,
2051
0
                                 tpl_frame->stride, block_mis_log2)];
2052
0
    int_mv this_refmv = tpl_stats->mv[ref_idx];
2053
0
    lower_mv_precision(&this_refmv.as_mv, allow_hp, is_integer);
2054
0
    check_mv_err_and_update(cur_mv.as_mv, this_refmv.as_mv, &mv_err);
2055
0
  }
2056
2057
  // Check MV error w.r.t. Global MV / Zero MV
2058
0
  int_mv gm_mv = { 0 };
2059
0
  if (cm->global_motion[ref_idx + LAST_FRAME].wmtype > TRANSLATION) {
2060
0
    const BLOCK_SIZE bsize = convert_length_to_bsize(tpl_data->tpl_bsize_1d);
2061
0
    gm_mv = gm_get_motion_vector(&cm->global_motion[ref_idx + LAST_FRAME],
2062
0
                                 allow_hp, bsize, mi_col, mi_row, is_integer);
2063
0
  }
2064
0
  check_mv_err_and_update(cur_mv.as_mv, gm_mv.as_mv, &mv_err);
2065
2066
0
  return mv_err;
2067
0
}
2068
2069
// Compute the minimum MV error between current MV and temporal MV predictors.
2070
static int get_temporal_mvpred_err(AV1_COMMON *cm, int mi_row, int mi_col,
2071
                                   int num_mi_rows, int num_mi_cols,
2072
                                   int ref_idx, int_mv cur_mv, int allow_hp,
2073
0
                                   int is_integer) {
2074
0
  const RefCntBuffer *ref_buf = get_ref_frame_buf(cm, ref_idx + LAST_FRAME);
2075
0
  if (ref_buf == NULL) return INT32_MAX;
2076
0
  int cur_to_ref_dist =
2077
0
      get_relative_dist(&cm->seq_params->order_hint_info,
2078
0
                        cm->cur_frame->order_hint, ref_buf->order_hint);
2079
2080
0
  int mv_err = INT32_MAX;
2081
0
  const int mv_pred_pos_in_mis[7][2] = {
2082
0
    { 0, 0 }, { 0, 2 }, { 2, 0 }, { 2, 2 }, { 4, -2 }, { 4, 4 }, { 2, 4 },
2083
0
  };
2084
2085
0
  for (int i = 0; i < 7; i++) {
2086
0
    int row_offset = mv_pred_pos_in_mis[i][0];
2087
0
    int col_offset = mv_pred_pos_in_mis[i][1];
2088
0
    if (!is_inside_frame_border(mi_row, mi_col, row_offset, col_offset,
2089
0
                                num_mi_rows, num_mi_cols)) {
2090
0
      continue;
2091
0
    }
2092
0
    const TPL_MV_REF *ref_mvs =
2093
0
        cm->tpl_mvs +
2094
0
        ((mi_row + row_offset) >> 1) * (cm->mi_params.mi_stride >> 1) +
2095
0
        ((mi_col + col_offset) >> 1);
2096
0
    if (ref_mvs->mfmv0.as_int == INVALID_MV) continue;
2097
2098
0
    int_mv this_refmv;
2099
0
    av1_get_mv_projection(&this_refmv.as_mv, ref_mvs->mfmv0.as_mv,
2100
0
                          cur_to_ref_dist, ref_mvs->ref_frame_offset);
2101
0
    lower_mv_precision(&this_refmv.as_mv, allow_hp, is_integer);
2102
0
    check_mv_err_and_update(cur_mv.as_mv, this_refmv.as_mv, &mv_err);
2103
0
  }
2104
2105
0
  return mv_err;
2106
0
}
2107
2108
// Determine whether to disable temporal MV prediction for the current frame
2109
// based on TPL and motion field data. Temporal MV prediction is disabled if the
2110
// reduction in MV error by including temporal MVs as MV predictors is small.
2111
0
static void check_to_disable_ref_frame_mvs(AV1_COMP *cpi) {
2112
0
  AV1_COMMON *cm = &cpi->common;
2113
0
  if (!cm->features.allow_ref_frame_mvs || cpi->sf.hl_sf.ref_frame_mvs_lvl != 1)
2114
0
    return;
2115
2116
0
  const int tpl_idx = cpi->gf_frame_index;
2117
0
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
2118
0
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
2119
2120
0
  const SUBPEL_FORCE_STOP tpl_subpel_precision =
2121
0
      cpi->sf.tpl_sf.subpel_force_stop;
2122
0
  const int allow_high_precision_mv = tpl_subpel_precision == EIGHTH_PEL &&
2123
0
                                      cm->features.allow_high_precision_mv;
2124
0
  const int force_integer_mv = tpl_subpel_precision == FULL_PEL ||
2125
0
                               cm->features.cur_frame_force_integer_mv;
2126
2127
0
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
2128
0
  TplDepStats *tpl_ptr = tpl_frame->tpl_stats_ptr;
2129
0
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
2130
0
  const int step = 1 << block_mis_log2;
2131
2132
0
  uint64_t accum_spatial_mvpred_err = 0;
2133
0
  uint64_t accum_best_err = 0;
2134
2135
0
  for (int mi_row = 0; mi_row < tpl_frame->mi_rows; mi_row += step) {
2136
0
    for (int mi_col = 0; mi_col < tpl_frame->mi_cols; mi_col += step) {
2137
0
      TplDepStats *tpl_stats_ptr = &tpl_ptr[av1_tpl_ptr_pos(
2138
0
          mi_row, mi_col, tpl_frame->stride, block_mis_log2)];
2139
0
      const int cur_best_ref_idx = tpl_stats_ptr->ref_frame_index[0];
2140
0
      if (cur_best_ref_idx == NONE_FRAME) continue;
2141
2142
0
      int_mv cur_mv = tpl_stats_ptr->mv[cur_best_ref_idx];
2143
0
      lower_mv_precision(&cur_mv.as_mv, allow_high_precision_mv,
2144
0
                         force_integer_mv);
2145
2146
0
      const int cur_spatial_mvpred_err = get_spatial_mvpred_err(
2147
0
          cm, tpl_data, tpl_idx, mi_row, mi_col, cur_best_ref_idx, cur_mv,
2148
0
          allow_high_precision_mv, force_integer_mv);
2149
2150
0
      const int cur_temporal_mvpred_err = get_temporal_mvpred_err(
2151
0
          cm, mi_row, mi_col, tpl_frame->mi_rows, tpl_frame->mi_cols,
2152
0
          cur_best_ref_idx, cur_mv, allow_high_precision_mv, force_integer_mv);
2153
2154
0
      const int cur_best_err =
2155
0
          AOMMIN(cur_spatial_mvpred_err, cur_temporal_mvpred_err);
2156
0
      accum_spatial_mvpred_err += cur_spatial_mvpred_err;
2157
0
      accum_best_err += cur_best_err;
2158
0
    }
2159
0
  }
2160
2161
0
  const float threshold =
2162
0
      get_thresh_based_on_q(cm->quant_params.base_qindex, cpi->oxcf.speed);
2163
0
  const float mv_err_reduction =
2164
0
      (float)(accum_spatial_mvpred_err - accum_best_err);
2165
2166
0
  if (mv_err_reduction <= threshold * accum_spatial_mvpred_err)
2167
0
    cm->features.allow_ref_frame_mvs = 0;
2168
0
}
2169
#endif  // !CONFIG_REALTIME_ONLY
2170
2171
/*!\brief Encoder setup(only for the current frame), encoding, and recontruction
2172
 * for a single frame
2173
 *
2174
 * \ingroup high_level_algo
2175
 */
2176
0
static inline void encode_frame_internal(AV1_COMP *cpi) {
2177
0
  ThreadData *const td = &cpi->td;
2178
0
  MACROBLOCK *const x = &td->mb;
2179
0
  AV1_COMMON *const cm = &cpi->common;
2180
0
  CommonModeInfoParams *const mi_params = &cm->mi_params;
2181
0
  FeatureFlags *const features = &cm->features;
2182
0
  MACROBLOCKD *const xd = &x->e_mbd;
2183
0
  RD_COUNTS *const rdc = &cpi->td.rd_counts;
2184
#if CONFIG_FPMT_TEST
2185
  FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
2186
  FrameProbInfo *const temp_frame_probs_simulation =
2187
      &cpi->ppi->temp_frame_probs_simulation;
2188
#endif
2189
0
  FrameProbInfo *const frame_probs = &cpi->ppi->frame_probs;
2190
0
  IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
2191
0
  MultiThreadInfo *const mt_info = &cpi->mt_info;
2192
0
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
2193
0
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
2194
0
  const DELTAQ_MODE deltaq_mode = oxcf->q_cfg.deltaq_mode;
2195
0
  int i;
2196
2197
0
  if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
2198
0
    mi_params->setup_mi(mi_params);
2199
0
  }
2200
2201
0
  set_mi_offsets(mi_params, xd, 0, 0);
2202
2203
0
  av1_zero(*td->counts);
2204
0
  av1_zero(rdc->tx_type_used);
2205
0
  av1_zero(rdc->obmc_used);
2206
0
  av1_zero(rdc->warped_used);
2207
0
  av1_zero(rdc->seg_tmp_pred_cost);
2208
2209
  // Reset the flag.
2210
0
  cpi->intrabc_used = 0;
2211
  // Need to disable intrabc when superres is selected
2212
0
  if (av1_superres_scaled(cm)) {
2213
0
    features->allow_intrabc = 0;
2214
0
  }
2215
2216
0
  features->allow_intrabc &= (oxcf->kf_cfg.enable_intrabc);
2217
2218
0
  if (features->allow_warped_motion &&
2219
0
      cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
2220
0
    const FRAME_UPDATE_TYPE update_type =
2221
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2222
0
    int warped_probability =
2223
#if CONFIG_FPMT_TEST
2224
        cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE
2225
            ? temp_frame_probs->warped_probs[update_type]
2226
            :
2227
#endif  // CONFIG_FPMT_TEST
2228
0
            frame_probs->warped_probs[update_type];
2229
0
    if (warped_probability < cpi->sf.inter_sf.prune_warped_prob_thresh)
2230
0
      features->allow_warped_motion = 0;
2231
0
  }
2232
2233
0
  int hash_table_created = 0;
2234
0
  if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
2235
0
      !cpi->sf.rt_sf.use_nonrd_pick_mode) {
2236
    // TODO(any): move this outside of the recoding loop to avoid recalculating
2237
    // the hash table.
2238
    // add to hash table
2239
0
    const int pic_width = cpi->source->y_crop_width;
2240
0
    const int pic_height = cpi->source->y_crop_height;
2241
0
    uint32_t *block_hash_values[2] = { NULL };  // two buffers used ping-pong
2242
0
    bool error = false;
2243
2244
0
    for (int j = 0; j < 2; ++j) {
2245
0
      block_hash_values[j] = (uint32_t *)aom_malloc(
2246
0
          sizeof(*block_hash_values[j]) * pic_width * pic_height);
2247
0
      if (!block_hash_values[j]) {
2248
0
        error = true;
2249
0
        break;
2250
0
      }
2251
0
    }
2252
2253
0
    av1_hash_table_init(intrabc_hash_info);
2254
0
    if (error ||
2255
0
        !av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table)) {
2256
0
      free_block_hash_buffers(block_hash_values);
2257
0
      aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
2258
0
                         "Error allocating intrabc_hash_table and buffers");
2259
0
    }
2260
0
    hash_table_created = 1;
2261
0
    av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0]);
2262
    // Hash data generated for screen contents is used for intraBC ME
2263
0
    const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
2264
0
    int max_sb_size = (1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2));
2265
2266
0
    if (cpi->sf.mv_sf.hash_max_8x8_intrabc_blocks) {
2267
0
      max_sb_size = AOMMIN(8, max_sb_size);
2268
0
    }
2269
2270
0
    int src_idx = 0;
2271
0
    for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
2272
0
      const int dst_idx = !src_idx;
2273
0
      av1_generate_block_hash_value(intrabc_hash_info, cpi->source, size,
2274
0
                                    block_hash_values[src_idx],
2275
0
                                    block_hash_values[dst_idx]);
2276
0
      if (size >= min_alloc_size &&
2277
0
          !av1_add_to_hash_map_by_row_with_precal_data(
2278
0
              &intrabc_hash_info->intrabc_hash_table,
2279
0
              block_hash_values[dst_idx], pic_width, pic_height, size)) {
2280
0
        error = true;
2281
0
        break;
2282
0
      }
2283
0
    }
2284
2285
0
    free_block_hash_buffers(block_hash_values);
2286
2287
0
    if (error) {
2288
0
      aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
2289
0
                         "Error adding data to intrabc_hash_table");
2290
0
    }
2291
0
  }
2292
2293
0
  const CommonQuantParams *quant_params = &cm->quant_params;
2294
0
  for (i = 0; i < MAX_SEGMENTS; ++i) {
2295
0
    const int qindex =
2296
0
        cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
2297
0
                        : quant_params->base_qindex;
2298
0
    xd->lossless[i] =
2299
0
        qindex == 0 && quant_params->y_dc_delta_q == 0 &&
2300
0
        quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
2301
0
        quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
2302
0
    if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
2303
0
    xd->qindex[i] = qindex;
2304
0
    if (xd->lossless[i]) {
2305
0
      cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
2306
0
    } else {
2307
0
      cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
2308
0
    }
2309
0
  }
2310
0
  features->coded_lossless = is_coded_lossless(cm, xd);
2311
0
  features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm);
2312
2313
  // Fix delta q resolution for the moment
2314
2315
0
  cm->delta_q_info.delta_q_res = 0;
2316
0
  if (cpi->use_ducky_encode) {
2317
0
    cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_DUCKY_ENCODE;
2318
0
  } else if (cpi->oxcf.q_cfg.aq_mode != CYCLIC_REFRESH_AQ &&
2319
0
             !cpi->roi.enabled) {
2320
0
    if (deltaq_mode == DELTA_Q_OBJECTIVE)
2321
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE;
2322
0
    else if (deltaq_mode == DELTA_Q_PERCEPTUAL)
2323
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
2324
0
    else if (deltaq_mode == DELTA_Q_PERCEPTUAL_AI)
2325
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
2326
0
    else if (deltaq_mode == DELTA_Q_USER_RATING_BASED)
2327
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
2328
0
    else if (deltaq_mode == DELTA_Q_HDR)
2329
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
2330
0
    else if (deltaq_mode == DELTA_Q_VARIANCE_BOOST)
2331
0
      cm->delta_q_info.delta_q_res =
2332
0
          aom_get_variance_boost_delta_q_res(quant_params->base_qindex);
2333
    // Set delta_q_present_flag before it is used for the first time
2334
0
    cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
2335
0
    cm->delta_q_info.delta_q_present_flag = deltaq_mode != NO_DELTA_Q;
2336
2337
    // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q
2338
    // is used for ineligible frames. That effectively will turn off row_mt
2339
    // usage. Note objective delta_q and tpl eligible frames are only altref
2340
    // frames currently.
2341
0
    if (cm->delta_q_info.delta_q_present_flag) {
2342
0
      if (deltaq_mode == DELTA_Q_OBJECTIVE && !enable_delta_q(cpi))
2343
0
        cm->delta_q_info.delta_q_present_flag = 0;
2344
2345
0
      if (deltaq_mode == DELTA_Q_OBJECTIVE &&
2346
0
          cm->delta_q_info.delta_q_present_flag) {
2347
0
        cm->delta_q_info.delta_q_present_flag &= allow_deltaq_mode(cpi);
2348
0
      }
2349
0
    }
2350
2351
    // Reset delta_q_used flag
2352
0
    cpi->deltaq_used = 0;
2353
2354
0
    cm->delta_q_info.delta_lf_present_flag =
2355
0
        cm->delta_q_info.delta_q_present_flag &&
2356
0
        oxcf->tool_cfg.enable_deltalf_mode;
2357
0
    cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
2358
2359
    // update delta_q_present_flag and delta_lf_present_flag based on
2360
    // base_qindex
2361
0
    cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
2362
0
    cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
2363
0
  } else if (cpi->cyclic_refresh->apply_cyclic_refresh ||
2364
0
             cpi->svc.number_temporal_layers == 1) {
2365
0
    cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
2366
0
    cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
2367
0
  }
2368
0
  cpi->rc.cnt_zeromv = 0;
2369
0
  cpi->cb_delta_rdmult_enabled = enable_delta_rdmult(cpi);
2370
2371
0
  av1_frame_init_quantizer(cpi);
2372
0
  init_encode_frame_mb_context(cpi);
2373
0
  set_default_interp_skip_flags(cm, &cpi->interp_search_flags);
2374
2375
0
  if (cm->prev_frame && cm->prev_frame->seg.enabled &&
2376
0
      cpi->svc.number_spatial_layers == 1)
2377
0
    cm->last_frame_seg_map = cm->prev_frame->seg_map;
2378
0
  else
2379
0
    cm->last_frame_seg_map = NULL;
2380
0
  if (features->allow_intrabc || features->coded_lossless) {
2381
0
    av1_set_default_ref_deltas(cm->lf.ref_deltas);
2382
0
    av1_set_default_mode_deltas(cm->lf.mode_deltas);
2383
0
  } else if (cm->prev_frame) {
2384
0
    memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
2385
0
    memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
2386
0
  }
2387
0
  cm->lf.mode_ref_delta_enabled = oxcf->algo_cfg.mode_ref_delta_enabled;
2388
0
  memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
2389
0
  memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
2390
2391
0
  cpi->all_one_sided_refs =
2392
0
      frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm);
2393
2394
0
  cpi->prune_ref_frame_mask = 0;
2395
  // Figure out which ref frames can be skipped at frame level.
2396
0
  setup_prune_ref_frame_mask(cpi);
2397
  // Disable certain reference frame pruning based on temporal distance and
2398
  // quality of that reference frame.
2399
0
  setup_keep_ref_frame_mask(cpi);
2400
2401
0
  x->txfm_search_info.txb_split_count = 0;
2402
#if CONFIG_SPEED_STATS
2403
  x->txfm_search_info.tx_search_count = 0;
2404
#endif  // CONFIG_SPEED_STATS
2405
2406
0
#if !CONFIG_REALTIME_ONLY
2407
#if CONFIG_COLLECT_COMPONENT_TIMING
2408
  start_timing(cpi, av1_compute_global_motion_time);
2409
#endif
2410
0
  av1_compute_global_motion_facade(cpi);
2411
#if CONFIG_COLLECT_COMPONENT_TIMING
2412
  end_timing(cpi, av1_compute_global_motion_time);
2413
#endif
2414
0
#endif  // !CONFIG_REALTIME_ONLY
2415
2416
#if CONFIG_COLLECT_COMPONENT_TIMING
2417
  start_timing(cpi, av1_setup_motion_field_time);
2418
#endif
2419
0
  av1_calculate_ref_frame_side(cm);
2420
2421
0
  features->allow_ref_frame_mvs &= !(cpi->sf.hl_sf.ref_frame_mvs_lvl == 2);
2422
0
  if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
2423
0
#if !CONFIG_REALTIME_ONLY
2424
0
  check_to_disable_ref_frame_mvs(cpi);
2425
0
#endif  // !CONFIG_REALTIME_ONLY
2426
2427
#if CONFIG_COLLECT_COMPONENT_TIMING
2428
  end_timing(cpi, av1_setup_motion_field_time);
2429
#endif
2430
2431
0
  cm->current_frame.skip_mode_info.skip_mode_flag =
2432
0
      check_skip_mode_enabled(cpi);
2433
2434
  // Initialization of skip mode cost depends on the value of
2435
  // 'skip_mode_flag'. This initialization happens in the function
2436
  // av1_fill_mode_rates(), which is in turn called in
2437
  // av1_initialize_rd_consts(). Thus, av1_initialize_rd_consts()
2438
  // has to be called after 'skip_mode_flag' is initialized.
2439
0
  av1_initialize_rd_consts(cpi);
2440
0
  av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
2441
0
  populate_thresh_to_force_zeromv_skip(cpi);
2442
2443
0
  enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
2444
0
  enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
2445
0
  mt_info->row_mt_enabled = 0;
2446
0
  mt_info->pack_bs_mt_enabled = AOMMIN(mt_info->num_mod_workers[MOD_PACK_BS],
2447
0
                                       cm->tiles.cols * cm->tiles.rows) > 1;
2448
2449
0
  if (oxcf->row_mt && (mt_info->num_workers > 1)) {
2450
0
    mt_info->row_mt_enabled = 1;
2451
0
    enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
2452
0
    enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
2453
0
    av1_encode_tiles_row_mt(cpi);
2454
0
  } else {
2455
0
    if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1) {
2456
0
      av1_encode_tiles_mt(cpi);
2457
0
    } else {
2458
      // Preallocate the pc_tree for realtime coding to reduce the cost of
2459
      // memory allocation.
2460
0
      const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
2461
0
      if (use_nonrd_mode) {
2462
0
        td->pc_root = av1_alloc_pc_tree_node(cm->seq_params->sb_size);
2463
0
        if (!td->pc_root)
2464
0
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2465
0
                             "Failed to allocate PC_TREE");
2466
0
      } else {
2467
0
        td->pc_root = NULL;
2468
0
      }
2469
2470
0
      encode_tiles(cpi);
2471
0
      av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
2472
0
                                 cpi->sf.part_sf.partition_search_type);
2473
0
      td->pc_root = NULL;
2474
0
    }
2475
0
  }
2476
2477
  // If intrabc is allowed but never selected, reset the allow_intrabc flag.
2478
0
  if (features->allow_intrabc && !cpi->intrabc_used) {
2479
0
    features->allow_intrabc = 0;
2480
0
  }
2481
0
  if (features->allow_intrabc) {
2482
0
    cm->delta_q_info.delta_lf_present_flag = 0;
2483
0
  }
2484
2485
0
  if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
2486
0
    cm->delta_q_info.delta_q_present_flag = 0;
2487
0
  }
2488
2489
  // Set the transform size appropriately before bitstream creation
2490
0
  const MODE_EVAL_TYPE eval_type =
2491
0
      cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch
2492
0
          ? WINNER_MODE_EVAL
2493
0
          : DEFAULT_EVAL;
2494
0
  const TX_SIZE_SEARCH_METHOD tx_search_type =
2495
0
      cpi->winner_mode_params.tx_size_search_methods[eval_type];
2496
0
  assert(oxcf->txfm_cfg.enable_tx64 || tx_search_type != USE_LARGESTALL);
2497
0
  features->tx_mode = select_tx_mode(cm, tx_search_type);
2498
2499
  // Retain the frame level probability update conditions for parallel frames.
2500
  // These conditions will be consumed during postencode stage to update the
2501
  // probability.
2502
0
  if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2503
0
    cpi->do_update_frame_probs_txtype[cpi->num_frame_recode] =
2504
0
        cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats;
2505
0
    cpi->do_update_frame_probs_obmc[cpi->num_frame_recode] =
2506
0
        (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2507
0
         cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX);
2508
0
    cpi->do_update_frame_probs_warp[cpi->num_frame_recode] =
2509
0
        (features->allow_warped_motion &&
2510
0
         cpi->sf.inter_sf.prune_warped_prob_thresh > 0);
2511
0
    cpi->do_update_frame_probs_interpfilter[cpi->num_frame_recode] =
2512
0
        (cm->current_frame.frame_type != KEY_FRAME &&
2513
0
         cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2514
0
         features->interp_filter == SWITCHABLE);
2515
0
  }
2516
2517
0
  if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats ||
2518
0
      ((cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh !=
2519
0
        INT_MAX) &&
2520
0
       (cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh != 0))) {
2521
0
    const FRAME_UPDATE_TYPE update_type =
2522
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2523
0
    for (i = 0; i < TX_SIZES_ALL; i++) {
2524
0
      int sum = 0;
2525
0
      int j;
2526
0
      int left = MAX_TX_TYPE_PROB;
2527
2528
0
      for (j = 0; j < TX_TYPES; j++)
2529
0
        sum += cpi->td.rd_counts.tx_type_used[i][j];
2530
2531
0
      for (j = TX_TYPES - 1; j >= 0; j--) {
2532
0
        int update_txtype_frameprobs = 1;
2533
0
        const int new_prob =
2534
0
            sum ? (int)((int64_t)MAX_TX_TYPE_PROB *
2535
0
                        cpi->td.rd_counts.tx_type_used[i][j] / sum)
2536
0
                : (j ? 0 : MAX_TX_TYPE_PROB);
2537
#if CONFIG_FPMT_TEST
2538
        if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2539
          if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2540
              0) {
2541
            int prob =
2542
                (temp_frame_probs_simulation->tx_type_probs[update_type][i][j] +
2543
                 new_prob) >>
2544
                1;
2545
            left -= prob;
2546
            if (j == 0) prob += left;
2547
            temp_frame_probs_simulation->tx_type_probs[update_type][i][j] =
2548
                prob;
2549
            // Copy temp_frame_probs_simulation to temp_frame_probs
2550
            for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2551
                 update_type_idx++) {
2552
              temp_frame_probs->tx_type_probs[update_type_idx][i][j] =
2553
                  temp_frame_probs_simulation
2554
                      ->tx_type_probs[update_type_idx][i][j];
2555
            }
2556
          }
2557
          update_txtype_frameprobs = 0;
2558
        }
2559
#endif  // CONFIG_FPMT_TEST
2560
        // Track the frame probabilities of parallel encode frames to update
2561
        // during postencode stage.
2562
0
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2563
0
          update_txtype_frameprobs = 0;
2564
0
          cpi->frame_new_probs[cpi->num_frame_recode]
2565
0
              .tx_type_probs[update_type][i][j] = new_prob;
2566
0
        }
2567
0
        if (update_txtype_frameprobs) {
2568
0
          int prob =
2569
0
              (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
2570
0
          left -= prob;
2571
0
          if (j == 0) prob += left;
2572
0
          frame_probs->tx_type_probs[update_type][i][j] = prob;
2573
0
        }
2574
0
      }
2575
0
    }
2576
0
  }
2577
2578
0
  if (cm->seg.enabled) {
2579
0
    cm->seg.temporal_update = 1;
2580
0
    if (rdc->seg_tmp_pred_cost[0] < rdc->seg_tmp_pred_cost[1])
2581
0
      cm->seg.temporal_update = 0;
2582
0
  }
2583
2584
0
  if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2585
0
      cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
2586
0
    const FRAME_UPDATE_TYPE update_type =
2587
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2588
2589
0
    for (i = 0; i < BLOCK_SIZES_ALL; i++) {
2590
0
      int sum = 0;
2591
0
      int update_obmc_frameprobs = 1;
2592
0
      for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];
2593
2594
0
      const int new_prob =
2595
0
          sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
2596
#if CONFIG_FPMT_TEST
2597
      if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2598
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2599
          temp_frame_probs_simulation->obmc_probs[update_type][i] =
2600
              (temp_frame_probs_simulation->obmc_probs[update_type][i] +
2601
               new_prob) >>
2602
              1;
2603
          // Copy temp_frame_probs_simulation to temp_frame_probs
2604
          for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2605
               update_type_idx++) {
2606
            temp_frame_probs->obmc_probs[update_type_idx][i] =
2607
                temp_frame_probs_simulation->obmc_probs[update_type_idx][i];
2608
          }
2609
        }
2610
        update_obmc_frameprobs = 0;
2611
      }
2612
#endif  // CONFIG_FPMT_TEST
2613
      // Track the frame probabilities of parallel encode frames to update
2614
      // during postencode stage.
2615
0
      if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2616
0
        update_obmc_frameprobs = 0;
2617
0
        cpi->frame_new_probs[cpi->num_frame_recode].obmc_probs[update_type][i] =
2618
0
            new_prob;
2619
0
      }
2620
0
      if (update_obmc_frameprobs) {
2621
0
        frame_probs->obmc_probs[update_type][i] =
2622
0
            (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
2623
0
      }
2624
0
    }
2625
0
  }
2626
2627
0
  if (features->allow_warped_motion &&
2628
0
      cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
2629
0
    const FRAME_UPDATE_TYPE update_type =
2630
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2631
0
    int update_warp_frameprobs = 1;
2632
0
    int sum = 0;
2633
0
    for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
2634
0
    const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
2635
#if CONFIG_FPMT_TEST
2636
    if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2637
      if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2638
        temp_frame_probs_simulation->warped_probs[update_type] =
2639
            (temp_frame_probs_simulation->warped_probs[update_type] +
2640
             new_prob) >>
2641
            1;
2642
        // Copy temp_frame_probs_simulation to temp_frame_probs
2643
        for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2644
             update_type_idx++) {
2645
          temp_frame_probs->warped_probs[update_type_idx] =
2646
              temp_frame_probs_simulation->warped_probs[update_type_idx];
2647
        }
2648
      }
2649
      update_warp_frameprobs = 0;
2650
    }
2651
#endif  // CONFIG_FPMT_TEST
2652
    // Track the frame probabilities of parallel encode frames to update
2653
    // during postencode stage.
2654
0
    if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2655
0
      update_warp_frameprobs = 0;
2656
0
      cpi->frame_new_probs[cpi->num_frame_recode].warped_probs[update_type] =
2657
0
          new_prob;
2658
0
    }
2659
0
    if (update_warp_frameprobs) {
2660
0
      frame_probs->warped_probs[update_type] =
2661
0
          (frame_probs->warped_probs[update_type] + new_prob) >> 1;
2662
0
    }
2663
0
  }
2664
2665
0
  if (cm->current_frame.frame_type != KEY_FRAME &&
2666
0
      cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2667
0
      features->interp_filter == SWITCHABLE) {
2668
0
    const FRAME_UPDATE_TYPE update_type =
2669
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2670
2671
0
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
2672
0
      int sum = 0;
2673
0
      int j;
2674
0
      int left = 1536;
2675
2676
0
      for (j = 0; j < SWITCHABLE_FILTERS; j++) {
2677
0
        sum += cpi->td.counts->switchable_interp[i][j];
2678
0
      }
2679
2680
0
      for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
2681
0
        int update_interpfilter_frameprobs = 1;
2682
0
        const int new_prob =
2683
0
            sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
2684
0
                : (j ? 0 : 1536);
2685
#if CONFIG_FPMT_TEST
2686
        if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2687
          if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2688
              0) {
2689
            int prob = (temp_frame_probs_simulation
2690
                            ->switchable_interp_probs[update_type][i][j] +
2691
                        new_prob) >>
2692
                       1;
2693
            left -= prob;
2694
            if (j == 0) prob += left;
2695
            temp_frame_probs_simulation
2696
                ->switchable_interp_probs[update_type][i][j] = prob;
2697
            // Copy temp_frame_probs_simulation to temp_frame_probs
2698
            for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2699
                 update_type_idx++) {
2700
              temp_frame_probs->switchable_interp_probs[update_type_idx][i][j] =
2701
                  temp_frame_probs_simulation
2702
                      ->switchable_interp_probs[update_type_idx][i][j];
2703
            }
2704
          }
2705
          update_interpfilter_frameprobs = 0;
2706
        }
2707
#endif  // CONFIG_FPMT_TEST
2708
        // Track the frame probabilities of parallel encode frames to update
2709
        // during postencode stage.
2710
0
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2711
0
          update_interpfilter_frameprobs = 0;
2712
0
          cpi->frame_new_probs[cpi->num_frame_recode]
2713
0
              .switchable_interp_probs[update_type][i][j] = new_prob;
2714
0
        }
2715
0
        if (update_interpfilter_frameprobs) {
2716
0
          int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
2717
0
                      new_prob) >>
2718
0
                     1;
2719
0
          left -= prob;
2720
0
          if (j == 0) prob += left;
2721
0
          frame_probs->switchable_interp_probs[update_type][i][j] = prob;
2722
0
        }
2723
0
      }
2724
0
    }
2725
0
  }
2726
0
  if (hash_table_created) {
2727
0
    av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
2728
0
  }
2729
0
}
2730
2731
/*!\brief Setup reference frame buffers and encode a frame
2732
 *
2733
 * \ingroup high_level_algo
2734
 * \callgraph
2735
 * \callergraph
2736
 *
2737
 * \param[in]    cpi    Top-level encoder structure
2738
 */
2739
0
void av1_encode_frame(AV1_COMP *cpi) {
2740
0
  AV1_COMMON *const cm = &cpi->common;
2741
0
  CurrentFrame *const current_frame = &cm->current_frame;
2742
0
  FeatureFlags *const features = &cm->features;
2743
0
  RD_COUNTS *const rdc = &cpi->td.rd_counts;
2744
0
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
2745
  // Indicates whether or not to use a default reduced set for ext-tx
2746
  // rather than the potential full set of 16 transforms
2747
0
  features->reduced_tx_set_used = oxcf->txfm_cfg.reduced_tx_type_set;
2748
2749
  // Make sure segment_id is no larger than last_active_segid.
2750
0
  if (cm->seg.enabled && cm->seg.update_map) {
2751
0
    const int mi_rows = cm->mi_params.mi_rows;
2752
0
    const int mi_cols = cm->mi_params.mi_cols;
2753
0
    const int last_active_segid = cm->seg.last_active_segid;
2754
0
    uint8_t *map = cpi->enc_seg.map;
2755
0
    for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
2756
0
      for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
2757
0
        map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
2758
0
      }
2759
0
      map += mi_cols;
2760
0
    }
2761
0
  }
2762
2763
0
  av1_setup_frame_buf_refs(cm);
2764
0
  enforce_max_ref_frames(cpi, &cpi->ref_frame_flags,
2765
0
                         cm->cur_frame->ref_display_order_hint,
2766
0
                         cm->current_frame.display_order_hint);
2767
0
  set_rel_frame_dist(&cpi->common, &cpi->ref_frame_dist_info,
2768
0
                     cpi->ref_frame_flags);
2769
0
  av1_setup_frame_sign_bias(cm);
2770
2771
  // If global motion is enabled, then every buffer which is used as either
2772
  // a source or a ref frame should have an image pyramid allocated.
2773
  // Check here so that issues can be caught early in debug mode
2774
0
#if !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
2775
0
  if (cpi->alloc_pyramid) {
2776
0
    assert(cpi->source->y_pyramid);
2777
0
    for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
2778
0
      const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
2779
0
      if (buf != NULL) {
2780
0
        assert(buf->buf.y_pyramid);
2781
0
      }
2782
0
    }
2783
0
  }
2784
0
#endif  // !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
2785
2786
#if CONFIG_MISMATCH_DEBUG
2787
  mismatch_reset_frame(av1_num_planes(cm));
2788
#endif
2789
2790
0
  rdc->newmv_or_intra_blocks = 0;
2791
0
  cpi->palette_pixel_num = 0;
2792
2793
0
  if (cpi->sf.hl_sf.frame_parameter_update ||
2794
0
      cpi->sf.rt_sf.use_comp_ref_nonrd) {
2795
0
    if (frame_is_intra_only(cm))
2796
0
      current_frame->reference_mode = SINGLE_REFERENCE;
2797
0
    else
2798
0
      current_frame->reference_mode = REFERENCE_MODE_SELECT;
2799
2800
0
    features->interp_filter = SWITCHABLE;
2801
0
    if (cm->tiles.large_scale) features->interp_filter = EIGHTTAP_REGULAR;
2802
2803
0
    features->switchable_motion_mode = is_switchable_motion_mode_allowed(
2804
0
        features->allow_warped_motion, oxcf->motion_mode_cfg.enable_obmc);
2805
2806
0
    rdc->compound_ref_used_flag = 0;
2807
0
    rdc->skip_mode_used_flag = 0;
2808
2809
0
    encode_frame_internal(cpi);
2810
2811
0
    if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
2812
      // Use a flag that includes 4x4 blocks
2813
0
      if (rdc->compound_ref_used_flag == 0) {
2814
0
        current_frame->reference_mode = SINGLE_REFERENCE;
2815
#if CONFIG_ENTROPY_STATS
2816
        av1_zero(cpi->td.counts->comp_inter);
2817
#endif  // CONFIG_ENTROPY_STATS
2818
0
      }
2819
0
    }
2820
    // Re-check on the skip mode status as reference mode may have been
2821
    // changed.
2822
0
    SkipModeInfo *const skip_mode_info = &current_frame->skip_mode_info;
2823
0
    if (frame_is_intra_only(cm) ||
2824
0
        current_frame->reference_mode == SINGLE_REFERENCE) {
2825
0
      skip_mode_info->skip_mode_allowed = 0;
2826
0
      skip_mode_info->skip_mode_flag = 0;
2827
0
    }
2828
0
    if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
2829
0
      skip_mode_info->skip_mode_flag = 0;
2830
2831
0
    if (!cm->tiles.large_scale) {
2832
0
      if (features->tx_mode == TX_MODE_SELECT &&
2833
0
          cpi->td.mb.txfm_search_info.txb_split_count == 0)
2834
0
        features->tx_mode = TX_MODE_LARGEST;
2835
0
    }
2836
0
  } else {
2837
    // This is needed if real-time speed setting is changed on the fly
2838
    // from one using compound prediction to one using single reference.
2839
0
    if (current_frame->reference_mode == REFERENCE_MODE_SELECT)
2840
0
      current_frame->reference_mode = SINGLE_REFERENCE;
2841
0
    encode_frame_internal(cpi);
2842
0
  }
2843
0
}