Coverage Report

Created: 2026-05-24 07:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/encoder/encodeframe.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <limits.h>
13
#include <float.h>
14
#include <math.h>
15
#include <stdbool.h>
16
#include <stdio.h>
17
18
#include "config/aom_config.h"
19
#include "config/aom_dsp_rtcd.h"
20
#include "config/av1_rtcd.h"
21
22
#include "aom_dsp/aom_dsp_common.h"
23
#include "aom_dsp/binary_codes_writer.h"
24
#include "aom_ports/mem.h"
25
#include "aom_ports/aom_timer.h"
26
#include "aom_util/aom_pthread.h"
27
#if CONFIG_MISMATCH_DEBUG
28
#include "aom_util/debug_util.h"
29
#endif  // CONFIG_MISMATCH_DEBUG
30
31
#include "av1/common/cfl.h"
32
#include "av1/common/common.h"
33
#include "av1/common/common_data.h"
34
#include "av1/common/entropy.h"
35
#include "av1/common/entropymode.h"
36
#include "av1/common/idct.h"
37
#include "av1/common/mv.h"
38
#include "av1/common/mvref_common.h"
39
#include "av1/common/pred_common.h"
40
#include "av1/common/quant_common.h"
41
#include "av1/common/reconintra.h"
42
#include "av1/common/reconinter.h"
43
#include "av1/common/seg_common.h"
44
#include "av1/common/tile_common.h"
45
#include "av1/common/warped_motion.h"
46
47
#include "av1/encoder/allintra_vis.h"
48
#include "av1/encoder/aq_complexity.h"
49
#include "av1/encoder/aq_cyclicrefresh.h"
50
#include "av1/encoder/aq_variance.h"
51
#include "av1/encoder/av1_quantize.h"
52
#include "av1/encoder/global_motion_facade.h"
53
#include "av1/encoder/encodeframe.h"
54
#include "av1/encoder/encodeframe_utils.h"
55
#include "av1/encoder/encodemb.h"
56
#include "av1/encoder/encodemv.h"
57
#include "av1/encoder/encodetxb.h"
58
#include "av1/encoder/ethread.h"
59
#include "av1/encoder/extend.h"
60
#include "av1/encoder/intra_mode_search_utils.h"
61
#include "av1/encoder/ml.h"
62
#include "av1/encoder/motion_search_facade.h"
63
#include "av1/encoder/partition_strategy.h"
64
#if !CONFIG_REALTIME_ONLY
65
#include "av1/encoder/partition_model_weights.h"
66
#endif
67
#include "av1/encoder/partition_search.h"
68
#include "av1/encoder/rd.h"
69
#include "av1/encoder/rdopt.h"
70
#include "av1/encoder/reconinter_enc.h"
71
#include "av1/encoder/segmentation.h"
72
#include "av1/encoder/tokenize.h"
73
#include "av1/encoder/tpl_model.h"
74
#include "av1/encoder/var_based_part.h"
75
76
#if CONFIG_TUNE_VMAF
77
#include "av1/encoder/tune_vmaf.h"
78
#endif
79
80
/*!\cond */
81
// This is used as a reference when computing the source variance for the
82
//  purposes of activity masking.
83
// Eventually this should be replaced by custom no-reference routines,
84
//  which will be faster.
85
static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
86
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
87
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
88
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
89
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
90
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
91
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
92
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
93
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
94
  128, 128, 128, 128, 128, 128, 128, 128
95
};
96
97
#if CONFIG_AV1_HIGHBITDEPTH
98
// 16-bit counterpart of AV1_VAR_OFFS for 8-bit content stored in a
// high-bitdepth buffer: every entry is 128.
static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};
109
110
// Reference row for 10-bit content: every entry is 128 * 4 (= 512).
static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512
};
128
129
// Reference row for 12-bit content: every entry is 128 * 16 (= 2048).
static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048
};
150
#endif  // CONFIG_AV1_HIGHBITDEPTH
151
/*!\endcond */
152
153
// For the given bit depth, returns a constant array used to assist the
154
// calculation of source block variance, which will then be used to decide
155
// adaptive quantizers.
156
0
static const uint8_t *get_var_offs(int use_hbd, int bd) {
157
0
#if CONFIG_AV1_HIGHBITDEPTH
158
0
  if (use_hbd) {
159
0
    assert(bd == 8 || bd == 10 || bd == 12);
160
0
    const int off_index = (bd - 8) >> 1;
161
0
    static const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
162
0
                                                AV1_HIGH_VAR_OFFS_10,
163
0
                                                AV1_HIGH_VAR_OFFS_12 };
164
0
    return CONVERT_TO_BYTEPTR(high_var_offs[off_index]);
165
0
  }
166
#else
167
  (void)use_hbd;
168
  (void)bd;
169
  assert(!use_hbd);
170
#endif
171
0
  assert(bd == 8);
172
0
  return AV1_VAR_OFFS;
173
0
}
174
175
0
// Resets the counters kept on the macroblock: the cyclic-refresh counters
// (through av1_init_cyclic_refresh_counters) followed by the zero-mv and
// row/column scroll counters.
void av1_init_rtc_counters(MACROBLOCK *const x) {
  av1_init_cyclic_refresh_counters(x);

  x->sb_row_scroll = 0;
  x->sb_col_scroll = 0;
  x->cnt_zeromv = 0;
}
181
182
0
// Adds the counters gathered on macroblock x into the encoder-level totals.
// The cyclic-refresh counters are only accumulated when the AQ mode is
// CYCLIC_REFRESH_AQ; the zero-mv and scroll counters are always added to
// cpi->rc.
void av1_accumulate_rtc_counters(AV1_COMP *cpi, const MACROBLOCK *const x) {
  const int cyclic_refresh_on = (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ);
  if (cyclic_refresh_on) {
    av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh, x);
  }

  cpi->rc.cnt_zeromv += x->cnt_zeromv;
  cpi->rc.num_col_blscroll_last_tl0 += x->sb_col_scroll;
  cpi->rc.num_row_blscroll_last_tl0 += x->sb_row_scroll;
}
189
190
unsigned int av1_get_perpixel_variance(const AV1_COMP *cpi,
191
                                       const MACROBLOCKD *xd,
192
                                       const struct buf_2d *ref,
193
                                       BLOCK_SIZE bsize, int plane,
194
0
                                       int use_hbd) {
195
0
  const int subsampling_x = xd->plane[plane].subsampling_x;
196
0
  const int subsampling_y = xd->plane[plane].subsampling_y;
197
0
  const BLOCK_SIZE plane_bsize =
198
0
      get_plane_block_size(bsize, subsampling_x, subsampling_y);
199
0
  unsigned int sse;
200
0
  const unsigned int var = cpi->ppi->fn_ptr[plane_bsize].vf(
201
0
      ref->buf, ref->stride, get_var_offs(use_hbd, xd->bd), 0, &sse);
202
0
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[plane_bsize]);
203
0
}
204
205
unsigned int av1_get_perpixel_variance_facade(const AV1_COMP *cpi,
206
                                              const MACROBLOCKD *xd,
207
                                              const struct buf_2d *ref,
208
0
                                              BLOCK_SIZE bsize, int plane) {
209
0
  const int use_hbd = is_cur_buf_hbd(xd);
210
0
  return av1_get_perpixel_variance(cpi, xd, ref, bsize, plane, use_hbd);
211
0
}
212
213
void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
214
                          int mi_row, int mi_col, const int num_planes,
215
0
                          BLOCK_SIZE bsize) {
216
  // Set current frame pointer.
217
0
  x->e_mbd.cur_buf = src;
218
219
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
220
  // the static analysis warnings.
221
0
  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
222
0
    const int is_uv = i > 0;
223
0
    setup_pred_plane(
224
0
        &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv],
225
0
        src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL,
226
0
        x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y);
227
0
  }
228
0
}
229
230
#if !CONFIG_REALTIME_ONLY
231
/*!\brief Assigns different quantization parameters to each superblock
232
 * based on statistics relevant to the selected delta-q mode (variance).
233
 * This is the non-rd version.
234
 *
235
 * \param[in]     cpi         Top level encoder instance structure
236
 * \param[in,out] td          Thread data structure
237
 * \param[in,out] x           Superblock level data for this block.
238
 * \param[in]     tile_info   Tile information / identification
239
 * \param[in]     mi_row      Block row (in "MI_SIZE" units) index
240
 * \param[in]     mi_col      Block column (in "MI_SIZE" units) index
241
 * \param[out]    num_planes  Number of image planes (e.g. Y,U,V)
242
 *
243
 * \remark No return value but updates superblock and thread data
244
 * related to the q / q delta to be used.
245
 */
246
static inline void setup_delta_q_nonrd(AV1_COMP *const cpi, ThreadData *td,
247
                                       MACROBLOCK *const x,
248
                                       const TileInfo *const tile_info,
249
0
                                       int mi_row, int mi_col, int num_planes) {
250
0
  AV1_COMMON *const cm = &cpi->common;
251
0
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
252
0
  assert(delta_q_info->delta_q_present_flag);
253
254
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
255
0
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
256
257
0
  const int delta_q_res = delta_q_info->delta_q_res;
258
0
  int current_qindex = cm->quant_params.base_qindex;
259
260
0
  if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_VARIANCE_BOOST) {
261
0
    current_qindex = av1_get_sbq_variance_boost(cpi, x);
262
0
  }
263
264
0
  x->rdmult_cur_qindex = current_qindex;
265
0
  MACROBLOCKD *const xd = &x->e_mbd;
266
0
  current_qindex = av1_adjust_q_from_delta_q_res(
267
0
      delta_q_res, xd->current_base_qindex, current_qindex);
268
269
0
  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
270
0
  x->rdmult_delta_qindex = x->delta_qindex;
271
272
0
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
273
0
  xd->mi[0]->current_qindex = current_qindex;
274
0
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);
275
276
  // keep track of any non-zero delta-q used
277
0
  td->deltaq_used |= (x->delta_qindex != 0);
278
0
}
279
280
/*!\brief Assigns different quantization parameters to each superblock
281
 * based on statistics relevant to the selected delta-q mode (TPL weight,
282
 * variance, HDR, etc).
283
 *
284
 * \ingroup tpl_modelling
285
 *
286
 * \param[in]     cpi         Top level encoder instance structure
287
 * \param[in,out] td          Thread data structure
288
 * \param[in,out] x           Superblock level data for this block.
289
 * \param[in]     tile_info   Tile information / identification
290
 * \param[in]     mi_row      Block row (in "MI_SIZE" units) index
291
 * \param[in]     mi_col      Block column (in "MI_SIZE" units) index
292
 * \param[out]    num_planes  Number of image planes (e.g. Y,U,V)
293
 *
294
 * \remark No return value but updates superblock and thread data
295
 * related to the q / q delta to be used.
296
 */
297
static inline void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
298
                                 MACROBLOCK *const x,
299
                                 const TileInfo *const tile_info, int mi_row,
300
0
                                 int mi_col, int num_planes) {
301
0
  AV1_COMMON *const cm = &cpi->common;
302
0
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
303
0
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
304
305
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
306
0
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
307
308
0
  const int delta_q_res = delta_q_info->delta_q_res;
309
0
  int current_qindex = cm->quant_params.base_qindex;
310
0
  const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
311
0
  const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
312
0
  const int sb_cols =
313
0
      CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, cm->seq_params->mib_size_log2);
314
0
  const int sb_index = sb_row * sb_cols + sb_col;
315
0
  if (cpi->use_ducky_encode && cpi->ducky_encode_info.frame_info.qp_mode ==
316
0
                                   DUCKY_ENCODE_FRAME_MODE_QINDEX) {
317
0
    current_qindex =
318
0
        cpi->ducky_encode_info.frame_info.superblock_encode_qindex[sb_index];
319
0
  } else if (cpi->ext_ratectrl.ready &&
320
0
             (cpi->ext_ratectrl.funcs.rc_type & AOM_RC_QP) != 0 &&
321
0
             cpi->ext_ratectrl.funcs.get_encodeframe_decision != NULL &&
322
0
             cpi->ext_ratectrl.sb_params_list != NULL) {
323
0
    if (cpi->ext_ratectrl.use_delta_q) {
324
0
      const int q_index = cpi->ext_ratectrl.sb_params_list[sb_index].q_index;
325
0
      if (q_index != AOM_DEFAULT_Q) {
326
0
        current_qindex = q_index;
327
0
      }
328
0
    }
329
0
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL) {
330
0
    if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
331
0
      const int block_wavelet_energy_level =
332
0
          av1_block_wavelet_energy_level(cpi, x, sb_size);
333
0
      x->sb_energy_level = block_wavelet_energy_level;
334
0
      current_qindex = av1_compute_q_from_energy_level_deltaq_mode(
335
0
          cpi, block_wavelet_energy_level);
336
0
    } else {
337
0
      const int block_var_level = av1_log_block_var(cpi, x, sb_size);
338
0
      x->sb_energy_level = block_var_level;
339
0
      current_qindex =
340
0
          av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level);
341
0
    }
342
0
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE &&
343
0
             cpi->oxcf.algo_cfg.enable_tpl_model) {
344
    // Setup deltaq based on tpl stats
345
0
    current_qindex =
346
0
        av1_get_q_for_deltaq_objective(cpi, td, NULL, sb_size, mi_row, mi_col);
347
0
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL_AI) {
348
0
    current_qindex = av1_get_sbq_perceptual_ai(cpi, sb_size, mi_row, mi_col);
349
0
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) {
350
0
    current_qindex = av1_get_sbq_user_rating_based(cpi, mi_row, mi_col);
351
0
  } else if (cpi->oxcf.q_cfg.enable_hdr_deltaq) {
352
0
    current_qindex = av1_get_q_for_hdr(cpi, x, sb_size, mi_row, mi_col);
353
0
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_VARIANCE_BOOST) {
354
0
    current_qindex = av1_get_sbq_variance_boost(cpi, x);
355
0
  }
356
357
0
  x->rdmult_cur_qindex = current_qindex;
358
0
  MACROBLOCKD *const xd = &x->e_mbd;
359
0
  const int adjusted_qindex = av1_adjust_q_from_delta_q_res(
360
0
      delta_q_res, xd->current_base_qindex, current_qindex);
361
0
  if (cpi->use_ducky_encode) {
362
0
    assert(adjusted_qindex == current_qindex);
363
0
  }
364
0
  current_qindex = adjusted_qindex;
365
366
0
  x->delta_qindex = cm->delta_q_info.delta_q_present_flag
367
0
                        ? current_qindex - cm->quant_params.base_qindex
368
0
                        : 0;
369
0
  x->rdmult_delta_qindex = current_qindex - cm->quant_params.base_qindex;
370
371
0
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
372
0
  xd->mi[0]->current_qindex = cm->delta_q_info.delta_q_present_flag
373
0
                                  ? current_qindex
374
0
                                  : cm->quant_params.base_qindex;
375
0
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);
376
377
  // keep track of any non-zero delta-q used
378
0
  td->deltaq_used |= (x->delta_qindex != 0);
379
380
0
  if (cpi->oxcf.tool_cfg.enable_deltalf_mode &&
381
0
      cm->delta_q_info.delta_q_present_flag) {
382
0
    const int delta_lf_res = delta_q_info->delta_lf_res;
383
0
    const int lfmask = ~(delta_lf_res - 1);
384
0
    const int delta_lf_from_base =
385
0
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
386
0
    const int8_t delta_lf =
387
0
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
388
0
    const int frame_lf_count =
389
0
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
390
0
    const int mib_size = cm->seq_params->mib_size;
391
392
    // pre-set the delta lf for loop filter. Note that this value is set
393
    // before mi is assigned for each block in current superblock
394
0
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
395
0
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
396
0
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
397
0
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
398
0
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
399
0
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
400
0
        }
401
0
      }
402
0
    }
403
0
  }
404
0
}
405
406
// Decides, from the TPL statistics of the superblock at (mi_row, mi_col),
// which reference frames are kept in x->tpl_keep_ref_frame.
//
// The flags are cleared first and stay cleared when the TPL stats are not
// ready, the frame is not TPL eligible, or an AQ mode is active. For overlay
// frames every byte of the flag array is set to 1. Otherwise the references
// are ranked by their accumulated prediction error change relative to
// LAST_FRAME, and the tail of the ranking is dropped once a reference's
// accumulated value is small compared to the previous one.
static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
                                 int mi_col) {
  const AV1_COMMON *cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  MACROBLOCK *x = &td->mb;
  const int frame_idx = cpi->gf_frame_index;
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;

  av1_zero(x->tpl_keep_ref_frame);

  if (!av1_tpl_stats_ready(tpl_data, frame_idx) ||
      !is_frame_tpl_eligible(gf_group, frame_idx) ||
      cpi->oxcf.q_cfg.aq_mode != NO_AQ) {
    return;
  }

  if (gf_group->update_type[frame_idx] == OVERLAY_UPDATE) {
    memset(x->tpl_keep_ref_frame, 1, sizeof(x->tpl_keep_ref_frame));
    return;
  }

  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int tpl_stride = tpl_frame->stride;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int step = 1 << block_mis_log2;

  // Rows are walked in coded units, columns in superres (upscaled) units.
  const int row_end =
      AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
  const int col_begin_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int col_end_sr =
      AOMMIN(coded_to_superres_mi(mi_col + mi_size_wide[sb_size],
                                  cm->superres_scale_denominator),
             mi_cols_sr);
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);

  // Per-reference accumulation over all TPL blocks of the superblock.
  int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
  for (int row = mi_row; row < row_end; row += step) {
    for (int col = col_begin_sr; col < col_end_sr; col += col_step_sr) {
      const TplDepStats *const stats =
          &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];

      // Find the winner ref frame idx for the current block: the smallest
      // non-zero pred_error among entries 1.., with entry 0 as the start.
      int best_rf_idx = 0;
      int64_t best_inter_cost = stats->pred_error[0];
      for (int idx = 1; idx < INTER_REFS_PER_FRAME; ++idx) {
        if (stats->pred_error[idx] != 0 &&
            stats->pred_error[idx] < best_inter_cost) {
          best_inter_cost = stats->pred_error[idx];
          best_rf_idx = idx;
        }
      }

      // Only the winner contributes, with its pred_error reduction w.r.t.
      // LAST_FRAME. Entry 0 is never accumulated.
      if (best_rf_idx >= 1) {
        inter_cost[best_rf_idx] += stats->pred_error[best_rf_idx] -
                                   stats->pred_error[LAST_FRAME - 1];
      }
    }
  }

  // Insertion sort of reference indices 1.. by ascending accumulated value.
  int rank_index[INTER_REFS_PER_FRAME - 1];
  for (int n = 0; n < INTER_REFS_PER_FRAME - 1; ++n) {
    rank_index[n] = n + 1;
    for (int k = n; k > 0; --k) {
      const int lower = rank_index[k - 1];
      const int upper = rank_index[k];
      if (inter_cost[lower] > inter_cost[upper]) {
        rank_index[k - 1] = upper;
        rank_index[k] = lower;
      }
    }
  }

  x->tpl_keep_ref_frame[INTRA_FRAME] = 1;
  x->tpl_keep_ref_frame[LAST_FRAME] = 1;

  int cutoff_ref = 0;
  for (int n = 0; n < INTER_REFS_PER_FRAME - 1; ++n) {
    const int ref = rank_index[n] + LAST_FRAME;

    // The first three ranked references are always kept.
    if (n <= 2) {
      x->tpl_keep_ref_frame[ref] = 1;
      continue;
    }

    if (!cutoff_ref) {
      // If the predictive coding gains are smaller than the previous more
      // relevant frame over certain amount, discard this frame and all the
      // frames afterwards.
      const int64_t this_cost = inter_cost[rank_index[n]];
      const int64_t prev_cost = inter_cost[rank_index[n - 1]];
      cutoff_ref = (llabs(this_cost) < llabs(prev_cost) / 8 || this_cost == 0);
    }

    x->tpl_keep_ref_frame[ref] = cutoff_ref ? 0 : 1;
  }
}
506
507
// Replaces x->rdmult with the value from av1_get_rdmult_delta() for the
// superblock at (mi_row, mi_col). This only happens when the TPL model is
// enabled, no AQ mode and no delta-q mode is in use, and the current frame is
// an ARF update that is not the first entry of the GF group. In every other
// case x->rdmult is left untouched.
static inline void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
                                           int mi_row, int mi_col) {
  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));

  const int gf_group_index = cpi->gf_frame_index;
  const int applies =
      cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ &&
      cpi->oxcf.q_cfg.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 &&
      cpi->ppi->gf_group.update_type[gf_group_index] == ARF_UPDATE;
  if (!applies) return;

  x->rdmult = av1_get_rdmult_delta(cpi, cpi->common.seq_params->sb_size, mi_row,
                                   mi_col, cpi->rd.RDMULT);
}
523
#endif  // !CONFIG_REALTIME_ONLY
524
525
#if CONFIG_RT_ML_PARTITIONING
526
// Get a prediction(stored in x->est_pred) for the whole superblock.
527
static void get_estimated_pred(AV1_COMP *cpi, const TileInfo *const tile,
528
                               MACROBLOCK *x, int mi_row, int mi_col) {
529
  AV1_COMMON *const cm = &cpi->common;
530
  const int is_key_frame = frame_is_intra_only(cm);
531
  MACROBLOCKD *xd = &x->e_mbd;
532
533
  // TODO(kyslov) Extend to 128x128
534
  assert(cm->seq_params->sb_size == BLOCK_64X64);
535
536
  av1_set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
537
538
  if (!is_key_frame) {
539
    MB_MODE_INFO *mi = xd->mi[0];
540
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
541
542
    assert(yv12 != NULL);
543
544
    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
545
                         get_ref_scale_factors(cm, LAST_FRAME), 1);
546
    mi->ref_frame[0] = LAST_FRAME;
547
    mi->ref_frame[1] = NONE;
548
    mi->bsize = BLOCK_64X64;
549
    mi->mv[0].as_int = 0;
550
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
551
552
    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
553
554
    xd->plane[0].dst.buf = x->est_pred;
555
    xd->plane[0].dst.stride = 64;
556
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
557
  } else {
558
#if CONFIG_AV1_HIGHBITDEPTH
559
    switch (xd->bd) {
560
      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
561
      case 10:
562
        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
563
        break;
564
      case 12:
565
        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
566
        break;
567
    }
568
#else
569
    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
570
#endif  // CONFIG_VP9_HIGHBITDEPTH
571
  }
572
}
573
#endif  // CONFIG_RT_ML_PARTITIONING
574
575
0
// NOTE(review): judging by the names, these are presumably the weights given
// to the left and top-right neighbours when CDFs are averaged. The code that
// uses them is not visible in this part of the file; confirm against it.
#define AVG_CDF_WEIGHT_LEFT 3
576
0
#define AVG_CDF_WEIGHT_TOP_RIGHT 1
577
578
/*!\brief Encode a superblock (minimal RD search involved)
579
 *
580
 * \ingroup partition_search
581
 * Encodes the superblock by a pre-determined partition pattern, only minor
582
 * rd-based searches are allowed to adjust the initial pattern. It is only used
583
 * by realtime encoding.
584
 */
585
static inline void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
586
                                   TileDataEnc *tile_data, TokenExtra **tp,
587
                                   const int mi_row, const int mi_col,
588
0
                                   const int seg_skip) {
589
0
  AV1_COMMON *const cm = &cpi->common;
590
0
  MACROBLOCK *const x = &td->mb;
591
0
  const SPEED_FEATURES *const sf = &cpi->sf;
592
0
  const TileInfo *const tile_info = &tile_data->tile_info;
593
0
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
594
0
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
595
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
596
0
  PC_TREE *const pc_root = td->pc_root;
597
598
0
#if !CONFIG_REALTIME_ONLY
599
0
  if (cm->delta_q_info.delta_q_present_flag) {
600
0
    const int num_planes = av1_num_planes(cm);
601
602
0
    setup_delta_q_nonrd(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
603
0
  }
604
0
#endif
605
#if CONFIG_RT_ML_PARTITIONING
606
  if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) {
607
    RD_STATS dummy_rdc;
608
    get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
609
    av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
610
                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, pc_root);
611
    return;
612
  }
613
#endif
614
  // Set the partition
615
0
  if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
616
0
      (sf->rt_sf.use_fast_fixed_part && x->sb_force_fixed_part == 1 &&
617
0
       (!frame_is_intra_only(cm) &&
618
0
        (!cpi->ppi->use_svc ||
619
0
         !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)))) {
620
    // set a fixed-size partition
621
0
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
622
0
    BLOCK_SIZE bsize_select = sf->part_sf.fixed_partition_size;
623
0
    if (sf->rt_sf.use_fast_fixed_part &&
624
0
        x->content_state_sb.source_sad_nonrd < kLowSad) {
625
0
      bsize_select = cm->seq_params->sb_size;
626
0
    }
627
0
    if (cpi->sf.rt_sf.skip_encoding_non_reference_slide_change &&
628
0
        cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) {
629
0
      bsize_select = cm->seq_params->sb_size;
630
0
      x->force_zeromv_skip_for_sb = 1;
631
0
    }
632
0
    const BLOCK_SIZE bsize = seg_skip ? sb_size : bsize_select;
633
0
    if (x->content_state_sb.source_sad_nonrd > kZeroSad)
634
0
      x->force_color_check_block_level = 1;
635
0
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
636
0
  } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
637
    // set a variance-based partition
638
0
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
639
0
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
640
0
  }
641
0
  assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
642
0
         sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
643
0
  set_cb_offsets(td->mb.cb_offset, 0, 0);
644
645
  // Initialize the flag to skip cdef to 1.
646
0
  if (sf->rt_sf.skip_cdef_sb) {
647
0
    const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
648
    // If 128x128 block is used, we need to set the flag for all 4 64x64 sub
649
    // "blocks".
650
0
    for (int r = 0; r < block64_in_sb; ++r) {
651
0
      for (int c = 0; c < block64_in_sb; ++c) {
652
0
        const int idx_in_sb =
653
0
            r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
654
0
        if (mi[idx_in_sb]) mi[idx_in_sb]->cdef_strength = 1;
655
0
      }
656
0
    }
657
0
  }
658
659
#if CONFIG_COLLECT_COMPONENT_TIMING
660
  start_timing(cpi, nonrd_use_partition_time);
661
#endif
662
0
  av1_nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
663
0
                          pc_root);
664
#if CONFIG_COLLECT_COMPONENT_TIMING
665
  end_timing(cpi, nonrd_use_partition_time);
666
#endif
667
0
}
668
669
// This function initializes the stats for encode_rd_sb.
670
static inline void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
671
                                     const TileDataEnc *tile_data,
672
                                     SIMPLE_MOTION_DATA_TREE *sms_root,
673
                                     RD_STATS *rd_cost, int mi_row, int mi_col,
674
                                     int gather_tpl_data) {
675
  const AV1_COMMON *cm = &cpi->common;
676
  const TileInfo *tile_info = &tile_data->tile_info;
677
  MACROBLOCK *x = &td->mb;
678
679
  const SPEED_FEATURES *sf = &cpi->sf;
680
  const int use_simple_motion_search =
681
      (sf->part_sf.simple_motion_search_split ||
682
       sf->part_sf.simple_motion_search_prune_rect ||
683
       sf->part_sf.simple_motion_search_early_term_none ||
684
       sf->part_sf.ml_early_term_after_part_split_level) &&
685
      !frame_is_intra_only(cm);
686
  if (use_simple_motion_search) {
687
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_root,
688
                                             mi_row, mi_col);
689
  }
690
691
#if !CONFIG_REALTIME_ONLY
692
  if (!(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
693
        cpi->oxcf.gf_cfg.lag_in_frames == 0)) {
694
    init_ref_frame_space(cpi, td, mi_row, mi_col);
695
    x->sb_energy_level = 0;
696
    x->part_search_info.cnn_output_valid = 0;
697
    if (gather_tpl_data) {
698
      if (cpi->cb_delta_rdmult_enabled) {
699
        const int num_planes = av1_num_planes(cm);
700
        const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
701
        setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
702
        av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
703
      }
704
705
      // TODO(jingning): revisit this function.
706
      if (cpi->oxcf.algo_cfg.enable_tpl_model && (0)) {
707
        adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
708
      }
709
    }
710
  }
711
#else
712
  (void)tile_info;
713
  (void)mi_row;
714
  (void)mi_col;
715
  (void)gather_tpl_data;
716
#endif
717
718
  x->reuse_inter_pred = false;
719
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
720
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
721
  av1_zero(x->picked_ref_frames_mask);
722
  av1_invalid_rd_stats(rd_cost);
723
}
724
725
#if !CONFIG_REALTIME_ONLY
726
static void sb_qp_sweep_init_quantizers(AV1_COMP *cpi, ThreadData *td,
727
                                        const TileDataEnc *tile_data,
728
                                        SIMPLE_MOTION_DATA_TREE *sms_tree,
729
                                        RD_STATS *rd_cost, int mi_row,
730
0
                                        int mi_col, int delta_qp_ofs) {
731
0
  AV1_COMMON *const cm = &cpi->common;
732
0
  MACROBLOCK *const x = &td->mb;
733
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
734
0
  const TileInfo *tile_info = &tile_data->tile_info;
735
0
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
736
0
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
737
0
  assert(delta_q_info->delta_q_present_flag);
738
0
  const int delta_q_res = delta_q_info->delta_q_res;
739
740
0
  const SPEED_FEATURES *sf = &cpi->sf;
741
0
  const int use_simple_motion_search =
742
0
      (sf->part_sf.simple_motion_search_split ||
743
0
       sf->part_sf.simple_motion_search_prune_rect ||
744
0
       sf->part_sf.simple_motion_search_early_term_none ||
745
0
       sf->part_sf.ml_early_term_after_part_split_level) &&
746
0
      !frame_is_intra_only(cm);
747
0
  if (use_simple_motion_search) {
748
0
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_tree,
749
0
                                             mi_row, mi_col);
750
0
  }
751
752
0
  int current_qindex = x->rdmult_cur_qindex + delta_qp_ofs;
753
754
0
  MACROBLOCKD *const xd = &x->e_mbd;
755
0
  current_qindex = av1_adjust_q_from_delta_q_res(
756
0
      delta_q_res, xd->current_base_qindex, current_qindex);
757
758
0
  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
759
760
0
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
761
0
  xd->mi[0]->current_qindex = current_qindex;
762
0
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);
763
764
  // keep track of any non-zero delta-q used
765
0
  td->deltaq_used |= (x->delta_qindex != 0);
766
767
0
  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
768
0
    const int delta_lf_res = delta_q_info->delta_lf_res;
769
0
    const int lfmask = ~(delta_lf_res - 1);
770
0
    const int delta_lf_from_base =
771
0
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
772
0
    const int8_t delta_lf =
773
0
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
774
0
    const int frame_lf_count =
775
0
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
776
0
    const int mib_size = cm->seq_params->mib_size;
777
778
    // pre-set the delta lf for loop filter. Note that this value is set
779
    // before mi is assigned for each block in current superblock
780
0
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
781
0
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
782
0
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
783
0
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
784
0
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
785
0
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
786
0
        }
787
0
      }
788
0
    }
789
0
  }
790
791
0
  x->reuse_inter_pred = false;
792
0
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
793
0
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
794
0
  av1_zero(x->picked_ref_frames_mask);
795
0
  av1_invalid_rd_stats(rd_cost);
796
0
}
797
798
static int sb_qp_sweep(AV1_COMP *const cpi, ThreadData *td,
799
                       TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
800
                       int mi_col, BLOCK_SIZE bsize,
801
                       SIMPLE_MOTION_DATA_TREE *sms_tree,
802
0
                       SB_FIRST_PASS_STATS *sb_org_stats) {
803
0
  AV1_COMMON *const cm = &cpi->common;
804
0
  MACROBLOCK *const x = &td->mb;
805
0
  RD_STATS rdc_winner, cur_rdc;
806
0
  av1_invalid_rd_stats(&rdc_winner);
807
808
0
  int best_qindex = td->mb.rdmult_delta_qindex;
809
0
  const int start = cm->current_frame.frame_type == KEY_FRAME ? -20 : -12;
810
0
  const int end = cm->current_frame.frame_type == KEY_FRAME ? 20 : 12;
811
0
  const int step = cm->delta_q_info.delta_q_res;
812
813
0
  for (int sweep_qp_delta = start; sweep_qp_delta <= end;
814
0
       sweep_qp_delta += step) {
815
0
    sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_tree, &cur_rdc, mi_row,
816
0
                                mi_col, sweep_qp_delta);
817
818
0
    const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
819
0
    const int backup_current_qindex =
820
0
        cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;
821
822
0
    av1_reset_mbmi(&cm->mi_params, bsize, mi_row, mi_col);
823
0
    av1_restore_sb_state(sb_org_stats, cpi, td, tile_data, mi_row, mi_col);
824
0
    cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex = backup_current_qindex;
825
826
0
    td->pc_root = av1_alloc_pc_tree_node(bsize);
827
0
    if (!td->pc_root)
828
0
      aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
829
0
                         "Failed to allocate PC_TREE");
830
0
    av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize,
831
0
                          &cur_rdc, cur_rdc, td->pc_root, sms_tree, NULL,
832
0
                          SB_DRY_PASS, NULL);
833
834
0
    if ((rdc_winner.rdcost > cur_rdc.rdcost) ||
835
0
        (abs(sweep_qp_delta) < abs(best_qindex - x->rdmult_delta_qindex) &&
836
0
         rdc_winner.rdcost == cur_rdc.rdcost)) {
837
0
      rdc_winner = cur_rdc;
838
0
      best_qindex = x->rdmult_delta_qindex + sweep_qp_delta;
839
0
    }
840
0
  }
841
842
0
  return best_qindex;
843
0
}
844
#endif  //! CONFIG_REALTIME_ONLY
845
846
/*!\brief Encode a superblock (RD-search-based)
847
 *
848
 * \ingroup partition_search
849
 * Conducts partition search for a superblock, based on rate-distortion costs,
850
 * from scratch or adjusting from a pre-calculated partition pattern.
851
 */
852
static inline void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
853
                                TileDataEnc *tile_data, TokenExtra **tp,
854
                                const int mi_row, const int mi_col,
855
0
                                const int seg_skip) {
856
0
  AV1_COMMON *const cm = &cpi->common;
857
0
  MACROBLOCK *const x = &td->mb;
858
0
  MACROBLOCKD *const xd = &x->e_mbd;
859
0
  const SPEED_FEATURES *const sf = &cpi->sf;
860
0
  const TileInfo *const tile_info = &tile_data->tile_info;
861
0
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
862
0
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
863
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
864
0
  const int num_planes = av1_num_planes(cm);
865
0
  int dummy_rate;
866
0
  int64_t dummy_dist;
867
0
  RD_STATS dummy_rdc;
868
0
  SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root;
869
870
#if CONFIG_REALTIME_ONLY
871
  (void)seg_skip;
872
#endif  // CONFIG_REALTIME_ONLY
873
874
0
  init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
875
0
                    1);
876
877
  // Encode the superblock
878
0
  if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
879
    // partition search starting from a variance-based partition
880
0
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
881
0
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
882
883
#if CONFIG_COLLECT_COMPONENT_TIMING
884
    start_timing(cpi, rd_use_partition_time);
885
#endif
886
0
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
887
0
    if (!td->pc_root)
888
0
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
889
0
                         "Failed to allocate PC_TREE");
890
0
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
891
0
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
892
0
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
893
0
                               sf->part_sf.partition_search_type);
894
0
    td->pc_root = NULL;
895
#if CONFIG_COLLECT_COMPONENT_TIMING
896
    end_timing(cpi, rd_use_partition_time);
897
#endif
898
0
  }
899
0
#if !CONFIG_REALTIME_ONLY
900
0
  else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
901
    // partition search by adjusting a fixed-size partition
902
0
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
903
0
    const BLOCK_SIZE bsize =
904
0
        seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
905
0
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
906
0
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
907
0
    if (!td->pc_root)
908
0
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
909
0
                         "Failed to allocate PC_TREE");
910
0
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
911
0
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
912
0
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
913
0
                               sf->part_sf.partition_search_type);
914
0
    td->pc_root = NULL;
915
0
  } else {
916
    // The most exhaustive recursive partition search
917
0
    SuperBlockEnc *sb_enc = &x->sb_enc;
918
    // No stats for overlay frames. Exclude key frame.
919
0
    av1_get_tpl_stats_sb(cpi, sb_size, mi_row, mi_col, sb_enc);
920
921
    // Reset the tree for simple motion search data
922
0
    av1_reset_simple_motion_tree_partition(sms_root, sb_size);
923
924
#if CONFIG_COLLECT_COMPONENT_TIMING
925
    start_timing(cpi, rd_pick_partition_time);
926
#endif
927
928
    // Estimate the maximum square partition block size, which will be used
929
    // as the starting block size for partitioning the sb
930
0
    set_max_min_partition_size(sb_enc, cpi, x, sf, sb_size, mi_row, mi_col);
931
932
    // The superblock can be searched only once, or twice consecutively for
933
    // better quality. Note that the meaning of passes here is different from
934
    // the general concept of 1-pass/2-pass encoders.
935
0
    const int num_passes =
936
0
        cpi->oxcf.unit_test_cfg.sb_multipass_unit_test ? 2 : 1;
937
938
0
    if (cpi->oxcf.sb_qp_sweep &&
939
0
        !(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
940
0
          cpi->oxcf.gf_cfg.lag_in_frames == 0) &&
941
0
        cm->delta_q_info.delta_q_present_flag) {
942
0
      AOM_CHECK_MEM_ERROR(
943
0
          x->e_mbd.error_info, td->mb.sb_stats_cache,
944
0
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_stats_cache)));
945
0
      av1_backup_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
946
0
                          mi_col);
947
0
      assert(x->rdmult_delta_qindex == x->delta_qindex);
948
949
0
      const int best_qp_diff =
950
0
          sb_qp_sweep(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, sms_root,
951
0
                      td->mb.sb_stats_cache) -
952
0
          x->rdmult_delta_qindex;
953
954
0
      sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_root, &dummy_rdc,
955
0
                                  mi_row, mi_col, best_qp_diff);
956
957
0
      const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
958
0
      const int backup_current_qindex =
959
0
          cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;
960
961
0
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
962
0
      av1_restore_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
963
0
                           mi_col);
964
965
0
      cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex =
966
0
          backup_current_qindex;
967
0
      aom_free(td->mb.sb_stats_cache);
968
0
      td->mb.sb_stats_cache = NULL;
969
0
    }
970
0
    if (num_passes == 1) {
971
#if CONFIG_PARTITION_SEARCH_ORDER
972
      if (cpi->ext_part_controller.ready && !frame_is_intra_only(cm)) {
973
        av1_reset_part_sf(&cpi->sf.part_sf);
974
        av1_reset_sf_for_ext_part(cpi);
975
        RD_STATS this_rdc;
976
        av1_rd_partition_search(cpi, td, tile_data, tp, sms_root, mi_row,
977
                                mi_col, sb_size, &this_rdc);
978
      } else {
979
        td->pc_root = av1_alloc_pc_tree_node(sb_size);
980
        if (!td->pc_root)
981
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
982
                             "Failed to allocate PC_TREE");
983
        av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
984
                              &dummy_rdc, dummy_rdc, td->pc_root, sms_root,
985
                              NULL, SB_SINGLE_PASS, NULL);
986
      }
987
#else
988
0
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
989
0
      if (!td->pc_root)
990
0
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
991
0
                           "Failed to allocate PC_TREE");
992
0
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
993
0
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
994
0
                            SB_SINGLE_PASS, NULL);
995
0
#endif  // CONFIG_PARTITION_SEARCH_ORDER
996
0
    } else {
997
      // First pass
998
0
      AOM_CHECK_MEM_ERROR(
999
0
          x->e_mbd.error_info, td->mb.sb_fp_stats,
1000
0
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_fp_stats)));
1001
0
      av1_backup_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
1002
0
                          mi_col);
1003
0
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
1004
0
      if (!td->pc_root)
1005
0
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
1006
0
                           "Failed to allocate PC_TREE");
1007
0
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
1008
0
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
1009
0
                            SB_DRY_PASS, NULL);
1010
1011
      // Second pass
1012
0
      init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
1013
0
                        mi_col, 0);
1014
0
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
1015
0
      av1_reset_simple_motion_tree_partition(sms_root, sb_size);
1016
1017
0
      av1_restore_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
1018
0
                           mi_col);
1019
1020
0
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
1021
0
      if (!td->pc_root)
1022
0
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
1023
0
                           "Failed to allocate PC_TREE");
1024
0
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
1025
0
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
1026
0
                            SB_WET_PASS, NULL);
1027
0
      aom_free(td->mb.sb_fp_stats);
1028
0
      td->mb.sb_fp_stats = NULL;
1029
0
    }
1030
1031
    // Reset to 0 so that it wouldn't be used elsewhere mistakenly.
1032
0
    sb_enc->tpl_data_count = 0;
1033
#if CONFIG_COLLECT_COMPONENT_TIMING
1034
    end_timing(cpi, rd_pick_partition_time);
1035
#endif
1036
0
  }
1037
0
#endif  // !CONFIG_REALTIME_ONLY
1038
1039
  // Update the inter rd model
1040
  // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
1041
0
  if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
1042
0
      cm->tiles.cols == 1 && cm->tiles.rows == 1) {
1043
0
    av1_inter_mode_data_fit(tile_data, x->rdmult);
1044
0
  }
1045
0
}
1046
1047
// Check if the cost update of symbols mode, coeff and dv are tile or off.
1048
static inline int is_mode_coeff_dv_upd_freq_tile_or_off(
1049
0
    const AV1_COMP *const cpi) {
1050
0
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;
1051
1052
0
  return (inter_sf->coeff_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
1053
0
          inter_sf->mode_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
1054
0
          cpi->sf.intra_sf.dv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
1055
0
}
1056
1057
// When row-mt is enabled and cost update frequencies are set to off/tile,
1058
// processing of current SB can start even before processing of top-right SB
1059
// is finished. This function checks if it is sufficient to wait for top SB
1060
// to finish processing before current SB starts processing.
1061
0
static inline int delay_wait_for_top_right_sb(const AV1_COMP *const cpi) {
1062
0
  const MODE mode = cpi->oxcf.mode;
1063
0
  if (mode == GOOD) return 0;
1064
1065
0
  if (mode == ALLINTRA)
1066
0
    return is_mode_coeff_dv_upd_freq_tile_or_off(cpi);
1067
0
  else if (mode == REALTIME)
1068
0
    return (is_mode_coeff_dv_upd_freq_tile_or_off(cpi) &&
1069
0
            cpi->sf.inter_sf.mv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
1070
0
  else
1071
0
    return 0;
1072
0
}
1073
1074
/*!\brief Calculate source SAD at superblock level using 64x64 block source SAD
1075
 *
1076
 * \ingroup partition_search
1077
 * \callgraph
1078
 * \callergraph
1079
 */
1080
static inline uint64_t get_sb_source_sad(const AV1_COMP *cpi, int mi_row,
1081
0
                                         int mi_col) {
1082
0
  if (cpi->src_sad_blk_64x64 == NULL) return UINT64_MAX;
1083
1084
0
  const AV1_COMMON *const cm = &cpi->common;
1085
0
  const int blk_64x64_in_mis = (cm->seq_params->sb_size == BLOCK_128X128)
1086
0
                                   ? (cm->seq_params->mib_size >> 1)
1087
0
                                   : cm->seq_params->mib_size;
1088
0
  const int num_blk_64x64_cols =
1089
0
      (cm->mi_params.mi_cols + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
1090
0
  const int num_blk_64x64_rows =
1091
0
      (cm->mi_params.mi_rows + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
1092
0
  const int blk_64x64_col_index = mi_col / blk_64x64_in_mis;
1093
0
  const int blk_64x64_row_index = mi_row / blk_64x64_in_mis;
1094
0
  uint64_t curr_sb_sad = UINT64_MAX;
1095
  // Avoid the border as sad_blk_64x64 may not be set for the border
1096
  // in the scene detection.
1097
0
  if ((blk_64x64_row_index >= num_blk_64x64_rows - 1) ||
1098
0
      (blk_64x64_col_index >= num_blk_64x64_cols - 1)) {
1099
0
    return curr_sb_sad;
1100
0
  }
1101
0
  const uint64_t *const src_sad_blk_64x64_data =
1102
0
      &cpi->src_sad_blk_64x64[blk_64x64_col_index +
1103
0
                              blk_64x64_row_index * num_blk_64x64_cols];
1104
0
  if (cm->seq_params->sb_size == BLOCK_128X128) {
1105
    // Calculate SB source SAD by accumulating source SAD of 64x64 blocks in the
1106
    // superblock
1107
0
    curr_sb_sad = src_sad_blk_64x64_data[0] + src_sad_blk_64x64_data[1] +
1108
0
                  src_sad_blk_64x64_data[num_blk_64x64_cols] +
1109
0
                  src_sad_blk_64x64_data[num_blk_64x64_cols + 1];
1110
0
  } else if (cm->seq_params->sb_size == BLOCK_64X64) {
1111
0
    curr_sb_sad = src_sad_blk_64x64_data[0];
1112
0
  }
1113
0
  return curr_sb_sad;
1114
0
}
1115
1116
/*!\brief Determine whether grading content can be skipped based on sad stat
1117
 *
1118
 * \ingroup partition_search
1119
 * \callgraph
1120
 * \callergraph
1121
 */
1122
static inline bool is_calc_src_content_needed(AV1_COMP *cpi,
1123
                                              MACROBLOCK *const x, int mi_row,
1124
0
                                              int mi_col) {
1125
0
  if (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
1126
0
    return true;
1127
0
  const uint64_t curr_sb_sad = get_sb_source_sad(cpi, mi_row, mi_col);
1128
0
  if (curr_sb_sad == UINT64_MAX) return true;
1129
0
  if (curr_sb_sad == 0) {
1130
0
    x->content_state_sb.source_sad_nonrd = kZeroSad;
1131
0
    return false;
1132
0
  }
1133
0
  AV1_COMMON *const cm = &cpi->common;
1134
0
  bool do_calc_src_content = true;
1135
1136
0
  if (cpi->oxcf.speed < 9) return do_calc_src_content;
1137
1138
  // TODO(yunqing): Tune/validate the thresholds for 128x128 SB size.
1139
0
  if (AOMMIN(cm->width, cm->height) < 360) {
1140
    // Derive Average 64x64 block source SAD from SB source SAD
1141
0
    const uint64_t avg_64x64_blk_sad =
1142
0
        (cm->seq_params->sb_size == BLOCK_128X128) ? ((curr_sb_sad + 2) >> 2)
1143
0
                                                   : curr_sb_sad;
1144
1145
    // The threshold is determined based on kLowSad and kHighSad threshold and
1146
    // test results.
1147
0
    uint64_t thresh_low = 15000;
1148
0
    uint64_t thresh_high = 40000;
1149
1150
0
    if (cpi->sf.rt_sf.increase_source_sad_thresh) {
1151
0
      thresh_low = thresh_low << 1;
1152
0
      thresh_high = thresh_high << 1;
1153
0
    }
1154
1155
0
    if (avg_64x64_blk_sad > thresh_low && avg_64x64_blk_sad < thresh_high) {
1156
0
      do_calc_src_content = false;
1157
      // Note: set x->content_state_sb.source_sad_rd as well if this is extended
1158
      // to RTC rd path.
1159
0
      x->content_state_sb.source_sad_nonrd = kMedSad;
1160
0
    }
1161
0
  }
1162
1163
0
  return do_calc_src_content;
1164
0
}
1165
1166
/*!\brief Determine whether grading content is needed based on sf and frame stat
1167
 *
1168
 * \ingroup partition_search
1169
 * \callgraph
1170
 * \callergraph
1171
 */
1172
// TODO(any): consolidate sfs to make interface cleaner
1173
static inline void grade_source_content_sb(AV1_COMP *cpi, MACROBLOCK *const x,
1174
                                           TileDataEnc *tile_data, int mi_row,
1175
0
                                           int mi_col) {
1176
0
  AV1_COMMON *const cm = &cpi->common;
1177
0
  if (cm->current_frame.frame_type == KEY_FRAME ||
1178
0
      (cpi->ppi->use_svc &&
1179
0
       cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
1180
0
    assert(x->content_state_sb.source_sad_nonrd == kMedSad);
1181
0
    assert(x->content_state_sb.source_sad_rd == kMedSad);
1182
0
    return;
1183
0
  }
1184
0
  bool calc_src_content = false;
1185
1186
0
  if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
1187
0
    if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0) {
1188
0
      calc_src_content = is_calc_src_content_needed(cpi, x, mi_row, mi_col);
1189
0
    } else {
1190
0
      x->content_state_sb.source_sad_nonrd = kZeroSad;
1191
0
    }
1192
0
  } else if ((cpi->sf.rt_sf.var_part_based_on_qidx >= 1) &&
1193
0
             (cm->width * cm->height <= 352 * 288)) {
1194
0
    if (cpi->rc.frame_source_sad > 0)
1195
0
      calc_src_content = true;
1196
0
    else
1197
0
      x->content_state_sb.source_sad_rd = kZeroSad;
1198
0
  }
1199
0
  if (calc_src_content)
1200
0
    av1_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
1201
0
}
1202
1203
/*!\brief Encode a superblock row by breaking it into superblocks
1204
 *
1205
 * \ingroup partition_search
1206
 * \callgraph
1207
 * \callergraph
1208
 * Do partition and mode search for an sb row: one row of superblocks filling up
1209
 * the width of the current tile.
1210
 */
1211
static inline void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
1212
                                 TileDataEnc *tile_data, int mi_row,
1213
0
                                 TokenExtra **tp) {
1214
0
  AV1_COMMON *const cm = &cpi->common;
1215
0
  const TileInfo *const tile_info = &tile_data->tile_info;
1216
0
  MultiThreadInfo *const mt_info = &cpi->mt_info;
1217
0
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
1218
0
  AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
1219
0
  bool row_mt_enabled = mt_info->row_mt_enabled;
1220
0
  MACROBLOCK *const x = &td->mb;
1221
0
  MACROBLOCKD *const xd = &x->e_mbd;
1222
0
  const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info);
1223
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
1224
0
  const int mib_size = cm->seq_params->mib_size;
1225
0
  const int mib_size_log2 = cm->seq_params->mib_size_log2;
1226
0
  const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
1227
0
  const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
1228
1229
#if CONFIG_COLLECT_COMPONENT_TIMING
1230
  start_timing(cpi, encode_sb_row_time);
1231
#endif
1232
1233
  // Initialize the left context for the new SB row
1234
0
  av1_zero_left_context(xd);
1235
1236
  // Reset delta for quantizer and loof filters at the beginning of every tile
1237
0
  if (mi_row == tile_info->mi_row_start || row_mt_enabled) {
1238
0
    if (cpi->cb_delta_rdmult_enabled)
1239
0
      xd->current_base_qindex = cm->quant_params.base_qindex;
1240
0
    if (cm->delta_q_info.delta_lf_present_flag) {
1241
0
      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
1242
0
    }
1243
0
  }
1244
1245
0
  reset_thresh_freq_fact(x);
1246
1247
  // Code each SB in the row
1248
0
  for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
1249
0
       mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
1250
    // In realtime/allintra mode and when frequency of cost updates is off/tile,
1251
    // wait for the top superblock to finish encoding. Otherwise, wait for the
1252
    // top-right superblock to finish encoding.
1253
0
    enc_row_mt->sync_read_ptr(
1254
0
        row_mt_sync, sb_row, sb_col_in_tile - delay_wait_for_top_right_sb(cpi));
1255
1256
0
#if CONFIG_MULTITHREAD
1257
0
    if (row_mt_enabled) {
1258
0
      pthread_mutex_lock(enc_row_mt->mutex_);
1259
0
      const bool row_mt_exit = enc_row_mt->row_mt_exit;
1260
0
      pthread_mutex_unlock(enc_row_mt->mutex_);
1261
      // Exit in case any worker has encountered an error.
1262
0
      if (row_mt_exit) return;
1263
0
    }
1264
0
#endif
1265
1266
0
    const int update_cdf = tile_data->allow_update_cdf && row_mt_enabled;
1267
0
    if (update_cdf && (tile_info->mi_row_start != mi_row)) {
1268
0
      if ((tile_info->mi_col_start == mi_col)) {
1269
        // restore frame context at the 1st column sb
1270
0
        *xd->tile_ctx = *x->row_ctx;
1271
0
      } else {
1272
        // update context
1273
0
        int wt_left = AVG_CDF_WEIGHT_LEFT;
1274
0
        int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
1275
0
        if (tile_info->mi_col_end > (mi_col + mib_size))
1276
0
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile,
1277
0
                              wt_left, wt_tr);
1278
0
        else
1279
0
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
1280
0
                              wt_left, wt_tr);
1281
0
      }
1282
0
    }
1283
1284
    // Update the rate cost tables for some symbols
1285
0
    av1_set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);
1286
1287
    // Reset color coding related parameters
1288
0
    av1_zero(x->color_sensitivity_sb);
1289
0
    av1_zero(x->color_sensitivity_sb_g);
1290
0
    av1_zero(x->color_sensitivity_sb_alt);
1291
0
    av1_zero(x->color_sensitivity);
1292
0
    x->content_state_sb.source_sad_nonrd = kMedSad;
1293
0
    x->content_state_sb.source_sad_rd = kMedSad;
1294
0
    x->content_state_sb.lighting_change = 0;
1295
0
    x->content_state_sb.low_sumdiff = 0;
1296
0
    x->force_zeromv_skip_for_sb = 0;
1297
0
    x->sb_me_block = 0;
1298
0
    x->sb_me_partition = 0;
1299
0
    x->sb_me_mv.as_int = 0;
1300
0
    x->sb_force_fixed_part = 1;
1301
0
    x->color_palette_thresh = 64;
1302
0
    x->force_color_check_block_level = 0;
1303
0
    x->nonrd_prune_ref_frame_search =
1304
0
        cpi->sf.rt_sf.nonrd_prune_ref_frame_search;
1305
1306
0
    if (cpi->oxcf.mode == ALLINTRA) {
1307
0
      x->intra_sb_rdmult_modifier = 128;
1308
0
    }
1309
1310
0
    xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
1311
0
    x->source_variance = UINT_MAX;
1312
0
    td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);
1313
1314
    // Get segment id and skip flag
1315
0
    const struct segmentation *const seg = &cm->seg;
1316
0
    int seg_skip = 0;
1317
0
    if (seg->enabled) {
1318
0
      const uint8_t *const map =
1319
0
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
1320
0
      const uint8_t segment_id =
1321
0
          map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
1322
0
              : 0;
1323
0
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
1324
0
    }
1325
1326
0
    produce_gradients_for_sb(cpi, x, sb_size, mi_row, mi_col);
1327
1328
0
    init_src_var_info_of_4x4_sub_blocks(cpi, x->src_var_info_of_4x4_sub_blocks,
1329
0
                                        sb_size);
1330
1331
    // Grade the temporal variation of the sb, the grade will be used to decide
1332
    // fast mode search strategy for coding blocks
1333
0
    if (!seg_skip) grade_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
1334
1335
    // encode the superblock
1336
0
    if (use_nonrd_mode) {
1337
0
      encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
1338
0
    } else {
1339
0
      encode_rd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
1340
0
    }
1341
1342
    // Update the top-right context in row_mt coding
1343
0
    if (update_cdf && (tile_info->mi_row_end > (mi_row + mib_size))) {
1344
0
      if (sb_cols_in_tile == 1)
1345
0
        x->row_ctx[0] = *xd->tile_ctx;
1346
0
      else if (sb_col_in_tile >= 1)
1347
0
        x->row_ctx[sb_col_in_tile - 1] = *xd->tile_ctx;
1348
0
    }
1349
0
    enc_row_mt->sync_write_ptr(row_mt_sync, sb_row, sb_col_in_tile,
1350
0
                               sb_cols_in_tile);
1351
0
  }
1352
1353
#if CONFIG_COLLECT_COMPONENT_TIMING
1354
  end_timing(cpi, encode_sb_row_time);
1355
#endif
1356
0
}
1357
1358
0
// Prepares the frame-level macroblock context (cpi->td.mb): attaches the
// source frame planes and configures the per-plane block state from the
// sequence's chroma subsampling.
static inline void init_encode_frame_mb_context(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const mb = &cpi->td.mb;
  const int plane_count = av1_num_planes(cm);

  // Source planes are set up with both position arguments equal to zero.
  av1_setup_src_planes(mb, cpi->source, 0, 0, plane_count,
                       cm->seq_params->sb_size);

  av1_setup_block_planes(&mb->e_mbd, cm->seq_params->subsampling_x,
                         cm->seq_params->subsampling_y, plane_count);
}
1371
1372
0
// (Re)allocates cpi->tile_data with one TileDataEnc per tile of the current
// tile layout. Row-MT memory and any previous tile array are released first.
// Every new entry gets a zeroed row_mt_sync and a NULL row_ctx.
void av1_alloc_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_tile_cols = cm->tiles.cols;
  const int num_tile_rows = cm->tiles.rows;

  av1_row_mt_mem_dealloc(cpi);

  // Reset the count before allocating so it never describes a freed array.
  aom_free(cpi->tile_data);
  cpi->allocated_tiles = 0;

  CHECK_MEM_ERROR(
      cm, cpi->tile_data,
      aom_memalign(32,
                   num_tile_cols * num_tile_rows * sizeof(*cpi->tile_data)));

  cpi->allocated_tiles = num_tile_cols * num_tile_rows;

  // Walk the array in raster (row-major) tile order.
  TileDataEnc *tile = cpi->tile_data;
  for (int row = 0; row < num_tile_rows; ++row) {
    for (int col = 0; col < num_tile_cols; ++col) {
      av1_zero(tile->row_mt_sync);
      tile->row_ctx = NULL;
      ++tile;
    }
  }
}
1396
1397
0
// Per-frame initialization of every tile's encoder state.
//
// Outside the stats-generation stage, and only when screen content tools are
// allowed, the token buffers are (re)allocated if the frame needs more tokens
// than are currently allocated. Then, for each tile in raster order, the tile
// geometry is set up, per-tile accumulators are reset, the tile's token and
// token-list start pointers are assigned, the CDF-update permission is
// computed, and the tile context is seeded from the frame context.
void av1_init_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const int tile_cols = cm->tiles.cols;
  const int tile_rows = cm->tiles.rows;
  TokenInfo *const token_info = &cpi->token_info;

  // Running cursors: each tile starts where the previous tile's share ends.
  TokenExtra *tok_cursor = token_info->tile_tok[0][0];
  TokenList *tplist_cursor = token_info->tplist[0][0];
  unsigned int prev_tile_tokens = 0;
  int prev_tile_sb_rows = 0;

  if (!is_stat_generation_stage(cpi) &&
      cm->features.allow_screen_content_tools) {
    // Number of tokens for which token info needs to be allocated.
    const unsigned int tokens_required =
        get_token_alloc(cm->mi_params.mb_rows, cm->mi_params.mb_cols,
                        MAX_SB_SIZE_LOG2, num_planes);
    // Grow the buffers when the requirement exceeds the current allocation.
    // That covers both the first allocation and a change in frame dimensions.
    if (tokens_required > token_info->tokens_allocated) {
      free_token_info(token_info);
      alloc_token_info(cm, token_info, tokens_required);
      tok_cursor = token_info->tile_tok[0][0];
      tplist_cursor = token_info->tplist[0][0];
    }
  }

  for (int tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (int tile_col = 0; tile_col < tile_cols; ++tile_col) {
      TileDataEnc *const tile_data =
          &cpi->tile_data[tile_row * tile_cols + tile_col];
      TileInfo *const tile_info = &tile_data->tile_info;

      av1_tile_init(tile_info, cm, tile_row, tile_col);
      tile_data->firstpass_top_mv = kZeroMv;
      tile_data->abs_sum_level = 0;

      if (is_token_info_allocated(token_info)) {
        // Advance past the previous tile's tokens, then record this tile's
        // token budget for the next iteration.
        tok_cursor += prev_tile_tokens;
        token_info->tile_tok[tile_row][tile_col] = tok_cursor;
        prev_tile_tokens = allocated_tokens(
            tile_info, cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
            num_planes);

        // Same scheme for the per-SB-row token lists.
        tplist_cursor += prev_tile_sb_rows;
        token_info->tplist[tile_row][tile_col] = tplist_cursor;
        prev_tile_sb_rows = av1_get_sb_rows_in_tile(cm, tile_info);
      }

      // CDF updates are allowed only when none of the three conditions below
      // forbids them; evaluation stops at the first one that does.
      tile_data->allow_update_cdf = !cm->tiles.large_scale &&
                                    !cm->features.disable_cdf_update &&
                                    !delay_wait_for_top_right_sb(cpi);
      tile_data->tctx = *cm->fc;
    }
  }
}
1456
1457
// Populate the start palette token info prior to encoding an SB row.
1458
static inline void get_token_start(AV1_COMP *cpi, const TileInfo *tile_info,
1459
                                   int tile_row, int tile_col, int mi_row,
1460
0
                                   TokenExtra **tp) {
1461
0
  const TokenInfo *token_info = &cpi->token_info;
1462
0
  if (!is_token_info_allocated(token_info)) return;
1463
1464
0
  const AV1_COMMON *cm = &cpi->common;
1465
0
  const int num_planes = av1_num_planes(cm);
1466
0
  TokenList *const tplist = cpi->token_info.tplist[tile_row][tile_col];
1467
0
  const int sb_row_in_tile =
1468
0
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1469
1470
0
  get_start_tok(cpi, tile_row, tile_col, mi_row, tp,
1471
0
                cm->seq_params->mib_size_log2 + MI_SIZE_LOG2, num_planes);
1472
0
  assert(tplist != NULL);
1473
0
  tplist[sb_row_in_tile].start = *tp;
1474
0
}
1475
1476
// Populate the token count after encoding an SB row.
1477
static inline void populate_token_count(AV1_COMP *cpi,
1478
                                        const TileInfo *tile_info, int tile_row,
1479
                                        int tile_col, int mi_row,
1480
0
                                        TokenExtra *tok) {
1481
0
  const TokenInfo *token_info = &cpi->token_info;
1482
0
  if (!is_token_info_allocated(token_info)) return;
1483
1484
0
  const AV1_COMMON *cm = &cpi->common;
1485
0
  const int num_planes = av1_num_planes(cm);
1486
0
  TokenList *const tplist = token_info->tplist[tile_row][tile_col];
1487
0
  const int sb_row_in_tile =
1488
0
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1489
0
  const int tile_mb_cols =
1490
0
      (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
1491
0
  const int num_mb_rows_in_sb =
1492
0
      ((1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
1493
0
  tplist[sb_row_in_tile].count =
1494
0
      (unsigned int)(tok - tplist[sb_row_in_tile].start);
1495
1496
0
  assert((unsigned int)(tok - tplist[sb_row_in_tile].start) <=
1497
0
         get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
1498
0
                         cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1499
0
                         num_planes));
1500
1501
0
  (void)num_planes;
1502
0
  (void)tile_mb_cols;
1503
0
  (void)num_mb_rows_in_sb;
1504
0
}
1505
1506
/*!\brief Encode a superblock row
1507
 *
1508
 * \ingroup partition_search
1509
 */
1510
void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
1511
0
                       int tile_col, int mi_row) {
1512
0
  AV1_COMMON *const cm = &cpi->common;
1513
0
  const int tile_cols = cm->tiles.cols;
1514
0
  TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
1515
0
  const TileInfo *const tile_info = &this_tile->tile_info;
1516
0
  TokenExtra *tok = NULL;
1517
1518
0
  get_token_start(cpi, tile_info, tile_row, tile_col, mi_row, &tok);
1519
1520
0
  encode_sb_row(cpi, td, this_tile, mi_row, &tok);
1521
1522
0
  populate_token_count(cpi, tile_info, tile_row, tile_col, mi_row, tok);
1523
0
}
1524
1525
/*!\brief Encode a tile
1526
 *
1527
 * \ingroup partition_search
1528
 */
1529
void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
1530
0
                     int tile_col) {
1531
0
  AV1_COMMON *const cm = &cpi->common;
1532
0
  TileDataEnc *const this_tile =
1533
0
      &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1534
0
  const TileInfo *const tile_info = &this_tile->tile_info;
1535
1536
0
  if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile);
1537
1538
0
  av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
1539
0
                         tile_info->mi_col_end, tile_row);
1540
0
  av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
1541
0
                         &td->mb.e_mbd);
1542
1543
0
#if !CONFIG_REALTIME_ONLY
1544
0
  if (cpi->oxcf.intra_mode_cfg.enable_cfl_intra)
1545
0
    cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);
1546
0
#endif
1547
1548
0
  if (td->mb.txfm_search_info.mb_rd_record != NULL) {
1549
0
    av1_crc32c_calculator_init(
1550
0
        &td->mb.txfm_search_info.mb_rd_record->crc_calculator);
1551
0
  }
1552
1553
0
  for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
1554
0
       mi_row += cm->seq_params->mib_size) {
1555
0
    av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
1556
0
  }
1557
0
  this_tile->abs_sum_level = td->abs_sum_level;
1558
0
}
1559
1560
/*!\brief Break one frame into tiles and encode the tiles
1561
 *
1562
 * \ingroup partition_search
1563
 *
1564
 * \param[in]    cpi    Top-level encoder structure
1565
 */
1566
0
static inline void encode_tiles(AV1_COMP *cpi) {
1567
0
  AV1_COMMON *const cm = &cpi->common;
1568
0
  const int tile_cols = cm->tiles.cols;
1569
0
  const int tile_rows = cm->tiles.rows;
1570
0
  int tile_col, tile_row;
1571
1572
0
  MACROBLOCK *const mb = &cpi->td.mb;
1573
0
  assert(IMPLIES(cpi->tile_data == NULL, cpi->allocated_tiles == 0));
1574
0
  if (cpi->allocated_tiles != tile_cols * tile_rows) av1_alloc_tile_data(cpi);
1575
1576
0
  av1_init_tile_data(cpi);
1577
0
  av1_alloc_mb_data(cpi, mb);
1578
1579
0
  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1580
0
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1581
0
      TileDataEnc *const this_tile =
1582
0
          &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1583
0
      cpi->td.intrabc_used = 0;
1584
0
      cpi->td.deltaq_used = 0;
1585
0
      cpi->td.abs_sum_level = 0;
1586
0
      cpi->td.rd_counts.seg_tmp_pred_cost[0] = 0;
1587
0
      cpi->td.rd_counts.seg_tmp_pred_cost[1] = 0;
1588
0
      cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
1589
0
      cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
1590
0
      av1_init_rtc_counters(&cpi->td.mb);
1591
0
      cpi->td.mb.palette_pixels = 0;
1592
0
      av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
1593
0
      if (!frame_is_intra_only(&cpi->common))
1594
0
        av1_accumulate_rtc_counters(cpi, &cpi->td.mb);
1595
0
      cpi->palette_pixel_num += cpi->td.mb.palette_pixels;
1596
0
      cpi->intrabc_used |= cpi->td.intrabc_used;
1597
0
      cpi->deltaq_used |= cpi->td.deltaq_used;
1598
0
    }
1599
0
  }
1600
1601
0
  av1_dealloc_mb_data(mb, av1_num_planes(cm));
1602
0
}
1603
1604
// Set the relative distance of a reference frame w.r.t. current frame
1605
static inline void set_rel_frame_dist(
1606
    const AV1_COMMON *const cm, RefFrameDistanceInfo *const ref_frame_dist_info,
1607
0
    const int ref_frame_flags) {
1608
0
  MV_REFERENCE_FRAME ref_frame;
1609
0
  int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX;
1610
0
  ref_frame_dist_info->nearest_past_ref = NONE_FRAME;
1611
0
  ref_frame_dist_info->nearest_future_ref = NONE_FRAME;
1612
0
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
1613
0
    ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = 0;
1614
0
    if (ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
1615
0
      int dist = av1_encoder_get_relative_dist(
1616
0
          cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME],
1617
0
          cm->current_frame.display_order_hint);
1618
0
      ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = dist;
1619
      // Get the nearest ref_frame in the past
1620
0
      if (abs(dist) < min_past_dist && dist < 0) {
1621
0
        ref_frame_dist_info->nearest_past_ref = ref_frame;
1622
0
        min_past_dist = abs(dist);
1623
0
      }
1624
      // Get the nearest ref_frame in the future
1625
0
      if (dist < min_future_dist && dist > 0) {
1626
0
        ref_frame_dist_info->nearest_future_ref = ref_frame;
1627
0
        min_future_dist = dist;
1628
0
      }
1629
0
    }
1630
0
  }
1631
0
}
1632
1633
0
static inline int refs_are_one_sided(const AV1_COMMON *cm) {
1634
0
  assert(!frame_is_intra_only(cm));
1635
1636
0
  int one_sided_refs = 1;
1637
0
  const int cur_display_order_hint = cm->current_frame.display_order_hint;
1638
0
  for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
1639
0
    const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
1640
0
    if (buf == NULL) continue;
1641
0
    if (av1_encoder_get_relative_dist(buf->display_order_hint,
1642
0
                                      cur_display_order_hint) > 0) {
1643
0
      one_sided_refs = 0;  // bwd reference
1644
0
      break;
1645
0
    }
1646
0
  }
1647
0
  return one_sided_refs;
1648
0
}
1649
1650
static inline void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
1651
0
                                             int ref_order_hint[2]) {
1652
0
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
1653
0
  ref_order_hint[0] = ref_order_hint[1] = 0;
1654
0
  if (!skip_mode_info->skip_mode_allowed) return;
1655
1656
0
  const RefCntBuffer *const buf_0 =
1657
0
      get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
1658
0
  const RefCntBuffer *const buf_1 =
1659
0
      get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
1660
0
  assert(buf_0 != NULL && buf_1 != NULL);
1661
1662
0
  ref_order_hint[0] = buf_0->order_hint;
1663
0
  ref_order_hint[1] = buf_1->order_hint;
1664
0
}
1665
1666
0
// Decides whether skip mode is used for the current frame. Returns 1 only if
// the common skip-mode setup allows it and none of the encoder-side checks
// below rejects it; returns 0 otherwise.
static int check_skip_mode_enabled(AV1_COMP *const cpi) {
  AV1_COMMON *const cm = &cpi->common;

  av1_setup_skip_mode_allowed(cm);
  const SkipModeInfo *const skip_info = &cm->current_frame.skip_mode_info;
  if (!skip_info->skip_mode_allowed) return 0;

  // Turn off skip mode if the temporal distances of the reference pair to the
  // current frame are different by more than 1 frame. Only the second
  // distance is taken as an absolute value.
  int ref_hint[2];
  const int cur_hint = (int)cm->current_frame.order_hint;
  get_skip_mode_ref_offsets(cm, ref_hint);
  const int dist_to_ref0 = get_relative_dist(&cm->seq_params->order_hint_info,
                                             cur_hint, ref_hint[0]);
  const int dist_to_ref1 = abs(get_relative_dist(
      &cm->seq_params->order_hint_info, cur_hint, ref_hint[1]));
  if (abs(dist_to_ref0 - dist_to_ref1) > 1) return 0;

  // High Latency: Turn off skip mode if all refs are fwd.
  if (cpi->all_one_sided_refs && cpi->oxcf.gf_cfg.lag_in_frames > 0) return 0;

  // Both skip-mode references must be enabled for this frame.
  const int first_ref = skip_info->ref_frame_idx_0 + LAST_FRAME;
  const int second_ref = skip_info->ref_frame_idx_1 + LAST_FRAME;
  if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[first_ref])) return 0;
  if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[second_ref])) return 0;

  return 1;
}
1696
1697
static inline void set_default_interp_skip_flags(
1698
0
    const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
1699
0
  const int num_planes = av1_num_planes(cm);
1700
0
  interp_search_flags->default_interp_skip_flags =
1701
0
      (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
1702
0
                        : INTERP_SKIP_LUMA_SKIP_CHROMA;
1703
0
}
1704
1705
/*!\cond */
typedef struct {
  // Scoring function for usefulness of references (the lower score, the more
  // useful)
  int score;
  // Index in the reference buffer
  int index;
} RefScoreData;
/*!\endcond */

// qsort() comparison function that orders RefScoreData entries by ascending
// score, breaking ties by ascending index.
//
// Returns a negative value, zero, or a positive value when *a sorts before,
// equal to, or after *b. The result is built from comparisons rather than a
// subtraction, so it cannot overflow even when one score is INT_MAX and the
// other is negative.
static int compare_score_data_asc(const void *a, const void *b) {
  const RefScoreData *ra = (const RefScoreData *)a;
  const RefScoreData *rb = (const RefScoreData *)b;

  if (ra->score != rb->score) return (ra->score > rb->score) ? 1 : -1;

  return (ra->index > rb->index) - (ra->index < rb->index);
}
1725
1726
// Determines whether a given reference frame is "good" based on temporal
1727
// distance and base_qindex. The "good" reference frames are not allowed to be
1728
// pruned by the speed feature "prune_single_ref" and "prune_comp_ref_frames"
1729
// at block level.
1730
0
static inline void setup_keep_ref_frame_mask(AV1_COMP *cpi) {
1731
0
  const int prune_single_ref = cpi->sf.inter_sf.prune_single_ref;
1732
0
  const int prune_comp_ref_frames = cpi->sf.inter_sf.prune_comp_ref_frames;
1733
0
  const AV1_COMMON *const cm = &cpi->common;
1734
0
  cpi->keep_single_ref_frame_mask = 0;
1735
0
  cpi->keep_comp_ref_frame_mask = 0;
1736
0
  if (frame_is_intra_only(cm)) return;
1737
1738
0
  RefScoreData ref_score_data[INTER_REFS_PER_FRAME];
1739
0
  for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
1740
0
    ref_score_data[i].score = INT_MAX;
1741
0
    ref_score_data[i].index = i;
1742
0
  }
1743
1744
  // Calculate score for each reference frame based on relative distance to
1745
  // the current frame and its base_qindex. A lower score means that the
1746
  // reference is potentially more useful.
1747
0
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
1748
0
       ++ref_frame) {
1749
0
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
1750
0
      const RefFrameDistanceInfo *const ref_frame_dist_info =
1751
0
          &cpi->ref_frame_dist_info;
1752
0
      const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
1753
0
      ref_score_data[ref_frame - LAST_FRAME].score =
1754
0
          abs(ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME]) +
1755
0
          buf->base_qindex;
1756
0
    }
1757
0
  }
1758
1759
0
  qsort(ref_score_data, INTER_REFS_PER_FRAME, sizeof(ref_score_data[0]),
1760
0
        compare_score_data_asc);
1761
1762
  // Decide the number of reference frames for which pruning via the speed
1763
  // feature prune_single_ref is disallowed.
1764
  // prune_single_ref = 0 => None of the 7 reference frames are pruned.
1765
  // prune_single_ref = 1 => The best 5 reference frames are not pruned.
1766
  // prune_single_ref = 2 => The best 3 reference frames are not pruned.
1767
  // prune_single_ref = 3, 4 => All the 7 references are allowed to be pruned.
1768
0
  static const int num_single_ref_to_keep_lookup[5] = { INTER_REFS_PER_FRAME, 5,
1769
0
                                                        3, 0, 0 };
1770
0
  assert(prune_single_ref >= 0 && prune_single_ref <= 4);
1771
0
  const int num_single_ref_to_keep =
1772
0
      num_single_ref_to_keep_lookup[prune_single_ref];
1773
0
  for (int i = 0; i < num_single_ref_to_keep; ++i) {
1774
0
    const int idx = ref_score_data[i].index;
1775
0
    cpi->keep_single_ref_frame_mask |= 1 << idx;
1776
0
  }
1777
1778
  // Decide the number of reference frame pairs for which pruning via the speed
1779
  // feature "prune_comp_ref_frames" is disallowed.
1780
  // prune_comp_ref_frames = 0    => None of the allowed reference frame pairs
1781
  //                                 are pruned.
1782
  // prune_comp_ref_frames = 1    => The best 3 reference frame pairs are not
1783
  //                                 allowed to be pruned, i.e, reference frame
1784
  //                                 pairs with rank (1, 2), (1, 3), (2, 3) are
1785
  //                                 not  pruned.
1786
  // prune_comp_ref_frames = 2, 3 => All the reference frame pairs are allowed
1787
  //                                 to be pruned.
1788
0
  static const int num_comp_ref_to_keep_lookup[4] = { INTER_REFS_PER_FRAME, 3,
1789
0
                                                      0, 0 };
1790
0
  assert(prune_comp_ref_frames >= 0 && prune_comp_ref_frames <= 3);
1791
0
  const int num_comp_ref_to_keep =
1792
0
      num_comp_ref_to_keep_lookup[prune_comp_ref_frames];
1793
0
  for (int i = 0; i < num_comp_ref_to_keep; ++i) {
1794
0
    const int idx = ref_score_data[i].index;
1795
0
    cpi->keep_comp_ref_frame_mask |= 1 << idx;
1796
0
  }
1797
0
}
1798
1799
0
// Sets bits in cpi->prune_ref_frame_mask for compound reference pairs that
// should not be searched for the current frame.
//
// - If one-sided compound is off (by config or speed feature) and all
//   references are one-sided, every compound reference is disabled.
// - Otherwise, when RD mode picking is used and selective_ref_frame >= 2,
//   each enabled pair is pruned if
//     * not all references are one-sided and both refs of the pair lie on the
//       same side of the current frame, or
//     * selective_ref_frame >= 4, the pair uses ALTREF2_FRAME, BWDREF_FRAME is
//       enabled, both are future references, and BWDREF_FRAME is at least as
//       close as ALTREF2_FRAME.
// The mask is only OR-ed into in the second case; it is not cleared here.
static inline void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
  const int onesided_comp_disabled =
      !cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
      cpi->sf.inter_sf.disable_onesided_comp;

  if (onesided_comp_disabled && cpi->all_one_sided_refs) {
    // Disable all compound references
    cpi->prune_ref_frame_mask = (1 << MODE_CTX_REF_FRAMES) - (1 << REF_FRAMES);
    return;
  }

  if (cpi->sf.rt_sf.use_nonrd_pick_mode ||
      cpi->sf.inter_sf.selective_ref_frame < 2) {
    return;
  }

  AV1_COMMON *const cm = &cpi->common;
  const int cur_hint = cm->current_frame.display_order_hint;
  unsigned int *const ref_hints = cm->cur_frame->ref_display_order_hint;
  const int arf2_dist = av1_encoder_get_relative_dist(
      ref_hints[ALTREF2_FRAME - LAST_FRAME], cur_hint);
  const int bwd_dist = av1_encoder_get_relative_dist(
      ref_hints[BWDREF_FRAME - LAST_FRAME], cur_hint);

  for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
    MV_REFERENCE_FRAME rf[2];
    av1_set_ref_frame(rf, ref_idx);

    // Pairs with a disabled member are left untouched.
    const int first_enabled =
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) != 0;
    if (!first_enabled) continue;
    const int second_enabled =
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]) != 0;
    if (!second_enabled) continue;

    if (!cpi->all_one_sided_refs) {
      const int dist0 = av1_encoder_get_relative_dist(
          ref_hints[rf[0] - LAST_FRAME], cur_hint);
      const int dist1 = av1_encoder_get_relative_dist(
          ref_hints[rf[1] - LAST_FRAME], cur_hint);

      // One-sided compound is used only when all reference frames are
      // one-sided.
      const int same_side = (dist0 > 0) == (dist1 > 0);
      if (same_side) cpi->prune_ref_frame_mask |= 1 << ref_idx;
    }

    const int pair_uses_arf2 = rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME;
    if (cpi->sf.inter_sf.selective_ref_frame >= 4 && pair_uses_arf2 &&
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
      // Drop ALTREF2_FRAME as a reference when both ALTREF2_FRAME and
      // BWDREF_FRAME are future references and BWDREF_FRAME is the closer
      // (or equally close) one.
      if (arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist) {
        cpi->prune_ref_frame_mask |= 1 << ref_idx;
      }
    }
  }
}
1855
1856
0
// Returns 1 when delta-q should be used for the frame. In non-realtime builds
// this sums, over every superblock of the frame, the RD-cost delta reported by
// av1_get_q_for_deltaq_objective() and allows delta-q only if the total is
// negative. Realtime-only builds always return 1.
static int allow_deltaq_mode(AV1_COMP *cpi) {
#if !CONFIG_REALTIME_ONLY
  AV1_COMMON *const cm = &cpi->common;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int sb_mi_wide = mi_size_wide[sb_size];
  const int sb_mi_high = mi_size_high[sb_size];

  int64_t total_delta_rdcost = 0;
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sb_mi_high) {
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols;
         mi_col += sb_mi_wide) {
      int64_t sb_delta_rdcost = 0;
      av1_get_q_for_deltaq_objective(cpi, &cpi->td, &sb_delta_rdcost, sb_size,
                                     mi_row, mi_col);
      total_delta_rdcost += sb_delta_rdcost;
    }
  }
  return total_delta_rdcost < 0;
#else
  (void)cpi;
  return 1;
#endif  // !CONFIG_REALTIME_ONLY
}
1878
1879
0
static inline int disable_deltaq_for_intl_arfs(const AV1_COMP *cpi) {
1880
0
  if (cpi->oxcf.mode == GOOD && is_stat_consumption_stage_twopass(cpi) &&
1881
0
      cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE &&
1882
0
      cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ &&
1883
0
      !cpi->common.seg.enabled && !cpi->roi.enabled && !cpi->oxcf.sb_qp_sweep &&
1884
0
      !cpi->use_ducky_encode) {
1885
0
    return 1;
1886
0
  }
1887
0
  return 0;
1888
0
}
1889
1890
0
static inline int enable_delta_rdmult(const AV1_COMP *cpi) {
1891
0
  if (!disable_deltaq_for_intl_arfs(cpi))
1892
0
    return cpi->common.delta_q_info.delta_q_present_flag;
1893
1894
0
  const GF_GROUP *gf_group = &cpi->ppi->gf_group;
1895
0
  return gf_group->update_type[cpi->gf_frame_index] != LF_UPDATE;
1896
0
}
1897
1898
0
static inline int enable_delta_q(const AV1_COMP *cpi) {
1899
0
  const GF_GROUP *gf_group = &cpi->ppi->gf_group;
1900
0
  if (!disable_deltaq_for_intl_arfs(cpi))
1901
0
    return gf_group->update_type[cpi->gf_frame_index] != LF_UPDATE;
1902
1903
0
  return cpi->common.current_frame.pyramid_level <= 1;
1904
0
}
1905
1906
0
#define FORCE_ZMV_SKIP_128X128_BLK_DIFF 10000
1907
#define FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF 4
1908
1909
// Populates block level thresholds for force zeromv-skip decision
1910
0
static void populate_thresh_to_force_zeromv_skip(AV1_COMP *cpi) {
1911
0
  if (cpi->sf.rt_sf.part_early_exit_zeromv == 0) return;
1912
1913
  // Threshold for forcing zeromv-skip decision is as below:
1914
  // For 128x128 blocks, threshold is 10000 and per pixel threshold is 0.6103.
1915
  // For 64x64 blocks, threshold is 5000 and per pixel threshold is 1.221
1916
  // allowing slightly higher error for smaller blocks.
1917
  // Per Pixel Threshold of 64x64 block        Area of 64x64 block         1  1
1918
  // ------------------------------------=sqrt(---------------------)=sqrt(-)=-
1919
  // Per Pixel Threshold of 128x128 block      Area of 128x128 block       4  2
1920
  // Thus, per pixel thresholds for blocks of size 32x32, 16x16,...  can be
1921
  // chosen as 2.442, 4.884,.... As the per pixel error tends to be higher for
1922
  // small blocks, the same is clipped to 4.
1923
0
  const unsigned int thresh_exit_128x128_part = FORCE_ZMV_SKIP_128X128_BLK_DIFF;
1924
0
  const int num_128x128_pix =
1925
0
      block_size_wide[BLOCK_128X128] * block_size_high[BLOCK_128X128];
1926
1927
0
  for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
1928
0
    const int num_block_pix = block_size_wide[bsize] * block_size_high[bsize];
1929
1930
    // Calculate the threshold for zeromv-skip decision based on area of the
1931
    // partition
1932
0
    unsigned int thresh_exit_part_blk =
1933
0
        (unsigned int)(thresh_exit_128x128_part *
1934
0
                           sqrt((double)num_block_pix / num_128x128_pix) +
1935
0
                       0.5);
1936
0
    thresh_exit_part_blk = AOMMIN(
1937
0
        thresh_exit_part_blk,
1938
0
        (unsigned int)(FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF * num_block_pix));
1939
0
    cpi->zeromv_skip_thresh_exit_part[bsize] = thresh_exit_part_blk;
1940
0
  }
1941
0
}
1942
1943
0
// Releases both block-hash buffers with aom_free(). The pointers stored in
// the array are not modified.
static void free_block_hash_buffers(uint32_t *block_hash_values[2]) {
  aom_free(block_hash_values[0]);
  aom_free(block_hash_values[1]);
}
1948
1949
/*!\brief Determines delta_q_res value for Variance Boost modulation.
 *
 * Maps the base qindex to a delta_q resolution of 1, 2, 4 or 8: qindex below
 * 80 gives 1, 80..119 gives 2, 120..159 gives 4, and 160 or above gives 8.
 */
static int aom_get_variance_boost_delta_q_res(int qindex) {
  // Signaling delta_q changes across superblocks carries syntax-element
  // overhead that adds to the payload size. That overhead is proportionally
  // larger at higher base qindex (lower quality, smaller files), so there is
  // a trade-off:
  // - Smaller delta_q_res: finer delta_q control, more bits spent signaling
  //   deltas.
  // - Larger delta_q_res: coarser delta_q control, fewer bits spent signaling
  //   deltas.
  //
  // At the same time, SB qindex fluctuations between the lowest- and
  // highest-variance regions grow with the base qindex:
  // - For QP 5: up to 8 qindexes
  // - For QP 60: up to 52 qindexes
  //
  // With these factors in mind, the strategy found to maximize quality per
  // bitrate is to use very fine delta_q steps at the lowest picture qindexes
  // (to preserve tiny SB qindex deltas) and progressively coarser steps as
  // the base qindex increases (to reduce total signaling overhead).
  if (qindex >= 160) return 8;
  if (qindex >= 120) return 4;
  if (qindex >= 80) return 2;
  return 1;
}
1985
1986
#if !CONFIG_REALTIME_ONLY
1987
0
static float get_thresh_based_on_q(int qindex, int speed) {
1988
0
  const float min_threshold_arr[3] = { 0.084f, 0.087f, 0.126f };
1989
0
  const float max_threshold_arr[3] = { 0.140f, 0.150f, 0.182f };
1990
0
  const int idx = (speed >= 3) ? 2 : (speed - 1);
1991
0
  const float min_thresh = min_threshold_arr[idx];
1992
0
  const float max_thresh = max_threshold_arr[idx];
1993
0
  const float thresh = min_thresh + (max_thresh - min_thresh) *
1994
0
                                        ((float)MAXQ - (float)qindex) /
1995
0
                                        (float)(MAXQ - MINQ);
1996
0
  return thresh;
1997
0
}
1998
1999
0
// Returns the sum of the absolute row and column differences between two
// motion vectors (an L1 distance).
// NOTE(review): the differences are stored in MV fields before being summed,
// exactly as before. MV's definition is not visible here; if its fields are
// narrower than int, extreme differences would be truncated. Confirm the MV
// range makes that impossible.
static int get_mv_err(MV cur_mv, MV ref_mv) {
  const MV delta = { cur_mv.row - ref_mv.row, cur_mv.col - ref_mv.col };
  const MV magnitude = { abs(delta.row), abs(delta.col) };
  return magnitude.row + magnitude.col;
}
2005
2006
0
// Lowers *best_mv_err to the error between cur_mv and ref_mv (as computed by
// get_mv_err()) when that error is smaller; otherwise leaves it unchanged.
static void check_mv_err_and_update(MV cur_mv, MV ref_mv, int *best_mv_err) {
  const int candidate_err = get_mv_err(cur_mv, ref_mv);
  *best_mv_err = AOMMIN(candidate_err, *best_mv_err);
}
2010
2011
// Returns 1 when the position (mi_row + row_offset, mi_col + col_offset) lies
// inside [0, num_mi_rows) x [0, num_mi_cols), and 0 otherwise.
static int is_inside_frame_border(int mi_row, int mi_col, int row_offset,
                                  int col_offset, int num_mi_rows,
                                  int num_mi_cols) {
  const int row = mi_row + row_offset;
  if (row < 0 || row >= num_mi_rows) return 0;

  const int col = mi_col + col_offset;
  return col >= 0 && col < num_mi_cols;
}
2020
2021
// Compute the minimum MV error between current MV and spatial MV predictors.
2022
static int get_spatial_mvpred_err(AV1_COMMON *cm, TplParams *const tpl_data,
2023
                                  int tpl_idx, int mi_row, int mi_col,
2024
                                  int ref_idx, int_mv cur_mv, int allow_hp,
2025
0
                                  int is_integer) {
2026
0
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
2027
0
  TplDepStats *tpl_ptr = tpl_frame->tpl_stats_ptr;
2028
0
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
2029
2030
0
  int mv_err = INT32_MAX;
2031
0
  const int step = 1 << block_mis_log2;
2032
0
  const int mv_pred_pos_in_mis[8][2] = {
2033
0
    { -step, 0 },     { 0, -step },     { -step, step },  { -step, -step },
2034
0
    { -2 * step, 0 }, { 0, -2 * step }, { -3 * step, 0 }, { 0, -3 * step },
2035
0
  };
2036
2037
0
  for (int i = 0; i < 8; i++) {
2038
0
    int row_offset = mv_pred_pos_in_mis[i][0];
2039
0
    int col_offset = mv_pred_pos_in_mis[i][1];
2040
0
    if (!is_inside_frame_border(mi_row, mi_col, row_offset, col_offset,
2041
0
                                tpl_frame->mi_rows, tpl_frame->mi_cols)) {
2042
0
      continue;
2043
0
    }
2044
2045
0
    const TplDepStats *tpl_stats =
2046
0
        &tpl_ptr[av1_tpl_ptr_pos(mi_row + row_offset, mi_col + col_offset,
2047
0
                                 tpl_frame->stride, block_mis_log2)];
2048
0
    int_mv this_refmv = tpl_stats->mv[ref_idx];
2049
0
    lower_mv_precision(&this_refmv.as_mv, allow_hp, is_integer);
2050
0
    check_mv_err_and_update(cur_mv.as_mv, this_refmv.as_mv, &mv_err);
2051
0
  }
2052
2053
  // Check MV error w.r.t. Global MV / Zero MV
2054
0
  int_mv gm_mv = { 0 };
2055
0
  if (cm->global_motion[ref_idx + LAST_FRAME].wmtype > TRANSLATION) {
2056
0
    const BLOCK_SIZE bsize = convert_length_to_bsize(tpl_data->tpl_bsize_1d);
2057
0
    gm_mv = gm_get_motion_vector(&cm->global_motion[ref_idx + LAST_FRAME],
2058
0
                                 allow_hp, bsize, mi_col, mi_row, is_integer);
2059
0
  }
2060
0
  check_mv_err_and_update(cur_mv.as_mv, gm_mv.as_mv, &mv_err);
2061
2062
0
  return mv_err;
2063
0
}
2064
2065
// Compute the minimum MV error between current MV and temporal MV predictors.
2066
static int get_temporal_mvpred_err(AV1_COMMON *cm, int mi_row, int mi_col,
2067
                                   int num_mi_rows, int num_mi_cols,
2068
                                   int ref_idx, int_mv cur_mv, int allow_hp,
2069
0
                                   int is_integer) {
2070
0
  const RefCntBuffer *ref_buf = get_ref_frame_buf(cm, ref_idx + LAST_FRAME);
2071
0
  if (ref_buf == NULL) return INT32_MAX;
2072
0
  int cur_to_ref_dist =
2073
0
      get_relative_dist(&cm->seq_params->order_hint_info,
2074
0
                        cm->cur_frame->order_hint, ref_buf->order_hint);
2075
2076
0
  int mv_err = INT32_MAX;
2077
0
  const int mv_pred_pos_in_mis[7][2] = {
2078
0
    { 0, 0 }, { 0, 2 }, { 2, 0 }, { 2, 2 }, { 4, -2 }, { 4, 4 }, { 2, 4 },
2079
0
  };
2080
2081
0
  for (int i = 0; i < 7; i++) {
2082
0
    int row_offset = mv_pred_pos_in_mis[i][0];
2083
0
    int col_offset = mv_pred_pos_in_mis[i][1];
2084
0
    if (!is_inside_frame_border(mi_row, mi_col, row_offset, col_offset,
2085
0
                                num_mi_rows, num_mi_cols)) {
2086
0
      continue;
2087
0
    }
2088
0
    const TPL_MV_REF *ref_mvs =
2089
0
        cm->tpl_mvs +
2090
0
        ((mi_row + row_offset) >> 1) * (cm->mi_params.mi_stride >> 1) +
2091
0
        ((mi_col + col_offset) >> 1);
2092
0
    if (ref_mvs->mfmv0.as_int == INVALID_MV) continue;
2093
2094
0
    int_mv this_refmv;
2095
0
    av1_get_mv_projection(&this_refmv.as_mv, ref_mvs->mfmv0.as_mv,
2096
0
                          cur_to_ref_dist, ref_mvs->ref_frame_offset);
2097
0
    lower_mv_precision(&this_refmv.as_mv, allow_hp, is_integer);
2098
0
    check_mv_err_and_update(cur_mv.as_mv, this_refmv.as_mv, &mv_err);
2099
0
  }
2100
2101
0
  return mv_err;
2102
0
}
2103
2104
// Determine whether to disable temporal MV prediction for the current frame
2105
// based on TPL and motion field data. Temporal MV prediction is disabled if the
2106
// reduction in MV error by including temporal MVs as MV predictors is small.
2107
0
static void check_to_disable_ref_frame_mvs(AV1_COMP *cpi) {
2108
0
  AV1_COMMON *cm = &cpi->common;
2109
0
  if (!cm->features.allow_ref_frame_mvs || cpi->sf.hl_sf.ref_frame_mvs_lvl != 1)
2110
0
    return;
2111
2112
0
  const int tpl_idx = cpi->gf_frame_index;
2113
0
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
2114
0
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
2115
2116
0
  const SUBPEL_FORCE_STOP tpl_subpel_precision =
2117
0
      cpi->sf.tpl_sf.subpel_force_stop;
2118
0
  const int allow_high_precision_mv = tpl_subpel_precision == EIGHTH_PEL &&
2119
0
                                      cm->features.allow_high_precision_mv;
2120
0
  const int force_integer_mv = tpl_subpel_precision == FULL_PEL ||
2121
0
                               cm->features.cur_frame_force_integer_mv;
2122
2123
0
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
2124
0
  TplDepStats *tpl_ptr = tpl_frame->tpl_stats_ptr;
2125
0
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
2126
0
  const int step = 1 << block_mis_log2;
2127
2128
0
  uint64_t accum_spatial_mvpred_err = 0;
2129
0
  uint64_t accum_best_err = 0;
2130
2131
0
  for (int mi_row = 0; mi_row < tpl_frame->mi_rows; mi_row += step) {
2132
0
    for (int mi_col = 0; mi_col < tpl_frame->mi_cols; mi_col += step) {
2133
0
      TplDepStats *tpl_stats_ptr = &tpl_ptr[av1_tpl_ptr_pos(
2134
0
          mi_row, mi_col, tpl_frame->stride, block_mis_log2)];
2135
0
      const int cur_best_ref_idx = tpl_stats_ptr->ref_frame_index[0];
2136
0
      if (cur_best_ref_idx == NONE_FRAME) continue;
2137
2138
0
      int_mv cur_mv = tpl_stats_ptr->mv[cur_best_ref_idx];
2139
0
      lower_mv_precision(&cur_mv.as_mv, allow_high_precision_mv,
2140
0
                         force_integer_mv);
2141
2142
0
      const int cur_spatial_mvpred_err = get_spatial_mvpred_err(
2143
0
          cm, tpl_data, tpl_idx, mi_row, mi_col, cur_best_ref_idx, cur_mv,
2144
0
          allow_high_precision_mv, force_integer_mv);
2145
2146
0
      const int cur_temporal_mvpred_err = get_temporal_mvpred_err(
2147
0
          cm, mi_row, mi_col, tpl_frame->mi_rows, tpl_frame->mi_cols,
2148
0
          cur_best_ref_idx, cur_mv, allow_high_precision_mv, force_integer_mv);
2149
2150
0
      const int cur_best_err =
2151
0
          AOMMIN(cur_spatial_mvpred_err, cur_temporal_mvpred_err);
2152
0
      accum_spatial_mvpred_err += cur_spatial_mvpred_err;
2153
0
      accum_best_err += cur_best_err;
2154
0
    }
2155
0
  }
2156
2157
0
  const float threshold =
2158
0
      get_thresh_based_on_q(cm->quant_params.base_qindex, cpi->oxcf.speed);
2159
0
  const float mv_err_reduction =
2160
0
      (float)(accum_spatial_mvpred_err - accum_best_err);
2161
2162
0
  if (mv_err_reduction <= threshold * accum_spatial_mvpred_err)
2163
0
    cm->features.allow_ref_frame_mvs = 0;
2164
0
}
2165
#endif  // !CONFIG_REALTIME_ONLY
2166
2167
/*!\brief Encoder setup(only for the current frame), encoding, and recontruction
2168
 * for a single frame
2169
 *
2170
 * \ingroup high_level_algo
2171
 */
2172
0
static inline void encode_frame_internal(AV1_COMP *cpi) {
2173
0
  ThreadData *const td = &cpi->td;
2174
0
  MACROBLOCK *const x = &td->mb;
2175
0
  AV1_COMMON *const cm = &cpi->common;
2176
0
  CommonModeInfoParams *const mi_params = &cm->mi_params;
2177
0
  FeatureFlags *const features = &cm->features;
2178
0
  MACROBLOCKD *const xd = &x->e_mbd;
2179
0
  RD_COUNTS *const rdc = &cpi->td.rd_counts;
2180
#if CONFIG_FPMT_TEST
2181
  FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
2182
  FrameProbInfo *const temp_frame_probs_simulation =
2183
      &cpi->ppi->temp_frame_probs_simulation;
2184
#endif
2185
0
  FrameProbInfo *const frame_probs = &cpi->ppi->frame_probs;
2186
0
  IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
2187
0
  MultiThreadInfo *const mt_info = &cpi->mt_info;
2188
0
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
2189
0
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
2190
0
  const DELTAQ_MODE deltaq_mode = oxcf->q_cfg.deltaq_mode;
2191
0
  int i;
2192
2193
0
  if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
2194
0
    mi_params->setup_mi(mi_params);
2195
0
  }
2196
2197
0
  set_mi_offsets(mi_params, xd, 0, 0);
2198
2199
0
  av1_zero(*td->counts);
2200
0
  av1_zero(rdc->tx_type_used);
2201
0
  av1_zero(rdc->obmc_used);
2202
0
  av1_zero(rdc->warped_used);
2203
0
  av1_zero(rdc->seg_tmp_pred_cost);
2204
2205
  // Reset the flag.
2206
0
  cpi->intrabc_used = 0;
2207
  // Need to disable intrabc when superres is selected
2208
0
  if (av1_superres_scaled(cm)) {
2209
0
    features->allow_intrabc = 0;
2210
0
  }
2211
2212
0
  features->allow_intrabc &= (oxcf->kf_cfg.enable_intrabc);
2213
2214
0
  if (features->allow_warped_motion &&
2215
0
      cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
2216
0
    const FRAME_UPDATE_TYPE update_type =
2217
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2218
0
    int warped_probability =
2219
#if CONFIG_FPMT_TEST
2220
        cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE
2221
            ? temp_frame_probs->warped_probs[update_type]
2222
            :
2223
#endif  // CONFIG_FPMT_TEST
2224
0
            frame_probs->warped_probs[update_type];
2225
0
    if (warped_probability < cpi->sf.inter_sf.prune_warped_prob_thresh)
2226
0
      features->allow_warped_motion = 0;
2227
0
  }
2228
2229
0
  int hash_table_created = 0;
2230
0
  if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
2231
0
      !cpi->sf.rt_sf.use_nonrd_pick_mode) {
2232
    // TODO(any): move this outside of the recoding loop to avoid recalculating
2233
    // the hash table.
2234
    // add to hash table
2235
0
    const int pic_width = cpi->source->y_crop_width;
2236
0
    const int pic_height = cpi->source->y_crop_height;
2237
0
    uint32_t *block_hash_values[2] = { NULL };  // two buffers used ping-pong
2238
0
    bool error = false;
2239
2240
0
    for (int j = 0; j < 2; ++j) {
2241
0
      block_hash_values[j] = (uint32_t *)aom_malloc(
2242
0
          sizeof(*block_hash_values[j]) * pic_width * pic_height);
2243
0
      if (!block_hash_values[j]) {
2244
0
        error = true;
2245
0
        break;
2246
0
      }
2247
0
    }
2248
2249
0
    av1_hash_table_init(intrabc_hash_info);
2250
0
    if (error ||
2251
0
        !av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table)) {
2252
0
      free_block_hash_buffers(block_hash_values);
2253
0
      aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
2254
0
                         "Error allocating intrabc_hash_table and buffers");
2255
0
    }
2256
0
    hash_table_created = 1;
2257
0
    av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0]);
2258
    // Hash data generated for screen contents is used for intraBC ME
2259
0
    const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
2260
0
    int max_sb_size = (1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2));
2261
2262
0
    if (cpi->sf.mv_sf.hash_max_8x8_intrabc_blocks) {
2263
0
      max_sb_size = AOMMIN(8, max_sb_size);
2264
0
    }
2265
2266
0
    int src_idx = 0;
2267
0
    for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
2268
0
      const int dst_idx = !src_idx;
2269
0
      av1_generate_block_hash_value(intrabc_hash_info, cpi->source, size,
2270
0
                                    block_hash_values[src_idx],
2271
0
                                    block_hash_values[dst_idx]);
2272
0
      if (size >= min_alloc_size &&
2273
0
          !av1_add_to_hash_map_by_row_with_precal_data(
2274
0
              &intrabc_hash_info->intrabc_hash_table,
2275
0
              block_hash_values[dst_idx], pic_width, pic_height, size)) {
2276
0
        error = true;
2277
0
        break;
2278
0
      }
2279
0
    }
2280
2281
0
    free_block_hash_buffers(block_hash_values);
2282
2283
0
    if (error) {
2284
0
      aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
2285
0
                         "Error adding data to intrabc_hash_table");
2286
0
    }
2287
0
  }
2288
2289
0
  const CommonQuantParams *quant_params = &cm->quant_params;
2290
0
  for (i = 0; i < MAX_SEGMENTS; ++i) {
2291
0
    const int qindex =
2292
0
        cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
2293
0
                        : quant_params->base_qindex;
2294
0
    xd->lossless[i] =
2295
0
        qindex == 0 && quant_params->y_dc_delta_q == 0 &&
2296
0
        quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
2297
0
        quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
2298
0
    if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
2299
0
    xd->qindex[i] = qindex;
2300
0
    if (xd->lossless[i]) {
2301
0
      cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
2302
0
    } else {
2303
0
      cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
2304
0
    }
2305
0
  }
2306
0
  features->coded_lossless = is_coded_lossless(cm, xd);
2307
0
  features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm);
2308
2309
  // Fix delta q resolution for the moment
2310
2311
0
  cm->delta_q_info.delta_q_res = 0;
2312
0
  if (cpi->use_ducky_encode) {
2313
0
    cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_DUCKY_ENCODE;
2314
0
  } else if (cpi->oxcf.q_cfg.aq_mode != CYCLIC_REFRESH_AQ &&
2315
0
             !cpi->roi.enabled) {
2316
0
    if (deltaq_mode == DELTA_Q_OBJECTIVE)
2317
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE;
2318
0
    else if (deltaq_mode == DELTA_Q_PERCEPTUAL)
2319
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
2320
0
    else if (deltaq_mode == DELTA_Q_PERCEPTUAL_AI)
2321
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
2322
0
    else if (deltaq_mode == DELTA_Q_USER_RATING_BASED)
2323
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
2324
0
    else if (deltaq_mode == DELTA_Q_HDR)
2325
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
2326
0
    else if (deltaq_mode == DELTA_Q_VARIANCE_BOOST)
2327
0
      cm->delta_q_info.delta_q_res =
2328
0
          aom_get_variance_boost_delta_q_res(quant_params->base_qindex);
2329
    // Set delta_q_present_flag before it is used for the first time
2330
0
    cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
2331
0
    cm->delta_q_info.delta_q_present_flag = deltaq_mode != NO_DELTA_Q;
2332
2333
    // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q
2334
    // is used for ineligible frames. That effectively will turn off row_mt
2335
    // usage. Note objective delta_q and tpl eligible frames are only altref
2336
    // frames currently.
2337
0
    if (cm->delta_q_info.delta_q_present_flag) {
2338
0
      if (deltaq_mode == DELTA_Q_OBJECTIVE && !enable_delta_q(cpi))
2339
0
        cm->delta_q_info.delta_q_present_flag = 0;
2340
2341
0
      if (deltaq_mode == DELTA_Q_OBJECTIVE &&
2342
0
          cm->delta_q_info.delta_q_present_flag) {
2343
0
        cm->delta_q_info.delta_q_present_flag &= allow_deltaq_mode(cpi);
2344
0
      }
2345
0
    }
2346
2347
    // Reset delta_q_used flag
2348
0
    cpi->deltaq_used = 0;
2349
2350
0
    cm->delta_q_info.delta_lf_present_flag =
2351
0
        cm->delta_q_info.delta_q_present_flag &&
2352
0
        oxcf->tool_cfg.enable_deltalf_mode;
2353
0
    cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
2354
2355
    // update delta_q_present_flag and delta_lf_present_flag based on
2356
    // base_qindex
2357
0
    cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
2358
0
    cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
2359
0
  } else if (cpi->cyclic_refresh->apply_cyclic_refresh ||
2360
0
             cpi->svc.number_temporal_layers == 1) {
2361
0
    cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
2362
0
    cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
2363
0
  }
2364
0
  cpi->rc.cnt_zeromv = 0;
2365
0
  cpi->cb_delta_rdmult_enabled = enable_delta_rdmult(cpi);
2366
2367
0
  av1_frame_init_quantizer(cpi);
2368
0
  init_encode_frame_mb_context(cpi);
2369
0
  set_default_interp_skip_flags(cm, &cpi->interp_search_flags);
2370
2371
0
  if (cm->prev_frame && cm->prev_frame->seg.enabled &&
2372
0
      cpi->svc.number_spatial_layers == 1)
2373
0
    cm->last_frame_seg_map = cm->prev_frame->seg_map;
2374
0
  else
2375
0
    cm->last_frame_seg_map = NULL;
2376
0
  if (features->allow_intrabc || features->coded_lossless) {
2377
0
    av1_set_default_ref_deltas(cm->lf.ref_deltas);
2378
0
    av1_set_default_mode_deltas(cm->lf.mode_deltas);
2379
0
  } else if (cm->prev_frame) {
2380
0
    memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
2381
0
    memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
2382
0
  }
2383
0
  memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
2384
0
  memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
2385
2386
0
  cpi->all_one_sided_refs =
2387
0
      frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm);
2388
2389
0
  cpi->prune_ref_frame_mask = 0;
2390
  // Figure out which ref frames can be skipped at frame level.
2391
0
  setup_prune_ref_frame_mask(cpi);
2392
  // Disable certain reference frame pruning based on temporal distance and
2393
  // quality of that reference frame.
2394
0
  setup_keep_ref_frame_mask(cpi);
2395
2396
0
  x->txfm_search_info.txb_split_count = 0;
2397
#if CONFIG_SPEED_STATS
2398
  x->txfm_search_info.tx_search_count = 0;
2399
#endif  // CONFIG_SPEED_STATS
2400
2401
0
#if !CONFIG_REALTIME_ONLY
2402
#if CONFIG_COLLECT_COMPONENT_TIMING
2403
  start_timing(cpi, av1_compute_global_motion_time);
2404
#endif
2405
0
  av1_compute_global_motion_facade(cpi);
2406
#if CONFIG_COLLECT_COMPONENT_TIMING
2407
  end_timing(cpi, av1_compute_global_motion_time);
2408
#endif
2409
0
#endif  // !CONFIG_REALTIME_ONLY
2410
2411
#if CONFIG_COLLECT_COMPONENT_TIMING
2412
  start_timing(cpi, av1_setup_motion_field_time);
2413
#endif
2414
0
  av1_calculate_ref_frame_side(cm);
2415
2416
0
  features->allow_ref_frame_mvs &= !(cpi->sf.hl_sf.ref_frame_mvs_lvl == 2);
2417
0
  if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
2418
0
#if !CONFIG_REALTIME_ONLY
2419
0
  check_to_disable_ref_frame_mvs(cpi);
2420
0
#endif  // !CONFIG_REALTIME_ONLY
2421
2422
#if CONFIG_COLLECT_COMPONENT_TIMING
2423
  end_timing(cpi, av1_setup_motion_field_time);
2424
#endif
2425
2426
0
  cm->current_frame.skip_mode_info.skip_mode_flag =
2427
0
      check_skip_mode_enabled(cpi);
2428
2429
  // Initialization of skip mode cost depends on the value of
2430
  // 'skip_mode_flag'. This initialization happens in the function
2431
  // av1_fill_mode_rates(), which is in turn called in
2432
  // av1_initialize_rd_consts(). Thus, av1_initialize_rd_consts()
2433
  // has to be called after 'skip_mode_flag' is initialized.
2434
0
  av1_initialize_rd_consts(cpi);
2435
0
  av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
2436
0
  populate_thresh_to_force_zeromv_skip(cpi);
2437
2438
0
  enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
2439
0
  enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
2440
0
  mt_info->row_mt_enabled = 0;
2441
0
  mt_info->pack_bs_mt_enabled = AOMMIN(mt_info->num_mod_workers[MOD_PACK_BS],
2442
0
                                       cm->tiles.cols * cm->tiles.rows) > 1;
2443
2444
0
  if (oxcf->row_mt && (mt_info->num_workers > 1)) {
2445
0
    mt_info->row_mt_enabled = 1;
2446
0
    enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
2447
0
    enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
2448
0
    av1_encode_tiles_row_mt(cpi);
2449
0
  } else {
2450
0
    if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1) {
2451
0
      av1_encode_tiles_mt(cpi);
2452
0
    } else {
2453
      // Preallocate the pc_tree for realtime coding to reduce the cost of
2454
      // memory allocation.
2455
0
      const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
2456
0
      if (use_nonrd_mode) {
2457
0
        td->pc_root = av1_alloc_pc_tree_node(cm->seq_params->sb_size);
2458
0
        if (!td->pc_root)
2459
0
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2460
0
                             "Failed to allocate PC_TREE");
2461
0
      } else {
2462
0
        td->pc_root = NULL;
2463
0
      }
2464
2465
0
      encode_tiles(cpi);
2466
0
      av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
2467
0
                                 cpi->sf.part_sf.partition_search_type);
2468
0
      td->pc_root = NULL;
2469
0
    }
2470
0
  }
2471
2472
  // If intrabc is allowed but never selected, reset the allow_intrabc flag.
2473
0
  if (features->allow_intrabc && !cpi->intrabc_used) {
2474
0
    features->allow_intrabc = 0;
2475
0
  }
2476
0
  if (features->allow_intrabc) {
2477
0
    cm->delta_q_info.delta_lf_present_flag = 0;
2478
0
  }
2479
2480
0
  if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
2481
0
    cm->delta_q_info.delta_q_present_flag = 0;
2482
0
  }
2483
2484
  // Set the transform size appropriately before bitstream creation
2485
0
  const MODE_EVAL_TYPE eval_type =
2486
0
      cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch
2487
0
          ? WINNER_MODE_EVAL
2488
0
          : DEFAULT_EVAL;
2489
0
  const TX_SIZE_SEARCH_METHOD tx_search_type =
2490
0
      cpi->winner_mode_params.tx_size_search_methods[eval_type];
2491
0
  assert(oxcf->txfm_cfg.enable_tx64 || tx_search_type != USE_LARGESTALL);
2492
0
  features->tx_mode = select_tx_mode(cm, tx_search_type);
2493
2494
  // Retain the frame level probability update conditions for parallel frames.
2495
  // These conditions will be consumed during postencode stage to update the
2496
  // probability.
2497
0
  if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2498
0
    cpi->do_update_frame_probs_txtype[cpi->num_frame_recode] =
2499
0
        cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats;
2500
0
    cpi->do_update_frame_probs_obmc[cpi->num_frame_recode] =
2501
0
        (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2502
0
         cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX);
2503
0
    cpi->do_update_frame_probs_warp[cpi->num_frame_recode] =
2504
0
        (features->allow_warped_motion &&
2505
0
         cpi->sf.inter_sf.prune_warped_prob_thresh > 0);
2506
0
    cpi->do_update_frame_probs_interpfilter[cpi->num_frame_recode] =
2507
0
        (cm->current_frame.frame_type != KEY_FRAME &&
2508
0
         cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2509
0
         features->interp_filter == SWITCHABLE);
2510
0
  }
2511
2512
0
  if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats ||
2513
0
      ((cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh !=
2514
0
        INT_MAX) &&
2515
0
       (cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh != 0))) {
2516
0
    const FRAME_UPDATE_TYPE update_type =
2517
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2518
0
    for (i = 0; i < TX_SIZES_ALL; i++) {
2519
0
      int sum = 0;
2520
0
      int j;
2521
0
      int left = MAX_TX_TYPE_PROB;
2522
2523
0
      for (j = 0; j < TX_TYPES; j++)
2524
0
        sum += cpi->td.rd_counts.tx_type_used[i][j];
2525
2526
0
      for (j = TX_TYPES - 1; j >= 0; j--) {
2527
0
        int update_txtype_frameprobs = 1;
2528
0
        const int new_prob =
2529
0
            sum ? (int)((int64_t)MAX_TX_TYPE_PROB *
2530
0
                        cpi->td.rd_counts.tx_type_used[i][j] / sum)
2531
0
                : (j ? 0 : MAX_TX_TYPE_PROB);
2532
#if CONFIG_FPMT_TEST
2533
        if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2534
          if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2535
              0) {
2536
            int prob =
2537
                (temp_frame_probs_simulation->tx_type_probs[update_type][i][j] +
2538
                 new_prob) >>
2539
                1;
2540
            left -= prob;
2541
            if (j == 0) prob += left;
2542
            temp_frame_probs_simulation->tx_type_probs[update_type][i][j] =
2543
                prob;
2544
            // Copy temp_frame_probs_simulation to temp_frame_probs
2545
            for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2546
                 update_type_idx++) {
2547
              temp_frame_probs->tx_type_probs[update_type_idx][i][j] =
2548
                  temp_frame_probs_simulation
2549
                      ->tx_type_probs[update_type_idx][i][j];
2550
            }
2551
          }
2552
          update_txtype_frameprobs = 0;
2553
        }
2554
#endif  // CONFIG_FPMT_TEST
2555
        // Track the frame probabilities of parallel encode frames to update
2556
        // during postencode stage.
2557
0
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2558
0
          update_txtype_frameprobs = 0;
2559
0
          cpi->frame_new_probs[cpi->num_frame_recode]
2560
0
              .tx_type_probs[update_type][i][j] = new_prob;
2561
0
        }
2562
0
        if (update_txtype_frameprobs) {
2563
0
          int prob =
2564
0
              (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
2565
0
          left -= prob;
2566
0
          if (j == 0) prob += left;
2567
0
          frame_probs->tx_type_probs[update_type][i][j] = prob;
2568
0
        }
2569
0
      }
2570
0
    }
2571
0
  }
2572
2573
0
  if (cm->seg.enabled) {
2574
0
    cm->seg.temporal_update = 1;
2575
0
    if (rdc->seg_tmp_pred_cost[0] < rdc->seg_tmp_pred_cost[1])
2576
0
      cm->seg.temporal_update = 0;
2577
0
  }
2578
2579
0
  if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2580
0
      cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
2581
0
    const FRAME_UPDATE_TYPE update_type =
2582
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2583
2584
0
    for (i = 0; i < BLOCK_SIZES_ALL; i++) {
2585
0
      int sum = 0;
2586
0
      int update_obmc_frameprobs = 1;
2587
0
      for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];
2588
2589
0
      const int new_prob =
2590
0
          sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
2591
#if CONFIG_FPMT_TEST
2592
      if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2593
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2594
          temp_frame_probs_simulation->obmc_probs[update_type][i] =
2595
              (temp_frame_probs_simulation->obmc_probs[update_type][i] +
2596
               new_prob) >>
2597
              1;
2598
          // Copy temp_frame_probs_simulation to temp_frame_probs
2599
          for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2600
               update_type_idx++) {
2601
            temp_frame_probs->obmc_probs[update_type_idx][i] =
2602
                temp_frame_probs_simulation->obmc_probs[update_type_idx][i];
2603
          }
2604
        }
2605
        update_obmc_frameprobs = 0;
2606
      }
2607
#endif  // CONFIG_FPMT_TEST
2608
      // Track the frame probabilities of parallel encode frames to update
2609
      // during postencode stage.
2610
0
      if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2611
0
        update_obmc_frameprobs = 0;
2612
0
        cpi->frame_new_probs[cpi->num_frame_recode].obmc_probs[update_type][i] =
2613
0
            new_prob;
2614
0
      }
2615
0
      if (update_obmc_frameprobs) {
2616
0
        frame_probs->obmc_probs[update_type][i] =
2617
0
            (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
2618
0
      }
2619
0
    }
2620
0
  }
2621
2622
0
  if (features->allow_warped_motion &&
2623
0
      cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
2624
0
    const FRAME_UPDATE_TYPE update_type =
2625
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2626
0
    int update_warp_frameprobs = 1;
2627
0
    int sum = 0;
2628
0
    for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
2629
0
    const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
2630
#if CONFIG_FPMT_TEST
2631
    if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2632
      if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2633
        temp_frame_probs_simulation->warped_probs[update_type] =
2634
            (temp_frame_probs_simulation->warped_probs[update_type] +
2635
             new_prob) >>
2636
            1;
2637
        // Copy temp_frame_probs_simulation to temp_frame_probs
2638
        for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2639
             update_type_idx++) {
2640
          temp_frame_probs->warped_probs[update_type_idx] =
2641
              temp_frame_probs_simulation->warped_probs[update_type_idx];
2642
        }
2643
      }
2644
      update_warp_frameprobs = 0;
2645
    }
2646
#endif  // CONFIG_FPMT_TEST
2647
    // Track the frame probabilities of parallel encode frames to update
2648
    // during postencode stage.
2649
0
    if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2650
0
      update_warp_frameprobs = 0;
2651
0
      cpi->frame_new_probs[cpi->num_frame_recode].warped_probs[update_type] =
2652
0
          new_prob;
2653
0
    }
2654
0
    if (update_warp_frameprobs) {
2655
0
      frame_probs->warped_probs[update_type] =
2656
0
          (frame_probs->warped_probs[update_type] + new_prob) >> 1;
2657
0
    }
2658
0
  }
2659
2660
0
  if (cm->current_frame.frame_type != KEY_FRAME &&
2661
0
      cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2662
0
      features->interp_filter == SWITCHABLE) {
2663
0
    const FRAME_UPDATE_TYPE update_type =
2664
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2665
2666
0
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
2667
0
      int sum = 0;
2668
0
      int j;
2669
0
      int left = 1536;
2670
2671
0
      for (j = 0; j < SWITCHABLE_FILTERS; j++) {
2672
0
        sum += cpi->td.counts->switchable_interp[i][j];
2673
0
      }
2674
2675
0
      for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
2676
0
        int update_interpfilter_frameprobs = 1;
2677
0
        const int new_prob =
2678
0
            sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
2679
0
                : (j ? 0 : 1536);
2680
#if CONFIG_FPMT_TEST
2681
        if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2682
          if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2683
              0) {
2684
            int prob = (temp_frame_probs_simulation
2685
                            ->switchable_interp_probs[update_type][i][j] +
2686
                        new_prob) >>
2687
                       1;
2688
            left -= prob;
2689
            if (j == 0) prob += left;
2690
            temp_frame_probs_simulation
2691
                ->switchable_interp_probs[update_type][i][j] = prob;
2692
            // Copy temp_frame_probs_simulation to temp_frame_probs
2693
            for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2694
                 update_type_idx++) {
2695
              temp_frame_probs->switchable_interp_probs[update_type_idx][i][j] =
2696
                  temp_frame_probs_simulation
2697
                      ->switchable_interp_probs[update_type_idx][i][j];
2698
            }
2699
          }
2700
          update_interpfilter_frameprobs = 0;
2701
        }
2702
#endif  // CONFIG_FPMT_TEST
2703
        // Track the frame probabilities of parallel encode frames to update
2704
        // during postencode stage.
2705
0
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2706
0
          update_interpfilter_frameprobs = 0;
2707
0
          cpi->frame_new_probs[cpi->num_frame_recode]
2708
0
              .switchable_interp_probs[update_type][i][j] = new_prob;
2709
0
        }
2710
0
        if (update_interpfilter_frameprobs) {
2711
0
          int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
2712
0
                      new_prob) >>
2713
0
                     1;
2714
0
          left -= prob;
2715
0
          if (j == 0) prob += left;
2716
0
          frame_probs->switchable_interp_probs[update_type][i][j] = prob;
2717
0
        }
2718
0
      }
2719
0
    }
2720
0
  }
2721
0
  if (hash_table_created) {
2722
0
    av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
2723
0
  }
2724
0
}
2725
2726
/*!\brief Setup reference frame buffers and encode a frame
2727
 *
2728
 * \ingroup high_level_algo
2729
 * \callgraph
2730
 * \callergraph
2731
 *
2732
 * \param[in]    cpi    Top-level encoder structure
2733
 */
2734
0
void av1_encode_frame(AV1_COMP *cpi) {
2735
0
  // Local shorthands for the sub-structures of cpi that are used below.
  AV1_COMMON *const cm = &cpi->common;
2736
0
  CurrentFrame *const current_frame = &cm->current_frame;
2737
0
  FeatureFlags *const features = &cm->features;
2738
0
  RD_COUNTS *const rdc = &cpi->td.rd_counts;
2739
0
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
2740
  // Indicates whether or not to use a default reduced set for ext-tx
2741
  // rather than the potential full set of 16 transforms
2742
0
  features->reduced_tx_set_used = oxcf->txfm_cfg.reduced_tx_type_set;
2743
2744
  // Make sure segment_id is no larger than last_active_segid.
  // Only done when segmentation is enabled and the map is being updated.
2745
0
  if (cm->seg.enabled && cm->seg.update_map) {
2746
0
    const int mi_rows = cm->mi_params.mi_rows;
2747
0
    const int mi_cols = cm->mi_params.mi_cols;
2748
0
    const int last_active_segid = cm->seg.last_active_segid;
2749
0
    // Cursor to the start of the current row of the encoder segment map; it
    // is advanced by mi_cols at the end of every row iteration.
    uint8_t *map = cpi->enc_seg.map;
2750
0
    for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
2751
0
      for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
2752
0
        // Clamp in place: ids above last_active_segid become
        // last_active_segid, all other ids are left unchanged.
        map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
2753
0
      }
2754
0
      map += mi_cols;
2755
0
    }
2756
0
  }
2757
2758
0
  // Reference-frame setup for this frame.
  // NOTE(review): the four helpers below are defined outside this function;
  // going by their names they bind reference buffers, cap the number of
  // usable references, compute relative frame distances and set sign bias --
  // confirm against their definitions.
  av1_setup_frame_buf_refs(cm);
2759
0
  enforce_max_ref_frames(cpi, &cpi->ref_frame_flags,
2760
0
                         cm->cur_frame->ref_display_order_hint,
2761
0
                         cm->current_frame.display_order_hint);
2762
0
  set_rel_frame_dist(&cpi->common, &cpi->ref_frame_dist_info,
2763
0
                     cpi->ref_frame_flags);
2764
0
  av1_setup_frame_sign_bias(cm);
2765
2766
  // If global motion is enabled, then every buffer which is used as either
2767
  // a source or a ref frame should have an image pyramid allocated.
2768
  // Check here so that issues can be caught early in debug mode
2769
0
#if !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
2770
0
  if (cpi->alloc_pyramid) {
2771
0
    assert(cpi->source->y_pyramid);
2772
0
    // Check every reference slot from LAST_FRAME through ALTREF_FRAME; slots
    // without a buffer (NULL) are skipped.
    for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
2773
0
      const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
2774
0
      if (buf != NULL) {
2775
0
        assert(buf->buf.y_pyramid);
2776
0
      }
2777
0
    }
2778
0
  }
2779
0
#endif  // !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
2780
2781
#if CONFIG_MISMATCH_DEBUG
2782
  mismatch_reset_frame(av1_num_planes(cm));
2783
#endif
2784
2785
0
  // Zero these before encoding this frame.
  // NOTE(review): presumably both are accumulated inside
  // encode_frame_internal() -- confirm.
  rdc->newmv_or_intra_blocks = 0;
2786
0
  cpi->palette_pixel_num = 0;
2787
2788
0
  // Two paths. When either speed feature below is set, frame-level modes are
  // chosen permissively before encoding and then tightened afterwards based
  // on what the encode reported as used. Otherwise the frame is encoded
  // directly with single-reference prediction.
  if (cpi->sf.hl_sf.frame_parameter_update ||
2789
0
      cpi->sf.rt_sf.use_comp_ref_nonrd) {
2790
0
    // Intra-only frames use SINGLE_REFERENCE; every other frame starts out as
    // REFERENCE_MODE_SELECT and may be downgraded after encoding.
    if (frame_is_intra_only(cm))
2791
0
      current_frame->reference_mode = SINGLE_REFERENCE;
2792
0
    else
2793
0
      current_frame->reference_mode = REFERENCE_MODE_SELECT;
2794
2795
0
    // Default to SWITCHABLE; with large-scale tiles the filter is instead
    // fixed to EIGHTTAP_REGULAR.
    features->interp_filter = SWITCHABLE;
2796
0
    if (cm->tiles.large_scale) features->interp_filter = EIGHTTAP_REGULAR;
2797
2798
0
    features->switchable_motion_mode = is_switchable_motion_mode_allowed(
2799
0
        features->allow_warped_motion, oxcf->motion_mode_cfg.enable_obmc);
2800
2801
0
    // Cleared here and tested after encode_frame_internal() returns.
    rdc->compound_ref_used_flag = 0;
2802
0
    rdc->skip_mode_used_flag = 0;
2803
2804
0
    encode_frame_internal(cpi);
2805
2806
0
    // Post-encode tightening: if compound_ref_used_flag is still 0, signal
    // SINGLE_REFERENCE for the frame instead of REFERENCE_MODE_SELECT.
    if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
2807
      // Use a flag that includes 4x4 blocks
2808
0
      if (rdc->compound_ref_used_flag == 0) {
2809
0
        current_frame->reference_mode = SINGLE_REFERENCE;
2810
#if CONFIG_ENTROPY_STATS
2811
        av1_zero(cpi->td.counts->comp_inter);
2812
#endif  // CONFIG_ENTROPY_STATS
2813
0
      }
2814
0
    }
2815
    // Re-check on the skip mode status as reference mode may have been
2816
    // changed.
2817
0
    SkipModeInfo *const skip_mode_info = &current_frame->skip_mode_info;
2818
0
    // Skip mode is disabled outright for intra-only frames and for frames
    // that ended up as SINGLE_REFERENCE.
    if (frame_is_intra_only(cm) ||
2819
0
        current_frame->reference_mode == SINGLE_REFERENCE) {
2820
0
      skip_mode_info->skip_mode_allowed = 0;
2821
0
      skip_mode_info->skip_mode_flag = 0;
2822
0
    }
2823
0
    // Drop the frame-level skip-mode flag if skip_mode_used_flag stayed 0.
    if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
2824
0
      skip_mode_info->skip_mode_flag = 0;
2825
2826
0
    // Outside large-scale tile mode, downgrade TX_MODE_SELECT to
    // TX_MODE_LARGEST when txb_split_count is 0 after encoding.
    if (!cm->tiles.large_scale) {
2827
0
      if (features->tx_mode == TX_MODE_SELECT &&
2828
0
          cpi->td.mb.txfm_search_info.txb_split_count == 0)
2829
0
        features->tx_mode = TX_MODE_LARGEST;
2830
0
    }
2831
0
  } else {
2832
    // This is needed if real-time speed setting is changed on the fly
2833
    // from one using compound prediction to one using single reference.
2834
0
    if (current_frame->reference_mode == REFERENCE_MODE_SELECT)
2835
0
      current_frame->reference_mode = SINGLE_REFERENCE;
2836
0
    // No post-encode adjustment of reference/skip/tx modes on this path.
    encode_frame_internal(cpi);
2837
0
  }
2838
0
}