Coverage Report

Created: 2025-10-10 07:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/aom/av1/encoder/encodeframe.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <limits.h>
13
#include <float.h>
14
#include <math.h>
15
#include <stdbool.h>
16
#include <stdio.h>
17
18
#include "config/aom_config.h"
19
#include "config/aom_dsp_rtcd.h"
20
#include "config/av1_rtcd.h"
21
22
#include "aom_dsp/aom_dsp_common.h"
23
#include "aom_dsp/binary_codes_writer.h"
24
#include "aom_ports/mem.h"
25
#include "aom_ports/aom_timer.h"
26
#include "aom_util/aom_pthread.h"
27
#if CONFIG_MISMATCH_DEBUG
28
#include "aom_util/debug_util.h"
29
#endif  // CONFIG_MISMATCH_DEBUG
30
31
#include "av1/common/cfl.h"
32
#include "av1/common/common.h"
33
#include "av1/common/common_data.h"
34
#include "av1/common/entropy.h"
35
#include "av1/common/entropymode.h"
36
#include "av1/common/idct.h"
37
#include "av1/common/mv.h"
38
#include "av1/common/mvref_common.h"
39
#include "av1/common/pred_common.h"
40
#include "av1/common/quant_common.h"
41
#include "av1/common/reconintra.h"
42
#include "av1/common/reconinter.h"
43
#include "av1/common/seg_common.h"
44
#include "av1/common/tile_common.h"
45
#include "av1/common/warped_motion.h"
46
47
#include "av1/encoder/allintra_vis.h"
48
#include "av1/encoder/aq_complexity.h"
49
#include "av1/encoder/aq_cyclicrefresh.h"
50
#include "av1/encoder/aq_variance.h"
51
#include "av1/encoder/av1_quantize.h"
52
#include "av1/encoder/global_motion_facade.h"
53
#include "av1/encoder/encodeframe.h"
54
#include "av1/encoder/encodeframe_utils.h"
55
#include "av1/encoder/encodemb.h"
56
#include "av1/encoder/encodemv.h"
57
#include "av1/encoder/encodetxb.h"
58
#include "av1/encoder/ethread.h"
59
#include "av1/encoder/extend.h"
60
#include "av1/encoder/intra_mode_search_utils.h"
61
#include "av1/encoder/ml.h"
62
#include "av1/encoder/motion_search_facade.h"
63
#include "av1/encoder/partition_strategy.h"
64
#if !CONFIG_REALTIME_ONLY
65
#include "av1/encoder/partition_model_weights.h"
66
#endif
67
#include "av1/encoder/partition_search.h"
68
#include "av1/encoder/rd.h"
69
#include "av1/encoder/rdopt.h"
70
#include "av1/encoder/reconinter_enc.h"
71
#include "av1/encoder/segmentation.h"
72
#include "av1/encoder/tokenize.h"
73
#include "av1/encoder/tpl_model.h"
74
#include "av1/encoder/var_based_part.h"
75
76
#if CONFIG_TUNE_VMAF
77
#include "av1/encoder/tune_vmaf.h"
78
#endif
79
80
/*!\cond */
81
// This is used as a reference when computing the source variance for the
82
//  purposes of activity masking.
83
// Eventually this should be replaced by custom no-reference routines,
84
//  which will be faster.
85
static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
86
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
87
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
88
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
89
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
90
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
91
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
92
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
93
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
94
  128, 128, 128, 128, 128, 128, 128, 128
95
};
96
97
#if CONFIG_AV1_HIGHBITDEPTH
98
static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
99
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
100
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
101
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
102
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
103
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
104
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
105
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
106
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
107
  128, 128, 128, 128, 128, 128, 128, 128
108
};
109
110
static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
111
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
112
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
113
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
114
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
115
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
116
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
117
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
118
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
119
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
120
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
121
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
122
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
123
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
124
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
125
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
126
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
127
};
128
129
static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
130
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
131
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
132
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
133
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
134
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
135
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
136
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
137
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
138
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
139
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
140
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
141
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
142
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
143
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
144
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
145
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
146
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
147
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
148
  128 * 16, 128 * 16
149
};
150
#endif  // CONFIG_AV1_HIGHBITDEPTH
151
/*!\endcond */
152
153
// For the given bit depth, returns a constant array used to assist the
154
// calculation of source block variance, which will then be used to decide
155
// adaptive quantizers.
156
25.2M
static const uint8_t *get_var_offs(int use_hbd, int bd) {
157
25.2M
#if CONFIG_AV1_HIGHBITDEPTH
158
25.2M
  if (use_hbd) {
159
3.38M
    assert(bd == 8 || bd == 10 || bd == 12);
160
3.38M
    const int off_index = (bd - 8) >> 1;
161
3.38M
    static const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
162
3.38M
                                                AV1_HIGH_VAR_OFFS_10,
163
3.38M
                                                AV1_HIGH_VAR_OFFS_12 };
164
3.38M
    return CONVERT_TO_BYTEPTR(high_var_offs[off_index]);
165
3.38M
  }
166
#else
167
  (void)use_hbd;
168
  (void)bd;
169
  assert(!use_hbd);
170
#endif
171
25.2M
  assert(bd == 8);
172
21.8M
  return AV1_VAR_OFFS;
173
25.2M
}
174
175
248k
void av1_init_rtc_counters(MACROBLOCK *const x) {
176
248k
  av1_init_cyclic_refresh_counters(x);
177
248k
  x->cnt_zeromv = 0;
178
248k
}
179
180
51.4k
void av1_accumulate_rtc_counters(AV1_COMP *cpi, const MACROBLOCK *const x) {
181
51.4k
  if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ)
182
0
    av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh, x);
183
51.4k
  cpi->rc.cnt_zeromv += x->cnt_zeromv;
184
51.4k
  cpi->rc.num_col_blscroll_last_tl0 += x->sb_col_scroll;
185
51.4k
  cpi->rc.num_row_blscroll_last_tl0 += x->sb_row_scroll;
186
51.4k
}
187
188
unsigned int av1_get_perpixel_variance(const AV1_COMP *cpi,
189
                                       const MACROBLOCKD *xd,
190
                                       const struct buf_2d *ref,
191
                                       BLOCK_SIZE bsize, int plane,
192
25.2M
                                       int use_hbd) {
193
25.2M
  const int subsampling_x = xd->plane[plane].subsampling_x;
194
25.2M
  const int subsampling_y = xd->plane[plane].subsampling_y;
195
25.2M
  const BLOCK_SIZE plane_bsize =
196
25.2M
      get_plane_block_size(bsize, subsampling_x, subsampling_y);
197
25.2M
  unsigned int sse;
198
25.2M
  const unsigned int var = cpi->ppi->fn_ptr[plane_bsize].vf(
199
25.2M
      ref->buf, ref->stride, get_var_offs(use_hbd, xd->bd), 0, &sse);
200
25.2M
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[plane_bsize]);
201
25.2M
}
202
203
unsigned int av1_get_perpixel_variance_facade(const AV1_COMP *cpi,
204
                                              const MACROBLOCKD *xd,
205
                                              const struct buf_2d *ref,
206
25.2M
                                              BLOCK_SIZE bsize, int plane) {
207
25.2M
  const int use_hbd = is_cur_buf_hbd(xd);
208
25.2M
  return av1_get_perpixel_variance(cpi, xd, ref, bsize, plane, use_hbd);
209
25.2M
}
210
211
void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
212
                          int mi_row, int mi_col, const int num_planes,
213
44.6M
                          BLOCK_SIZE bsize) {
214
  // Set current frame pointer.
215
44.6M
  x->e_mbd.cur_buf = src;
216
217
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
218
  // the static analysis warnings.
219
124M
  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
220
80.1M
    const int is_uv = i > 0;
221
80.1M
    setup_pred_plane(
222
80.1M
        &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv],
223
80.1M
        src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL,
224
80.1M
        x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y);
225
80.1M
  }
226
44.6M
}
227
228
#if !CONFIG_REALTIME_ONLY
229
/*!\brief Assigns different quantization parameters to each superblock
230
 * based on statistics relevant to the selected delta-q mode (variance).
231
 * This is the non-rd version.
232
 *
233
 * \param[in]     cpi         Top level encoder instance structure
234
 * \param[in,out] td          Thread data structure
235
 * \param[in,out] x           Superblock level data for this block.
236
 * \param[in]     tile_info   Tile information / identification
237
 * \param[in]     mi_row      Block row (in "MI_SIZE" units) index
238
 * \param[in]     mi_col      Block column (in "MI_SIZE" units) index
239
 * \param[out]    num_planes  Number of image planes (e.g. Y,U,V)
240
 *
241
 * \remark No return value but updates superblock and thread data
242
 * related to the q / q delta to be used.
243
 */
244
static inline void setup_delta_q_nonrd(AV1_COMP *const cpi, ThreadData *td,
245
                                       MACROBLOCK *const x,
246
                                       const TileInfo *const tile_info,
247
0
                                       int mi_row, int mi_col, int num_planes) {
248
0
  AV1_COMMON *const cm = &cpi->common;
249
0
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
250
0
  assert(delta_q_info->delta_q_present_flag);
251
252
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
253
0
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
254
255
0
  const int delta_q_res = delta_q_info->delta_q_res;
256
0
  int current_qindex = cm->quant_params.base_qindex;
257
258
0
  if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_VARIANCE_BOOST) {
259
0
    current_qindex = av1_get_sbq_variance_boost(cpi, x);
260
0
  }
261
262
0
  x->rdmult_cur_qindex = current_qindex;
263
0
  MACROBLOCKD *const xd = &x->e_mbd;
264
0
  current_qindex = av1_adjust_q_from_delta_q_res(
265
0
      delta_q_res, xd->current_base_qindex, current_qindex);
266
267
0
  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
268
0
  x->rdmult_delta_qindex = x->delta_qindex;
269
270
0
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
271
0
  xd->mi[0]->current_qindex = current_qindex;
272
0
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);
273
274
  // keep track of any non-zero delta-q used
275
0
  td->deltaq_used |= (x->delta_qindex != 0);
276
0
}
277
278
/*!\brief Assigns different quantization parameters to each superblock
279
 * based on statistics relevant to the selected delta-q mode (TPL weight,
280
 * variance, HDR, etc).
281
 *
282
 * \ingroup tpl_modelling
283
 *
284
 * \param[in]     cpi         Top level encoder instance structure
285
 * \param[in,out] td          Thread data structure
286
 * \param[in,out] x           Superblock level data for this block.
287
 * \param[in]     tile_info   Tile information / identification
288
 * \param[in]     mi_row      Block row (in "MI_SIZE" units) index
289
 * \param[in]     mi_col      Block column (in "MI_SIZE" units) index
290
 * \param[out]    num_planes  Number of image planes (e.g. Y,U,V)
291
 *
292
 * \remark No return value but updates superblock and thread data
293
 * related to the q / q delta to be used.
294
 */
295
static inline void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
296
                                 MACROBLOCK *const x,
297
                                 const TileInfo *const tile_info, int mi_row,
298
2.27k
                                 int mi_col, int num_planes) {
299
2.27k
  AV1_COMMON *const cm = &cpi->common;
300
2.27k
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
301
2.27k
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
302
2.27k
  assert(delta_q_info->delta_q_present_flag);
303
304
2.27k
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
305
2.27k
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
306
307
2.27k
  const int delta_q_res = delta_q_info->delta_q_res;
308
2.27k
  int current_qindex = cm->quant_params.base_qindex;
309
2.27k
  if (cpi->use_ducky_encode && cpi->ducky_encode_info.frame_info.qp_mode ==
310
0
                                   DUCKY_ENCODE_FRAME_MODE_QINDEX) {
311
0
    const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
312
0
    const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
313
0
    const int sb_cols =
314
0
        CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, cm->seq_params->mib_size_log2);
315
0
    const int sb_index = sb_row * sb_cols + sb_col;
316
0
    current_qindex =
317
0
        cpi->ducky_encode_info.frame_info.superblock_encode_qindex[sb_index];
318
2.27k
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL) {
319
0
    if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
320
0
      const int block_wavelet_energy_level =
321
0
          av1_block_wavelet_energy_level(cpi, x, sb_size);
322
0
      x->sb_energy_level = block_wavelet_energy_level;
323
0
      current_qindex = av1_compute_q_from_energy_level_deltaq_mode(
324
0
          cpi, block_wavelet_energy_level);
325
0
    } else {
326
0
      const int block_var_level = av1_log_block_var(cpi, x, sb_size);
327
0
      x->sb_energy_level = block_var_level;
328
0
      current_qindex =
329
0
          av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level);
330
0
    }
331
2.27k
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE &&
332
2.27k
             cpi->oxcf.algo_cfg.enable_tpl_model) {
333
    // Setup deltaq based on tpl stats
334
2.27k
    current_qindex =
335
2.27k
        av1_get_q_for_deltaq_objective(cpi, td, NULL, sb_size, mi_row, mi_col);
336
18.4E
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL_AI) {
337
0
    current_qindex = av1_get_sbq_perceptual_ai(cpi, sb_size, mi_row, mi_col);
338
18.4E
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) {
339
0
    current_qindex = av1_get_sbq_user_rating_based(cpi, mi_row, mi_col);
340
18.4E
  } else if (cpi->oxcf.q_cfg.enable_hdr_deltaq) {
341
0
    current_qindex = av1_get_q_for_hdr(cpi, x, sb_size, mi_row, mi_col);
342
18.4E
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_VARIANCE_BOOST) {
343
0
    current_qindex = av1_get_sbq_variance_boost(cpi, x);
344
0
  }
345
346
2.27k
  x->rdmult_cur_qindex = current_qindex;
347
2.27k
  MACROBLOCKD *const xd = &x->e_mbd;
348
2.27k
  const int adjusted_qindex = av1_adjust_q_from_delta_q_res(
349
2.27k
      delta_q_res, xd->current_base_qindex, current_qindex);
350
2.27k
  if (cpi->use_ducky_encode) {
351
0
    assert(adjusted_qindex == current_qindex);
352
0
  }
353
2.27k
  current_qindex = adjusted_qindex;
354
355
2.27k
  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
356
2.27k
  x->rdmult_delta_qindex = x->delta_qindex;
357
358
2.27k
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
359
2.27k
  xd->mi[0]->current_qindex = current_qindex;
360
2.27k
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);
361
362
  // keep track of any non-zero delta-q used
363
2.27k
  td->deltaq_used |= (x->delta_qindex != 0);
364
365
2.27k
  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
366
0
    const int delta_lf_res = delta_q_info->delta_lf_res;
367
0
    const int lfmask = ~(delta_lf_res - 1);
368
0
    const int delta_lf_from_base =
369
0
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
370
0
    const int8_t delta_lf =
371
0
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
372
0
    const int frame_lf_count =
373
0
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
374
0
    const int mib_size = cm->seq_params->mib_size;
375
376
    // pre-set the delta lf for loop filter. Note that this value is set
377
    // before mi is assigned for each block in current superblock
378
0
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
379
0
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
380
0
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
381
0
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
382
0
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
383
0
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
384
0
        }
385
0
      }
386
0
    }
387
0
  }
388
2.27k
}
389
390
static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
391
227k
                                 int mi_col) {
392
227k
  const AV1_COMMON *cm = &cpi->common;
393
227k
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
394
227k
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
395
227k
  MACROBLOCK *x = &td->mb;
396
227k
  const int frame_idx = cpi->gf_frame_index;
397
227k
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
398
227k
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
399
400
227k
  av1_zero(x->tpl_keep_ref_frame);
401
402
227k
  if (!av1_tpl_stats_ready(tpl_data, frame_idx)) return;
403
44.0k
  if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return;
404
26.3k
  if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;
405
406
26.3k
  const int is_overlay =
407
26.3k
      cpi->ppi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
408
26.3k
  if (is_overlay) {
409
0
    memset(x->tpl_keep_ref_frame, 1, sizeof(x->tpl_keep_ref_frame));
410
0
    return;
411
0
  }
412
413
26.3k
  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
414
26.3k
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
415
26.3k
  const int tpl_stride = tpl_frame->stride;
416
26.3k
  int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
417
26.3k
  const int step = 1 << block_mis_log2;
418
26.3k
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
419
420
26.3k
  const int mi_row_end =
421
26.3k
      AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
422
26.3k
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
423
26.3k
  const int mi_col_sr =
424
26.3k
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
425
26.3k
  const int mi_col_end_sr =
426
26.3k
      AOMMIN(coded_to_superres_mi(mi_col + mi_size_wide[sb_size],
427
26.3k
                                  cm->superres_scale_denominator),
428
26.3k
             mi_cols_sr);
429
26.3k
  const int row_step = step;
430
26.3k
  const int col_step_sr =
431
26.3k
      coded_to_superres_mi(step, cm->superres_scale_denominator);
432
90.9k
  for (int row = mi_row; row < mi_row_end; row += row_step) {
433
226k
    for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
434
161k
      const TplDepStats *this_stats =
435
161k
          &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
436
161k
      int64_t tpl_pred_error[INTER_REFS_PER_FRAME] = { 0 };
437
      // Find the winner ref frame idx for the current block
438
161k
      int64_t best_inter_cost = this_stats->pred_error[0];
439
161k
      int best_rf_idx = 0;
440
1.13M
      for (int idx = 1; idx < INTER_REFS_PER_FRAME; ++idx) {
441
968k
        if ((this_stats->pred_error[idx] < best_inter_cost) &&
442
0
            (this_stats->pred_error[idx] != 0)) {
443
0
          best_inter_cost = this_stats->pred_error[idx];
444
0
          best_rf_idx = idx;
445
0
        }
446
968k
      }
447
      // tpl_pred_error is the pred_error reduction of best_ref w.r.t.
448
      // LAST_FRAME.
449
161k
      tpl_pred_error[best_rf_idx] = this_stats->pred_error[best_rf_idx] -
450
161k
                                    this_stats->pred_error[LAST_FRAME - 1];
451
452
1.13M
      for (int rf_idx = 1; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx)
453
970k
        inter_cost[rf_idx] += tpl_pred_error[rf_idx];
454
161k
    }
455
64.6k
  }
456
457
26.3k
  int rank_index[INTER_REFS_PER_FRAME - 1];
458
183k
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
459
157k
    rank_index[idx] = idx + 1;
460
551k
    for (int i = idx; i > 0; --i) {
461
393k
      if (inter_cost[rank_index[i - 1]] > inter_cost[rank_index[i]]) {
462
0
        const int tmp = rank_index[i - 1];
463
0
        rank_index[i - 1] = rank_index[i];
464
0
        rank_index[i] = tmp;
465
0
      }
466
393k
    }
467
157k
  }
468
469
26.3k
  x->tpl_keep_ref_frame[INTRA_FRAME] = 1;
470
26.3k
  x->tpl_keep_ref_frame[LAST_FRAME] = 1;
471
472
26.3k
  int cutoff_ref = 0;
473
184k
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
474
157k
    x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 1;
475
157k
    if (idx > 2) {
476
78.8k
      if (!cutoff_ref) {
477
        // If the predictive coding gains are smaller than the previous more
478
        // relevant frame over certain amount, discard this frame and all the
479
        // frames afterwards.
480
26.2k
        if (llabs(inter_cost[rank_index[idx]]) <
481
26.2k
                llabs(inter_cost[rank_index[idx - 1]]) / 8 ||
482
26.3k
            inter_cost[rank_index[idx]] == 0)
483
26.2k
          cutoff_ref = 1;
484
26.2k
      }
485
486
78.8k
      if (cutoff_ref) x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 0;
487
78.8k
    }
488
157k
  }
489
26.3k
}
490
491
static inline void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
492
0
                                           int mi_row, int mi_col) {
493
0
  const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
494
0
  const int orig_rdmult = cpi->rd.RDMULT;
495
496
0
  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
497
0
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
498
0
  const int gf_group_index = cpi->gf_frame_index;
499
0
  if (cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ &&
500
0
      cpi->oxcf.q_cfg.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 &&
501
0
      cpi->ppi->gf_group.update_type[gf_group_index] == ARF_UPDATE) {
502
0
    const int dr =
503
0
        av1_get_rdmult_delta(cpi, sb_size, mi_row, mi_col, orig_rdmult);
504
0
    x->rdmult = dr;
505
0
  }
506
0
}
507
#endif  // !CONFIG_REALTIME_ONLY
508
509
#if CONFIG_RT_ML_PARTITIONING
510
// Get a prediction(stored in x->est_pred) for the whole superblock.
511
static void get_estimated_pred(AV1_COMP *cpi, const TileInfo *const tile,
512
                               MACROBLOCK *x, int mi_row, int mi_col) {
513
  AV1_COMMON *const cm = &cpi->common;
514
  const int is_key_frame = frame_is_intra_only(cm);
515
  MACROBLOCKD *xd = &x->e_mbd;
516
517
  // TODO(kyslov) Extend to 128x128
518
  assert(cm->seq_params->sb_size == BLOCK_64X64);
519
520
  av1_set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
521
522
  if (!is_key_frame) {
523
    MB_MODE_INFO *mi = xd->mi[0];
524
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
525
526
    assert(yv12 != NULL);
527
528
    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
529
                         get_ref_scale_factors(cm, LAST_FRAME), 1);
530
    mi->ref_frame[0] = LAST_FRAME;
531
    mi->ref_frame[1] = NONE;
532
    mi->bsize = BLOCK_64X64;
533
    mi->mv[0].as_int = 0;
534
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
535
536
    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
537
538
    xd->plane[0].dst.buf = x->est_pred;
539
    xd->plane[0].dst.stride = 64;
540
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
541
  } else {
542
#if CONFIG_AV1_HIGHBITDEPTH
543
    switch (xd->bd) {
544
      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
545
      case 10:
546
        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
547
        break;
548
      case 12:
549
        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
550
        break;
551
    }
552
#else
553
    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
554
#endif  // CONFIG_VP9_HIGHBITDEPTH
555
  }
556
}
557
#endif  // CONFIG_RT_ML_PARTITIONING
558
559
6.49k
#define AVG_CDF_WEIGHT_LEFT 3
560
6.49k
#define AVG_CDF_WEIGHT_TOP_RIGHT 1
561
562
/*!\brief Encode a superblock (minimal RD search involved)
563
 *
564
 * \ingroup partition_search
565
 * Encodes the superblock by a pre-determined partition pattern, only minor
566
 * rd-based searches are allowed to adjust the initial pattern. It is only used
567
 * by realtime encoding.
568
 */
569
static inline void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
570
                                   TileDataEnc *tile_data, TokenExtra **tp,
571
                                   const int mi_row, const int mi_col,
572
121k
                                   const int seg_skip) {
573
121k
  AV1_COMMON *const cm = &cpi->common;
574
121k
  MACROBLOCK *const x = &td->mb;
575
121k
  const SPEED_FEATURES *const sf = &cpi->sf;
576
121k
  const TileInfo *const tile_info = &tile_data->tile_info;
577
121k
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
578
121k
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
579
121k
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
580
121k
  PC_TREE *const pc_root = td->pc_root;
581
582
121k
#if !CONFIG_REALTIME_ONLY
583
121k
  if (cm->delta_q_info.delta_q_present_flag) {
584
0
    const int num_planes = av1_num_planes(cm);
585
586
0
    setup_delta_q_nonrd(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
587
0
  }
588
121k
#endif
589
#if CONFIG_RT_ML_PARTITIONING
590
  if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) {
591
    RD_STATS dummy_rdc;
592
    get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
593
    av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
594
                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, pc_root);
595
    return;
596
  }
597
#endif
598
  // Set the partition
599
121k
  if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
600
121k
      (sf->rt_sf.use_fast_fixed_part && x->sb_force_fixed_part == 1 &&
601
0
       (!frame_is_intra_only(cm) &&
602
0
        (!cpi->ppi->use_svc ||
603
0
         !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)))) {
604
    // set a fixed-size partition
605
0
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
606
0
    BLOCK_SIZE bsize_select = sf->part_sf.fixed_partition_size;
607
0
    if (sf->rt_sf.use_fast_fixed_part &&
608
0
        x->content_state_sb.source_sad_nonrd < kLowSad) {
609
0
      bsize_select = cm->seq_params->sb_size;
610
0
    }
611
0
    if (cpi->sf.rt_sf.skip_encoding_non_reference_slide_change &&
612
0
        cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) {
613
0
      bsize_select = cm->seq_params->sb_size;
614
0
      x->force_zeromv_skip_for_sb = 1;
615
0
    }
616
0
    const BLOCK_SIZE bsize = seg_skip ? sb_size : bsize_select;
617
0
    if (x->content_state_sb.source_sad_nonrd > kZeroSad)
618
0
      x->force_color_check_block_level = 1;
619
0
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
620
121k
  } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
621
    // set a variance-based partition
622
121k
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
623
121k
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
624
121k
  }
625
121k
  assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
626
121k
         sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
627
121k
  set_cb_offsets(td->mb.cb_offset, 0, 0);
628
629
  // Initialize the flag to skip cdef to 1.
630
121k
  if (sf->rt_sf.skip_cdef_sb) {
631
0
    const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
632
    // If 128x128 block is used, we need to set the flag for all 4 64x64 sub
633
    // "blocks".
634
0
    for (int r = 0; r < block64_in_sb; ++r) {
635
0
      for (int c = 0; c < block64_in_sb; ++c) {
636
0
        const int idx_in_sb =
637
0
            r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
638
0
        if (mi[idx_in_sb]) mi[idx_in_sb]->cdef_strength = 1;
639
0
      }
640
0
    }
641
0
  }
642
643
#if CONFIG_COLLECT_COMPONENT_TIMING
644
  start_timing(cpi, nonrd_use_partition_time);
645
#endif
646
121k
  av1_nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
647
121k
                          pc_root);
648
#if CONFIG_COLLECT_COMPONENT_TIMING
649
  end_timing(cpi, nonrd_use_partition_time);
650
#endif
651
121k
}
652
653
// This function initializes the stats for encode_rd_sb.
654
static inline void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
655
                                     const TileDataEnc *tile_data,
656
                                     SIMPLE_MOTION_DATA_TREE *sms_root,
657
                                     RD_STATS *rd_cost, int mi_row, int mi_col,
658
227k
                                     int gather_tpl_data) {
659
227k
  const AV1_COMMON *cm = &cpi->common;
660
227k
  const TileInfo *tile_info = &tile_data->tile_info;
661
227k
  MACROBLOCK *x = &td->mb;
662
663
227k
  const SPEED_FEATURES *sf = &cpi->sf;
664
227k
  const int use_simple_motion_search =
665
227k
      (sf->part_sf.simple_motion_search_split ||
666
0
       sf->part_sf.simple_motion_search_prune_rect ||
667
0
       sf->part_sf.simple_motion_search_early_term_none ||
668
0
       sf->part_sf.ml_early_term_after_part_split_level) &&
669
227k
      !frame_is_intra_only(cm);
670
227k
  if (use_simple_motion_search) {
671
42.4k
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_root,
672
42.4k
                                             mi_row, mi_col);
673
42.4k
  }
674
675
227k
#if !CONFIG_REALTIME_ONLY
676
227k
  if (!(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
677
227k
        cpi->oxcf.gf_cfg.lag_in_frames == 0)) {
678
227k
    init_ref_frame_space(cpi, td, mi_row, mi_col);
679
227k
    x->sb_energy_level = 0;
680
227k
    x->part_search_info.cnn_output_valid = 0;
681
227k
    if (gather_tpl_data) {
682
227k
      if (cm->delta_q_info.delta_q_present_flag) {
683
2.27k
        const int num_planes = av1_num_planes(cm);
684
2.27k
        const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
685
2.27k
        setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
686
2.27k
        av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
687
2.27k
      }
688
689
      // TODO(jingning): revisit this function.
690
227k
      if (cpi->oxcf.algo_cfg.enable_tpl_model && (0)) {
691
0
        adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
692
0
      }
693
227k
    }
694
227k
  }
695
#else
696
  (void)tile_info;
697
  (void)mi_row;
698
  (void)mi_col;
699
  (void)gather_tpl_data;
700
#endif
701
702
227k
  x->reuse_inter_pred = false;
703
227k
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
704
227k
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
705
227k
  av1_zero(x->picked_ref_frames_mask);
706
227k
  av1_invalid_rd_stats(rd_cost);
707
227k
}
708
709
#if !CONFIG_REALTIME_ONLY
710
static void sb_qp_sweep_init_quantizers(AV1_COMP *cpi, ThreadData *td,
711
                                        const TileDataEnc *tile_data,
712
                                        SIMPLE_MOTION_DATA_TREE *sms_tree,
713
                                        RD_STATS *rd_cost, int mi_row,
714
0
                                        int mi_col, int delta_qp_ofs) {
715
0
  AV1_COMMON *const cm = &cpi->common;
716
0
  MACROBLOCK *const x = &td->mb;
717
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
718
0
  const TileInfo *tile_info = &tile_data->tile_info;
719
0
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
720
0
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
721
0
  assert(delta_q_info->delta_q_present_flag);
722
0
  const int delta_q_res = delta_q_info->delta_q_res;
723
724
0
  const SPEED_FEATURES *sf = &cpi->sf;
725
0
  const int use_simple_motion_search =
726
0
      (sf->part_sf.simple_motion_search_split ||
727
0
       sf->part_sf.simple_motion_search_prune_rect ||
728
0
       sf->part_sf.simple_motion_search_early_term_none ||
729
0
       sf->part_sf.ml_early_term_after_part_split_level) &&
730
0
      !frame_is_intra_only(cm);
731
0
  if (use_simple_motion_search) {
732
0
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_tree,
733
0
                                             mi_row, mi_col);
734
0
  }
735
736
0
  int current_qindex = x->rdmult_cur_qindex + delta_qp_ofs;
737
738
0
  MACROBLOCKD *const xd = &x->e_mbd;
739
0
  current_qindex = av1_adjust_q_from_delta_q_res(
740
0
      delta_q_res, xd->current_base_qindex, current_qindex);
741
742
0
  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
743
744
0
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
745
0
  xd->mi[0]->current_qindex = current_qindex;
746
0
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);
747
748
  // keep track of any non-zero delta-q used
749
0
  td->deltaq_used |= (x->delta_qindex != 0);
750
751
0
  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
752
0
    const int delta_lf_res = delta_q_info->delta_lf_res;
753
0
    const int lfmask = ~(delta_lf_res - 1);
754
0
    const int delta_lf_from_base =
755
0
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
756
0
    const int8_t delta_lf =
757
0
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
758
0
    const int frame_lf_count =
759
0
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
760
0
    const int mib_size = cm->seq_params->mib_size;
761
762
    // pre-set the delta lf for loop filter. Note that this value is set
763
    // before mi is assigned for each block in current superblock
764
0
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
765
0
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
766
0
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
767
0
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
768
0
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
769
0
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
770
0
        }
771
0
      }
772
0
    }
773
0
  }
774
775
0
  x->reuse_inter_pred = false;
776
0
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
777
0
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
778
0
  av1_zero(x->picked_ref_frames_mask);
779
0
  av1_invalid_rd_stats(rd_cost);
780
0
}
781
782
static int sb_qp_sweep(AV1_COMP *const cpi, ThreadData *td,
783
                       TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
784
                       int mi_col, BLOCK_SIZE bsize,
785
                       SIMPLE_MOTION_DATA_TREE *sms_tree,
786
0
                       SB_FIRST_PASS_STATS *sb_org_stats) {
787
0
  AV1_COMMON *const cm = &cpi->common;
788
0
  MACROBLOCK *const x = &td->mb;
789
0
  RD_STATS rdc_winner, cur_rdc;
790
0
  av1_invalid_rd_stats(&rdc_winner);
791
792
0
  int best_qindex = td->mb.rdmult_delta_qindex;
793
0
  const int start = cm->current_frame.frame_type == KEY_FRAME ? -20 : -12;
794
0
  const int end = cm->current_frame.frame_type == KEY_FRAME ? 20 : 12;
795
0
  const int step = cm->delta_q_info.delta_q_res;
796
797
0
  for (int sweep_qp_delta = start; sweep_qp_delta <= end;
798
0
       sweep_qp_delta += step) {
799
0
    sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_tree, &cur_rdc, mi_row,
800
0
                                mi_col, sweep_qp_delta);
801
802
0
    const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
803
0
    const int backup_current_qindex =
804
0
        cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;
805
806
0
    av1_reset_mbmi(&cm->mi_params, bsize, mi_row, mi_col);
807
0
    av1_restore_sb_state(sb_org_stats, cpi, td, tile_data, mi_row, mi_col);
808
0
    cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex = backup_current_qindex;
809
810
0
    td->pc_root = av1_alloc_pc_tree_node(bsize);
811
0
    if (!td->pc_root)
812
0
      aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
813
0
                         "Failed to allocate PC_TREE");
814
0
    av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize,
815
0
                          &cur_rdc, cur_rdc, td->pc_root, sms_tree, NULL,
816
0
                          SB_DRY_PASS, NULL);
817
818
0
    if ((rdc_winner.rdcost > cur_rdc.rdcost) ||
819
0
        (abs(sweep_qp_delta) < abs(best_qindex - x->rdmult_delta_qindex) &&
820
0
         rdc_winner.rdcost == cur_rdc.rdcost)) {
821
0
      rdc_winner = cur_rdc;
822
0
      best_qindex = x->rdmult_delta_qindex + sweep_qp_delta;
823
0
    }
824
0
  }
825
826
0
  return best_qindex;
827
0
}
828
#endif  //! CONFIG_REALTIME_ONLY
829
830
/*!\brief Encode a superblock (RD-search-based)
831
 *
832
 * \ingroup partition_search
833
 * Conducts partition search for a superblock, based on rate-distortion costs,
834
 * from scratch or adjusting from a pre-calculated partition pattern.
835
 */
836
static inline void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
837
                                TileDataEnc *tile_data, TokenExtra **tp,
838
                                const int mi_row, const int mi_col,
839
227k
                                const int seg_skip) {
840
227k
  AV1_COMMON *const cm = &cpi->common;
841
227k
  MACROBLOCK *const x = &td->mb;
842
227k
  MACROBLOCKD *const xd = &x->e_mbd;
843
227k
  const SPEED_FEATURES *const sf = &cpi->sf;
844
227k
  const TileInfo *const tile_info = &tile_data->tile_info;
845
227k
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
846
227k
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
847
227k
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
848
227k
  const int num_planes = av1_num_planes(cm);
849
227k
  int dummy_rate;
850
227k
  int64_t dummy_dist;
851
227k
  RD_STATS dummy_rdc;
852
227k
  SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root;
853
854
#if CONFIG_REALTIME_ONLY
855
  (void)seg_skip;
856
#endif  // CONFIG_REALTIME_ONLY
857
858
227k
  init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
859
227k
                    1);
860
861
  // Encode the superblock
862
227k
  if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
863
    // partition search starting from a variance-based partition
864
26.0k
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
865
26.0k
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
866
867
#if CONFIG_COLLECT_COMPONENT_TIMING
868
    start_timing(cpi, rd_use_partition_time);
869
#endif
870
26.0k
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
871
26.0k
    if (!td->pc_root)
872
0
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
873
0
                         "Failed to allocate PC_TREE");
874
26.0k
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
875
26.0k
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
876
26.0k
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
877
26.0k
                               sf->part_sf.partition_search_type);
878
26.0k
    td->pc_root = NULL;
879
#if CONFIG_COLLECT_COMPONENT_TIMING
880
    end_timing(cpi, rd_use_partition_time);
881
#endif
882
26.0k
  }
883
201k
#if !CONFIG_REALTIME_ONLY
884
201k
  else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
885
    // partition search by adjusting a fixed-size partition
886
0
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
887
0
    const BLOCK_SIZE bsize =
888
0
        seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
889
0
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
890
0
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
891
0
    if (!td->pc_root)
892
0
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
893
0
                         "Failed to allocate PC_TREE");
894
0
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
895
0
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
896
0
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
897
0
                               sf->part_sf.partition_search_type);
898
0
    td->pc_root = NULL;
899
201k
  } else {
900
    // The most exhaustive recursive partition search
901
201k
    SuperBlockEnc *sb_enc = &x->sb_enc;
902
    // No stats for overlay frames. Exclude key frame.
903
201k
    av1_get_tpl_stats_sb(cpi, sb_size, mi_row, mi_col, sb_enc);
904
905
    // Reset the tree for simple motion search data
906
201k
    av1_reset_simple_motion_tree_partition(sms_root, sb_size);
907
908
#if CONFIG_COLLECT_COMPONENT_TIMING
909
    start_timing(cpi, rd_pick_partition_time);
910
#endif
911
912
    // Estimate the maximum square partition block size, which will be used
913
    // as the starting block size for partitioning the sb
914
201k
    set_max_min_partition_size(sb_enc, cpi, x, sf, sb_size, mi_row, mi_col);
915
916
    // The superblock can be searched only once, or twice consecutively for
917
    // better quality. Note that the meaning of passes here is different from
918
    // the general concept of 1-pass/2-pass encoders.
919
201k
    const int num_passes =
920
201k
        cpi->oxcf.unit_test_cfg.sb_multipass_unit_test ? 2 : 1;
921
922
201k
    if (cpi->oxcf.sb_qp_sweep &&
923
0
        !(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
924
0
          cpi->oxcf.gf_cfg.lag_in_frames == 0) &&
925
0
        cm->delta_q_info.delta_q_present_flag) {
926
0
      AOM_CHECK_MEM_ERROR(
927
0
          x->e_mbd.error_info, td->mb.sb_stats_cache,
928
0
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_stats_cache)));
929
0
      av1_backup_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
930
0
                          mi_col);
931
0
      assert(x->rdmult_delta_qindex == x->delta_qindex);
932
933
0
      const int best_qp_diff =
934
0
          sb_qp_sweep(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, sms_root,
935
0
                      td->mb.sb_stats_cache) -
936
0
          x->rdmult_delta_qindex;
937
938
0
      sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_root, &dummy_rdc,
939
0
                                  mi_row, mi_col, best_qp_diff);
940
941
0
      const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
942
0
      const int backup_current_qindex =
943
0
          cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;
944
945
0
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
946
0
      av1_restore_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
947
0
                           mi_col);
948
949
0
      cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex =
950
0
          backup_current_qindex;
951
0
      aom_free(td->mb.sb_stats_cache);
952
0
      td->mb.sb_stats_cache = NULL;
953
0
    }
954
201k
    if (num_passes == 1) {
955
#if CONFIG_PARTITION_SEARCH_ORDER
956
      if (cpi->ext_part_controller.ready && !frame_is_intra_only(cm)) {
957
        av1_reset_part_sf(&cpi->sf.part_sf);
958
        av1_reset_sf_for_ext_part(cpi);
959
        RD_STATS this_rdc;
960
        av1_rd_partition_search(cpi, td, tile_data, tp, sms_root, mi_row,
961
                                mi_col, sb_size, &this_rdc);
962
      } else {
963
        td->pc_root = av1_alloc_pc_tree_node(sb_size);
964
        if (!td->pc_root)
965
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
966
                             "Failed to allocate PC_TREE");
967
        av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
968
                              &dummy_rdc, dummy_rdc, td->pc_root, sms_root,
969
                              NULL, SB_SINGLE_PASS, NULL);
970
      }
971
#else
972
200k
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
973
200k
      if (!td->pc_root)
974
0
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
975
0
                           "Failed to allocate PC_TREE");
976
200k
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
977
200k
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
978
200k
                            SB_SINGLE_PASS, NULL);
979
200k
#endif  // CONFIG_PARTITION_SEARCH_ORDER
980
200k
    } else {
981
      // First pass
982
322
      AOM_CHECK_MEM_ERROR(
983
322
          x->e_mbd.error_info, td->mb.sb_fp_stats,
984
322
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_fp_stats)));
985
322
      av1_backup_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
986
322
                          mi_col);
987
322
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
988
322
      if (!td->pc_root)
989
0
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
990
0
                           "Failed to allocate PC_TREE");
991
322
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
992
322
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
993
322
                            SB_DRY_PASS, NULL);
994
995
      // Second pass
996
322
      init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
997
322
                        mi_col, 0);
998
322
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
999
322
      av1_reset_simple_motion_tree_partition(sms_root, sb_size);
1000
1001
322
      av1_restore_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
1002
322
                           mi_col);
1003
1004
322
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
1005
322
      if (!td->pc_root)
1006
0
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
1007
0
                           "Failed to allocate PC_TREE");
1008
322
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
1009
322
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
1010
322
                            SB_WET_PASS, NULL);
1011
322
      aom_free(td->mb.sb_fp_stats);
1012
322
      td->mb.sb_fp_stats = NULL;
1013
322
    }
1014
1015
    // Reset to 0 so that it wouldn't be used elsewhere mistakenly.
1016
201k
    sb_enc->tpl_data_count = 0;
1017
#if CONFIG_COLLECT_COMPONENT_TIMING
1018
    end_timing(cpi, rd_pick_partition_time);
1019
#endif
1020
201k
  }
1021
227k
#endif  // !CONFIG_REALTIME_ONLY
1022
1023
  // Update the inter rd model
1024
  // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
1025
227k
  if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
1026
31.9k
      cm->tiles.cols == 1 && cm->tiles.rows == 1) {
1027
21.9k
    av1_inter_mode_data_fit(tile_data, x->rdmult);
1028
21.9k
  }
1029
227k
}
1030
1031
// Check if the cost update of symbols mode, coeff and dv are tile or off.
1032
static inline int is_mode_coeff_dv_upd_freq_tile_or_off(
1033
424k
    const AV1_COMP *const cpi) {
1034
424k
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;
1035
1036
424k
  return (inter_sf->coeff_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
1037
63.2k
          inter_sf->mode_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
1038
63.2k
          cpi->sf.intra_sf.dv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
1039
424k
}
1040
1041
// When row-mt is enabled and cost update frequencies are set to off/tile,
1042
// processing of current SB can start even before processing of top-right SB
1043
// is finished. This function checks if it is sufficient to wait for top SB
1044
// to finish processing before current SB starts processing.
1045
663k
static inline int delay_wait_for_top_right_sb(const AV1_COMP *const cpi) {
1046
663k
  const MODE mode = cpi->oxcf.mode;
1047
663k
  if (mode == GOOD) return 0;
1048
1049
424k
  if (mode == ALLINTRA)
1050
324k
    return is_mode_coeff_dv_upd_freq_tile_or_off(cpi);
1051
99.6k
  else if (mode == REALTIME)
1052
99.6k
    return (is_mode_coeff_dv_upd_freq_tile_or_off(cpi) &&
1053
0
            cpi->sf.inter_sf.mv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
1054
5
  else
1055
5
    return 0;
1056
424k
}
1057
1058
/*!\brief Calculate source SAD at superblock level using 64x64 block source SAD
1059
 *
1060
 * \ingroup partition_search
1061
 * \callgraph
1062
 * \callergraph
1063
 */
1064
static inline uint64_t get_sb_source_sad(const AV1_COMP *cpi, int mi_row,
1065
27.5k
                                         int mi_col) {
1066
27.5k
  if (cpi->src_sad_blk_64x64 == NULL) return UINT64_MAX;
1067
1068
27.5k
  const AV1_COMMON *const cm = &cpi->common;
1069
27.5k
  const int blk_64x64_in_mis = (cm->seq_params->sb_size == BLOCK_128X128)
1070
27.5k
                                   ? (cm->seq_params->mib_size >> 1)
1071
27.5k
                                   : cm->seq_params->mib_size;
1072
27.5k
  const int num_blk_64x64_cols =
1073
27.5k
      (cm->mi_params.mi_cols + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
1074
27.5k
  const int num_blk_64x64_rows =
1075
27.5k
      (cm->mi_params.mi_rows + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
1076
27.5k
  const int blk_64x64_col_index = mi_col / blk_64x64_in_mis;
1077
27.5k
  const int blk_64x64_row_index = mi_row / blk_64x64_in_mis;
1078
27.5k
  uint64_t curr_sb_sad = UINT64_MAX;
1079
  // Avoid the border as sad_blk_64x64 may not be set for the border
1080
  // in the scene detection.
1081
27.5k
  if ((blk_64x64_row_index >= num_blk_64x64_rows - 1) ||
1082
22.7k
      (blk_64x64_col_index >= num_blk_64x64_cols - 1)) {
1083
22.7k
    return curr_sb_sad;
1084
22.7k
  }
1085
4.86k
  const uint64_t *const src_sad_blk_64x64_data =
1086
4.86k
      &cpi->src_sad_blk_64x64[blk_64x64_col_index +
1087
4.86k
                              blk_64x64_row_index * num_blk_64x64_cols];
1088
4.86k
  if (cm->seq_params->sb_size == BLOCK_128X128) {
1089
    // Calculate SB source SAD by accumulating source SAD of 64x64 blocks in the
1090
    // superblock
1091
0
    curr_sb_sad = src_sad_blk_64x64_data[0] + src_sad_blk_64x64_data[1] +
1092
0
                  src_sad_blk_64x64_data[num_blk_64x64_cols] +
1093
0
                  src_sad_blk_64x64_data[num_blk_64x64_cols + 1];
1094
4.86k
  } else if (cm->seq_params->sb_size == BLOCK_64X64) {
1095
4.86k
    curr_sb_sad = src_sad_blk_64x64_data[0];
1096
4.86k
  }
1097
4.86k
  return curr_sb_sad;
1098
27.5k
}
1099
1100
/*!\brief Determine whether grading content can be skipped based on sad stat
1101
 *
1102
 * \ingroup partition_search
1103
 * \callgraph
1104
 * \callergraph
1105
 */
1106
static inline bool is_calc_src_content_needed(AV1_COMP *cpi,
1107
                                              MACROBLOCK *const x, int mi_row,
1108
27.5k
                                              int mi_col) {
1109
27.5k
  if (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
1110
0
    return true;
1111
27.5k
  const uint64_t curr_sb_sad = get_sb_source_sad(cpi, mi_row, mi_col);
1112
27.5k
  if (curr_sb_sad == UINT64_MAX) return true;
1113
4.86k
  if (curr_sb_sad == 0) {
1114
0
    x->content_state_sb.source_sad_nonrd = kZeroSad;
1115
0
    return false;
1116
0
  }
1117
4.86k
  AV1_COMMON *const cm = &cpi->common;
1118
4.86k
  bool do_calc_src_content = true;
1119
1120
4.86k
  if (cpi->oxcf.speed < 9) return do_calc_src_content;
1121
1122
  // TODO(yunqing): Tune/validate the thresholds for 128x128 SB size.
1123
1.26k
  if (AOMMIN(cm->width, cm->height) < 360) {
1124
    // Derive Average 64x64 block source SAD from SB source SAD
1125
1.26k
    const uint64_t avg_64x64_blk_sad =
1126
1.26k
        (cm->seq_params->sb_size == BLOCK_128X128) ? ((curr_sb_sad + 2) >> 2)
1127
1.26k
                                                   : curr_sb_sad;
1128
1129
    // The threshold is determined based on kLowSad and kHighSad threshold and
1130
    // test results.
1131
1.26k
    uint64_t thresh_low = 15000;
1132
1.26k
    uint64_t thresh_high = 40000;
1133
1134
1.26k
    if (cpi->sf.rt_sf.increase_source_sad_thresh) {
1135
0
      thresh_low = thresh_low << 1;
1136
0
      thresh_high = thresh_high << 1;
1137
0
    }
1138
1139
1.26k
    if (avg_64x64_blk_sad > thresh_low && avg_64x64_blk_sad < thresh_high) {
1140
0
      do_calc_src_content = false;
1141
      // Note: set x->content_state_sb.source_sad_rd as well if this is extended
1142
      // to RTC rd path.
1143
0
      x->content_state_sb.source_sad_nonrd = kMedSad;
1144
0
    }
1145
1.26k
  }
1146
1147
1.26k
  return do_calc_src_content;
1148
4.86k
}
1149
1150
/*!\brief Determine whether grading content is needed based on sf and frame stat
1151
 *
1152
 * \ingroup partition_search
1153
 * \callgraph
1154
 * \callergraph
1155
 */
1156
// TODO(any): consolidate sfs to make interface cleaner
1157
static inline void grade_source_content_sb(AV1_COMP *cpi, MACROBLOCK *const x,
1158
                                           TileDataEnc *tile_data, int mi_row,
1159
348k
                                           int mi_col) {
1160
348k
  AV1_COMMON *const cm = &cpi->common;
1161
348k
  if (cm->current_frame.frame_type == KEY_FRAME ||
1162
70.1k
      (cpi->ppi->use_svc &&
1163
278k
       cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
1164
278k
    assert(x->content_state_sb.source_sad_nonrd == kMedSad);
1165
278k
    assert(x->content_state_sb.source_sad_rd == kMedSad);
1166
278k
    return;
1167
278k
  }
1168
348k
  bool calc_src_content = false;
1169
1170
70.1k
  if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
1171
27.7k
    if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0) {
1172
27.5k
      calc_src_content = is_calc_src_content_needed(cpi, x, mi_row, mi_col);
1173
27.5k
    } else {
1174
111
      x->content_state_sb.source_sad_nonrd = kZeroSad;
1175
111
    }
1176
42.4k
  } else if ((cpi->sf.rt_sf.var_part_based_on_qidx >= 1) &&
1177
0
             (cm->width * cm->height <= 352 * 288)) {
1178
0
    if (cpi->rc.frame_source_sad > 0)
1179
0
      calc_src_content = true;
1180
0
    else
1181
0
      x->content_state_sb.source_sad_rd = kZeroSad;
1182
0
  }
1183
70.1k
  if (calc_src_content)
1184
27.5k
    av1_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
1185
70.1k
}
1186
1187
/*!\brief Encode a superblock row by breaking it into superblocks
1188
 *
1189
 * \ingroup partition_search
1190
 * \callgraph
1191
 * \callergraph
1192
 * Do partition and mode search for an sb row: one row of superblocks filling up
1193
 * the width of the current tile.
1194
 */
1195
static inline void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
1196
                                 TileDataEnc *tile_data, int mi_row,
1197
298k
                                 TokenExtra **tp) {
1198
298k
  AV1_COMMON *const cm = &cpi->common;
1199
298k
  const TileInfo *const tile_info = &tile_data->tile_info;
1200
298k
  MultiThreadInfo *const mt_info = &cpi->mt_info;
1201
298k
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
1202
298k
  AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
1203
298k
  bool row_mt_enabled = mt_info->row_mt_enabled;
1204
298k
  MACROBLOCK *const x = &td->mb;
1205
298k
  MACROBLOCKD *const xd = &x->e_mbd;
1206
298k
  const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info);
1207
298k
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
1208
298k
  const int mib_size = cm->seq_params->mib_size;
1209
298k
  const int mib_size_log2 = cm->seq_params->mib_size_log2;
1210
298k
  const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
1211
298k
  const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
1212
1213
#if CONFIG_COLLECT_COMPONENT_TIMING
1214
  start_timing(cpi, encode_sb_row_time);
1215
#endif
1216
1217
  // Initialize the left context for the new SB row
1218
298k
  av1_zero_left_context(xd);
1219
1220
  // Reset delta for quantizer and loof filters at the beginning of every tile
1221
298k
  if (mi_row == tile_info->mi_row_start || row_mt_enabled) {
1222
292k
    if (cm->delta_q_info.delta_q_present_flag)
1223
1.85k
      xd->current_base_qindex = cm->quant_params.base_qindex;
1224
292k
    if (cm->delta_q_info.delta_lf_present_flag) {
1225
0
      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
1226
0
    }
1227
292k
  }
1228
1229
298k
  reset_thresh_freq_fact(x);
1230
1231
  // Code each SB in the row
1232
298k
  for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
1233
646k
       mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
1234
    // In realtime/allintra mode and when frequency of cost updates is off/tile,
1235
    // wait for the top superblock to finish encoding. Otherwise, wait for the
1236
    // top-right superblock to finish encoding.
1237
348k
    enc_row_mt->sync_read_ptr(
1238
348k
        row_mt_sync, sb_row, sb_col_in_tile - delay_wait_for_top_right_sb(cpi));
1239
1240
348k
#if CONFIG_MULTITHREAD
1241
348k
    if (row_mt_enabled) {
1242
266k
      pthread_mutex_lock(enc_row_mt->mutex_);
1243
266k
      const bool row_mt_exit = enc_row_mt->row_mt_exit;
1244
266k
      pthread_mutex_unlock(enc_row_mt->mutex_);
1245
      // Exit in case any worker has encountered an error.
1246
266k
      if (row_mt_exit) return;
1247
266k
    }
1248
348k
#endif
1249
1250
348k
    const int update_cdf = tile_data->allow_update_cdf && row_mt_enabled;
1251
348k
    if (update_cdf && (tile_info->mi_row_start != mi_row)) {
1252
39.7k
      if ((tile_info->mi_col_start == mi_col)) {
1253
        // restore frame context at the 1st column sb
1254
33.2k
        *xd->tile_ctx = *x->row_ctx;
1255
33.2k
      } else {
1256
        // update context
1257
6.49k
        int wt_left = AVG_CDF_WEIGHT_LEFT;
1258
6.49k
        int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
1259
6.49k
        if (tile_info->mi_col_end > (mi_col + mib_size))
1260
3.29k
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile,
1261
3.29k
                              wt_left, wt_tr);
1262
3.20k
        else
1263
3.20k
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
1264
3.20k
                              wt_left, wt_tr);
1265
6.49k
      }
1266
39.7k
    }
1267
1268
    // Update the rate cost tables for some symbols
1269
348k
    av1_set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);
1270
1271
    // Reset color coding related parameters
1272
348k
    av1_zero(x->color_sensitivity_sb);
1273
348k
    av1_zero(x->color_sensitivity_sb_g);
1274
348k
    av1_zero(x->color_sensitivity_sb_alt);
1275
348k
    av1_zero(x->color_sensitivity);
1276
348k
    x->content_state_sb.source_sad_nonrd = kMedSad;
1277
348k
    x->content_state_sb.source_sad_rd = kMedSad;
1278
348k
    x->content_state_sb.lighting_change = 0;
1279
348k
    x->content_state_sb.low_sumdiff = 0;
1280
348k
    x->force_zeromv_skip_for_sb = 0;
1281
348k
    x->sb_me_block = 0;
1282
348k
    x->sb_me_partition = 0;
1283
348k
    x->sb_me_mv.as_int = 0;
1284
348k
    x->sb_col_scroll = 0;
1285
348k
    x->sb_row_scroll = 0;
1286
348k
    x->sb_force_fixed_part = 1;
1287
348k
    x->color_palette_thresh = 64;
1288
348k
    x->force_color_check_block_level = 0;
1289
348k
    x->nonrd_prune_ref_frame_search =
1290
348k
        cpi->sf.rt_sf.nonrd_prune_ref_frame_search;
1291
1292
348k
    if (cpi->oxcf.mode == ALLINTRA) {
1293
189k
      x->intra_sb_rdmult_modifier = 128;
1294
189k
    }
1295
1296
348k
    xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
1297
348k
    x->source_variance = UINT_MAX;
1298
348k
    td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);
1299
1300
    // Get segment id and skip flag
1301
348k
    const struct segmentation *const seg = &cm->seg;
1302
348k
    int seg_skip = 0;
1303
348k
    if (seg->enabled) {
1304
0
      const uint8_t *const map =
1305
0
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
1306
0
      const uint8_t segment_id =
1307
0
          map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
1308
0
              : 0;
1309
0
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
1310
0
    }
1311
1312
348k
    produce_gradients_for_sb(cpi, x, sb_size, mi_row, mi_col);
1313
1314
348k
    init_src_var_info_of_4x4_sub_blocks(cpi, x->src_var_info_of_4x4_sub_blocks,
1315
348k
                                        sb_size);
1316
1317
    // Grade the temporal variation of the sb, the grade will be used to decide
1318
    // fast mode search strategy for coding blocks
1319
348k
    if (!seg_skip) grade_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
1320
1321
    // encode the superblock
1322
348k
    if (use_nonrd_mode) {
1323
121k
      encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
1324
227k
    } else {
1325
227k
      encode_rd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
1326
227k
    }
1327
1328
    // Update the top-right context in row_mt coding
1329
348k
    if (update_cdf && (tile_info->mi_row_end > (mi_row + mib_size))) {
1330
39.7k
      if (sb_cols_in_tile == 1)
1331
30.0k
        x->row_ctx[0] = *xd->tile_ctx;
1332
9.69k
      else if (sb_col_in_tile >= 1)
1333
6.49k
        x->row_ctx[sb_col_in_tile - 1] = *xd->tile_ctx;
1334
39.7k
    }
1335
348k
    enc_row_mt->sync_write_ptr(row_mt_sync, sb_row, sb_col_in_tile,
1336
348k
                               sb_cols_in_tile);
1337
348k
  }
1338
1339
#if CONFIG_COLLECT_COMPONENT_TIMING
1340
  end_timing(cpi, encode_sb_row_time);
1341
#endif
1342
298k
}
1343
1344
110k
static inline void init_encode_frame_mb_context(AV1_COMP *cpi) {
1345
110k
  AV1_COMMON *const cm = &cpi->common;
1346
110k
  const int num_planes = av1_num_planes(cm);
1347
110k
  MACROBLOCK *const x = &cpi->td.mb;
1348
110k
  MACROBLOCKD *const xd = &x->e_mbd;
1349
1350
  // Copy data over into macro block data structures.
1351
110k
  av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
1352
110k
                       cm->seq_params->sb_size);
1353
1354
110k
  av1_setup_block_planes(xd, cm->seq_params->subsampling_x,
1355
110k
                         cm->seq_params->subsampling_y, num_planes);
1356
110k
}
1357
1358
75.1k
void av1_alloc_tile_data(AV1_COMP *cpi) {
1359
75.1k
  AV1_COMMON *const cm = &cpi->common;
1360
75.1k
  AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt;
1361
75.1k
  const int tile_cols = cm->tiles.cols;
1362
75.1k
  const int tile_rows = cm->tiles.rows;
1363
1364
75.1k
  av1_row_mt_mem_dealloc(cpi);
1365
1366
75.1k
  aom_free(cpi->tile_data);
1367
75.1k
  cpi->allocated_tiles = 0;
1368
75.1k
  enc_row_mt->allocated_tile_cols = 0;
1369
75.1k
  enc_row_mt->allocated_tile_rows = 0;
1370
1371
75.1k
  CHECK_MEM_ERROR(
1372
75.1k
      cm, cpi->tile_data,
1373
75.1k
      aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));
1374
1375
75.1k
  cpi->allocated_tiles = tile_cols * tile_rows;
1376
75.1k
  enc_row_mt->allocated_tile_cols = tile_cols;
1377
75.1k
  enc_row_mt->allocated_tile_rows = tile_rows;
1378
190k
  for (int tile_row = 0; tile_row < tile_rows; ++tile_row) {
1379
303k
    for (int tile_col = 0; tile_col < tile_cols; ++tile_col) {
1380
188k
      const int tile_index = tile_row * tile_cols + tile_col;
1381
188k
      TileDataEnc *const this_tile = &cpi->tile_data[tile_index];
1382
188k
      av1_zero(this_tile->row_mt_sync);
1383
188k
      this_tile->row_ctx = NULL;
1384
188k
    }
1385
115k
  }
1386
75.1k
}
1387
1388
141k
void av1_init_tile_data(AV1_COMP *cpi) {
1389
141k
  AV1_COMMON *const cm = &cpi->common;
1390
141k
  const int num_planes = av1_num_planes(cm);
1391
141k
  const int tile_cols = cm->tiles.cols;
1392
141k
  const int tile_rows = cm->tiles.rows;
1393
141k
  int tile_col, tile_row;
1394
141k
  TokenInfo *const token_info = &cpi->token_info;
1395
141k
  TokenExtra *pre_tok = token_info->tile_tok[0][0];
1396
141k
  TokenList *tplist = token_info->tplist[0][0];
1397
141k
  unsigned int tile_tok = 0;
1398
141k
  int tplist_count = 0;
1399
1400
141k
  if (!is_stat_generation_stage(cpi) &&
1401
110k
      cm->features.allow_screen_content_tools) {
1402
    // Number of tokens for which token info needs to be allocated.
1403
0
    unsigned int tokens_required =
1404
0
        get_token_alloc(cm->mi_params.mb_rows, cm->mi_params.mb_cols,
1405
0
                        MAX_SB_SIZE_LOG2, num_planes);
1406
    // Allocate/reallocate memory for token related info if the number of tokens
1407
    // required is more than the number of tokens already allocated. This could
1408
    // occur in case of the following:
1409
    // 1) If the memory is not yet allocated
1410
    // 2) If the frame dimensions have changed
1411
0
    const bool realloc_tokens = tokens_required > token_info->tokens_allocated;
1412
0
    if (realloc_tokens) {
1413
0
      free_token_info(token_info);
1414
0
      alloc_token_info(cm, token_info, tokens_required);
1415
0
      pre_tok = token_info->tile_tok[0][0];
1416
0
      tplist = token_info->tplist[0][0];
1417
0
    }
1418
0
  }
1419
1420
346k
  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1421
520k
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1422
314k
      TileDataEnc *const tile_data =
1423
314k
          &cpi->tile_data[tile_row * tile_cols + tile_col];
1424
314k
      TileInfo *const tile_info = &tile_data->tile_info;
1425
314k
      av1_tile_init(tile_info, cm, tile_row, tile_col);
1426
314k
      tile_data->firstpass_top_mv = kZeroMv;
1427
314k
      tile_data->abs_sum_level = 0;
1428
1429
314k
      if (is_token_info_allocated(token_info)) {
1430
0
        token_info->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
1431
0
        pre_tok = token_info->tile_tok[tile_row][tile_col];
1432
0
        tile_tok = allocated_tokens(
1433
0
            tile_info, cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1434
0
            num_planes);
1435
0
        token_info->tplist[tile_row][tile_col] = tplist + tplist_count;
1436
0
        tplist = token_info->tplist[tile_row][tile_col];
1437
0
        tplist_count = av1_get_sb_rows_in_tile(cm, tile_info);
1438
0
      }
1439
314k
      tile_data->allow_update_cdf = !cm->tiles.large_scale;
1440
314k
      tile_data->allow_update_cdf = tile_data->allow_update_cdf &&
1441
314k
                                    !cm->features.disable_cdf_update &&
1442
314k
                                    !delay_wait_for_top_right_sb(cpi);
1443
314k
      tile_data->tctx = *cm->fc;
1444
314k
    }
1445
205k
  }
1446
141k
}
1447
1448
// Populate the start palette token info prior to encoding an SB row.
1449
static inline void get_token_start(AV1_COMP *cpi, const TileInfo *tile_info,
1450
                                   int tile_row, int tile_col, int mi_row,
1451
298k
                                   TokenExtra **tp) {
1452
298k
  const TokenInfo *token_info = &cpi->token_info;
1453
298k
  if (!is_token_info_allocated(token_info)) return;
1454
1455
18.4E
  const AV1_COMMON *cm = &cpi->common;
1456
18.4E
  const int num_planes = av1_num_planes(cm);
1457
18.4E
  TokenList *const tplist = cpi->token_info.tplist[tile_row][tile_col];
1458
18.4E
  const int sb_row_in_tile =
1459
18.4E
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1460
1461
18.4E
  get_start_tok(cpi, tile_row, tile_col, mi_row, tp,
1462
18.4E
                cm->seq_params->mib_size_log2 + MI_SIZE_LOG2, num_planes);
1463
18.4E
  assert(tplist != NULL);
1464
18.4E
  tplist[sb_row_in_tile].start = *tp;
1465
18.4E
}
1466
1467
// Populate the token count after encoding an SB row.
1468
static inline void populate_token_count(AV1_COMP *cpi,
1469
                                        const TileInfo *tile_info, int tile_row,
1470
                                        int tile_col, int mi_row,
1471
298k
                                        TokenExtra *tok) {
1472
298k
  const TokenInfo *token_info = &cpi->token_info;
1473
298k
  if (!is_token_info_allocated(token_info)) return;
1474
1475
27
  const AV1_COMMON *cm = &cpi->common;
1476
27
  const int num_planes = av1_num_planes(cm);
1477
27
  TokenList *const tplist = token_info->tplist[tile_row][tile_col];
1478
27
  const int sb_row_in_tile =
1479
27
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1480
27
  const int tile_mb_cols =
1481
27
      (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
1482
27
  const int num_mb_rows_in_sb =
1483
27
      ((1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
1484
27
  tplist[sb_row_in_tile].count =
1485
27
      (unsigned int)(tok - tplist[sb_row_in_tile].start);
1486
1487
27
  assert((unsigned int)(tok - tplist[sb_row_in_tile].start) <=
1488
27
         get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
1489
27
                         cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1490
27
                         num_planes));
1491
1492
27
  (void)num_planes;
1493
27
  (void)tile_mb_cols;
1494
27
  (void)num_mb_rows_in_sb;
1495
27
}
1496
1497
/*!\brief Encode a superblock row
1498
 *
1499
 * \ingroup partition_search
1500
 */
1501
void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
1502
298k
                       int tile_col, int mi_row) {
1503
298k
  AV1_COMMON *const cm = &cpi->common;
1504
298k
  const int tile_cols = cm->tiles.cols;
1505
298k
  TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
1506
298k
  const TileInfo *const tile_info = &this_tile->tile_info;
1507
298k
  TokenExtra *tok = NULL;
1508
1509
298k
  get_token_start(cpi, tile_info, tile_row, tile_col, mi_row, &tok);
1510
1511
298k
  encode_sb_row(cpi, td, this_tile, mi_row, &tok);
1512
1513
298k
  populate_token_count(cpi, tile_info, tile_row, tile_col, mi_row, tok);
1514
298k
}
1515
1516
/*!\brief Encode a tile
1517
 *
1518
 * \ingroup partition_search
1519
 */
1520
void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
1521
58.0k
                     int tile_col) {
1522
58.0k
  AV1_COMMON *const cm = &cpi->common;
1523
58.0k
  TileDataEnc *const this_tile =
1524
58.0k
      &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1525
58.0k
  const TileInfo *const tile_info = &this_tile->tile_info;
1526
1527
58.0k
  if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile);
1528
1529
58.0k
  av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
1530
58.0k
                         tile_info->mi_col_end, tile_row);
1531
58.0k
  av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
1532
58.0k
                         &td->mb.e_mbd);
1533
1534
58.0k
#if !CONFIG_REALTIME_ONLY
1535
58.0k
  if (cpi->oxcf.intra_mode_cfg.enable_cfl_intra)
1536
58.0k
    cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);
1537
58.0k
#endif
1538
1539
58.0k
  if (td->mb.txfm_search_info.mb_rd_record != NULL) {
1540
22.9k
    av1_crc32c_calculator_init(
1541
22.9k
        &td->mb.txfm_search_info.mb_rd_record->crc_calculator);
1542
22.9k
  }
1543
1544
121k
  for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
1545
63.7k
       mi_row += cm->seq_params->mib_size) {
1546
63.7k
    av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
1547
63.7k
  }
1548
58.0k
  this_tile->abs_sum_level = td->abs_sum_level;
1549
58.0k
}
1550
1551
/*!\brief Break one frame into tiles and encode the tiles
1552
 *
1553
 * \ingroup partition_search
1554
 *
1555
 * \param[in]    cpi    Top-level encoder structure
1556
 */
1557
42.4k
static inline void encode_tiles(AV1_COMP *cpi) {
1558
42.4k
  AV1_COMMON *const cm = &cpi->common;
1559
42.4k
  const int tile_cols = cm->tiles.cols;
1560
42.4k
  const int tile_rows = cm->tiles.rows;
1561
42.4k
  int tile_col, tile_row;
1562
1563
42.4k
  MACROBLOCK *const mb = &cpi->td.mb;
1564
42.4k
  assert(IMPLIES(cpi->tile_data == NULL,
1565
42.4k
                 cpi->allocated_tiles < tile_cols * tile_rows));
1566
42.4k
  if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi);
1567
1568
42.4k
  av1_init_tile_data(cpi);
1569
42.4k
  av1_alloc_mb_data(cpi, mb);
1570
1571
90.5k
  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1572
106k
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1573
58.0k
      TileDataEnc *const this_tile =
1574
58.0k
          &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1575
58.0k
      cpi->td.intrabc_used = 0;
1576
58.0k
      cpi->td.deltaq_used = 0;
1577
58.0k
      cpi->td.abs_sum_level = 0;
1578
58.0k
      cpi->td.rd_counts.seg_tmp_pred_cost[0] = 0;
1579
58.0k
      cpi->td.rd_counts.seg_tmp_pred_cost[1] = 0;
1580
58.0k
      cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
1581
58.0k
      cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
1582
58.0k
      av1_init_rtc_counters(&cpi->td.mb);
1583
58.0k
      cpi->td.mb.palette_pixels = 0;
1584
58.0k
      av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
1585
58.0k
      if (!frame_is_intra_only(&cpi->common))
1586
13.3k
        av1_accumulate_rtc_counters(cpi, &cpi->td.mb);
1587
58.0k
      cpi->palette_pixel_num += cpi->td.mb.palette_pixels;
1588
58.0k
      cpi->intrabc_used |= cpi->td.intrabc_used;
1589
58.0k
      cpi->deltaq_used |= cpi->td.deltaq_used;
1590
58.0k
    }
1591
48.1k
  }
1592
1593
42.4k
  av1_dealloc_mb_data(mb, av1_num_planes(cm));
1594
42.4k
}
1595
1596
// Set the relative distance of a reference frame w.r.t. current frame
1597
static inline void set_rel_frame_dist(
1598
    const AV1_COMMON *const cm, RefFrameDistanceInfo *const ref_frame_dist_info,
1599
110k
    const int ref_frame_flags) {
1600
110k
  MV_REFERENCE_FRAME ref_frame;
1601
110k
  int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX;
1602
110k
  ref_frame_dist_info->nearest_past_ref = NONE_FRAME;
1603
110k
  ref_frame_dist_info->nearest_future_ref = NONE_FRAME;
1604
883k
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
1605
772k
    ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = 0;
1606
772k
    if (ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
1607
130k
      int dist = av1_encoder_get_relative_dist(
1608
130k
          cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME],
1609
130k
          cm->current_frame.display_order_hint);
1610
130k
      ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = dist;
1611
      // Get the nearest ref_frame in the past
1612
130k
      if (abs(dist) < min_past_dist && dist < 0) {
1613
33.0k
        ref_frame_dist_info->nearest_past_ref = ref_frame;
1614
33.0k
        min_past_dist = abs(dist);
1615
33.0k
      }
1616
      // Get the nearest ref_frame in the future
1617
130k
      if (dist < min_future_dist && dist > 0) {
1618
1.02k
        ref_frame_dist_info->nearest_future_ref = ref_frame;
1619
1.02k
        min_future_dist = dist;
1620
1.02k
      }
1621
130k
    }
1622
772k
  }
1623
110k
}
1624
1625
27.5k
static inline int refs_are_one_sided(const AV1_COMMON *cm) {
1626
27.5k
  assert(!frame_is_intra_only(cm));
1627
1628
27.5k
  int one_sided_refs = 1;
1629
27.5k
  const int cur_display_order_hint = cm->current_frame.display_order_hint;
1630
220k
  for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
1631
192k
    const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
1632
192k
    if (buf == NULL) continue;
1633
192k
    if (av1_encoder_get_relative_dist(buf->display_order_hint,
1634
192k
                                      cur_display_order_hint) > 0) {
1635
0
      one_sided_refs = 0;  // bwd reference
1636
0
      break;
1637
0
    }
1638
192k
  }
1639
27.5k
  return one_sided_refs;
1640
27.5k
}
1641
1642
static inline void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
1643
10.5k
                                             int ref_order_hint[2]) {
1644
10.5k
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
1645
10.5k
  ref_order_hint[0] = ref_order_hint[1] = 0;
1646
10.5k
  if (!skip_mode_info->skip_mode_allowed) return;
1647
1648
10.5k
  const RefCntBuffer *const buf_0 =
1649
10.5k
      get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
1650
10.5k
  const RefCntBuffer *const buf_1 =
1651
10.5k
      get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
1652
10.5k
  assert(buf_0 != NULL && buf_1 != NULL);
1653
1654
10.5k
  ref_order_hint[0] = buf_0->order_hint;
1655
10.5k
  ref_order_hint[1] = buf_1->order_hint;
1656
10.5k
}
1657
1658
110k
static int check_skip_mode_enabled(AV1_COMP *const cpi) {
1659
110k
  AV1_COMMON *const cm = &cpi->common;
1660
1661
110k
  av1_setup_skip_mode_allowed(cm);
1662
110k
  if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;
1663
1664
  // Turn off skip mode if the temporal distances of the reference pair to the
1665
  // current frame are different by more than 1 frame.
1666
10.5k
  const int cur_offset = (int)cm->current_frame.order_hint;
1667
10.5k
  int ref_offset[2];
1668
10.5k
  get_skip_mode_ref_offsets(cm, ref_offset);
1669
10.5k
  const int cur_to_ref0 = get_relative_dist(&cm->seq_params->order_hint_info,
1670
10.5k
                                            cur_offset, ref_offset[0]);
1671
10.5k
  const int cur_to_ref1 = abs(get_relative_dist(
1672
10.5k
      &cm->seq_params->order_hint_info, cur_offset, ref_offset[1]));
1673
10.5k
  if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;
1674
1675
  // High Latency: Turn off skip mode if all refs are fwd.
1676
9.52k
  if (cpi->all_one_sided_refs && cpi->oxcf.gf_cfg.lag_in_frames > 0) return 0;
1677
1678
6.38k
  const int ref_frame[2] = {
1679
6.38k
    cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME,
1680
6.38k
    cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME
1681
6.38k
  };
1682
6.38k
  if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[0]]) ||
1683
6.38k
      !(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[1]]))
1684
1.86k
    return 0;
1685
1686
4.51k
  return 1;
1687
6.38k
}
1688
1689
static inline void set_default_interp_skip_flags(
1690
110k
    const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
1691
110k
  const int num_planes = av1_num_planes(cm);
1692
110k
  interp_search_flags->default_interp_skip_flags =
1693
110k
      (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
1694
110k
                        : INTERP_SKIP_LUMA_SKIP_CHROMA;
1695
110k
}
1696
1697
110k
static inline void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
1698
110k
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
1699
110k
       cpi->sf.inter_sf.disable_onesided_comp) &&
1700
43.1k
      cpi->all_one_sided_refs) {
1701
    // Disable all compound references
1702
17.2k
    cpi->prune_ref_frame_mask = (1 << MODE_CTX_REF_FRAMES) - (1 << REF_FRAMES);
1703
93.1k
  } else if (!cpi->sf.rt_sf.use_nonrd_pick_mode &&
1704
57.5k
             cpi->sf.inter_sf.selective_ref_frame >= 2) {
1705
25.8k
    AV1_COMMON *const cm = &cpi->common;
1706
25.8k
    const int cur_frame_display_order_hint =
1707
25.8k
        cm->current_frame.display_order_hint;
1708
25.8k
    unsigned int *ref_display_order_hint =
1709
25.8k
        cm->cur_frame->ref_display_order_hint;
1710
25.8k
    const int arf2_dist = av1_encoder_get_relative_dist(
1711
25.8k
        ref_display_order_hint[ALTREF2_FRAME - LAST_FRAME],
1712
25.8k
        cur_frame_display_order_hint);
1713
25.8k
    const int bwd_dist = av1_encoder_get_relative_dist(
1714
25.8k
        ref_display_order_hint[BWDREF_FRAME - LAST_FRAME],
1715
25.8k
        cur_frame_display_order_hint);
1716
1717
569k
    for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
1718
543k
      MV_REFERENCE_FRAME rf[2];
1719
543k
      av1_set_ref_frame(rf, ref_idx);
1720
543k
      if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) ||
1721
543k
          !(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]])) {
1722
543k
        continue;
1723
543k
      }
1724
1725
0
      if (!cpi->all_one_sided_refs) {
1726
0
        int ref_dist[2];
1727
0
        for (int i = 0; i < 2; ++i) {
1728
0
          ref_dist[i] = av1_encoder_get_relative_dist(
1729
0
              ref_display_order_hint[rf[i] - LAST_FRAME],
1730
0
              cur_frame_display_order_hint);
1731
0
        }
1732
1733
        // One-sided compound is used only when all reference frames are
1734
        // one-sided.
1735
0
        if ((ref_dist[0] > 0) == (ref_dist[1] > 0)) {
1736
0
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
1737
0
        }
1738
0
      }
1739
1740
0
      if (cpi->sf.inter_sf.selective_ref_frame >= 4 &&
1741
0
          (rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME) &&
1742
0
          (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
1743
        // Check if both ALTREF2_FRAME and BWDREF_FRAME are future references.
1744
0
        if (arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist) {
1745
          // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
1746
          // reference to the current frame than ALTREF2_FRAME
1747
0
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
1748
0
        }
1749
0
      }
1750
0
    }
1751
25.8k
  }
1752
110k
}
1753
1754
92.5k
static int allow_deltaq_mode(AV1_COMP *cpi) {
1755
92.5k
#if !CONFIG_REALTIME_ONLY
1756
92.5k
  AV1_COMMON *const cm = &cpi->common;
1757
92.5k
  BLOCK_SIZE sb_size = cm->seq_params->sb_size;
1758
92.5k
  int sbs_wide = mi_size_wide[sb_size];
1759
92.5k
  int sbs_high = mi_size_high[sb_size];
1760
1761
92.5k
  int64_t delta_rdcost = 0;
1762
257k
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sbs_high) {
1763
468k
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += sbs_wide) {
1764
303k
      int64_t this_delta_rdcost = 0;
1765
303k
      av1_get_q_for_deltaq_objective(cpi, &cpi->td, &this_delta_rdcost, sb_size,
1766
303k
                                     mi_row, mi_col);
1767
303k
      delta_rdcost += this_delta_rdcost;
1768
303k
    }
1769
164k
  }
1770
92.5k
  return delta_rdcost < 0;
1771
#else
1772
  (void)cpi;
1773
  return 1;
1774
#endif  // !CONFIG_REALTIME_ONLY
1775
92.5k
}
1776
1777
0
#define FORCE_ZMV_SKIP_128X128_BLK_DIFF 10000
1778
#define FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF 4
1779
1780
// Populates block level thresholds for force zeromv-skip decision
1781
110k
static void populate_thresh_to_force_zeromv_skip(AV1_COMP *cpi) {
1782
110k
  if (cpi->sf.rt_sf.part_early_exit_zeromv == 0) return;
1783
1784
  // Threshold for forcing zeromv-skip decision is as below:
1785
  // For 128x128 blocks, threshold is 10000 and per pixel threshold is 0.6103.
1786
  // For 64x64 blocks, threshold is 5000 and per pixel threshold is 1.221
1787
  // allowing slightly higher error for smaller blocks.
1788
  // Per Pixel Threshold of 64x64 block        Area of 64x64 block         1  1
1789
  // ------------------------------------=sqrt(---------------------)=sqrt(-)=-
1790
  // Per Pixel Threshold of 128x128 block      Area of 128x128 block       4  2
1791
  // Thus, per pixel thresholds for blocks of size 32x32, 16x16,...  can be
1792
  // chosen as 2.442, 4.884,.... As the per pixel error tends to be higher for
1793
  // small blocks, the same is clipped to 4.
1794
0
  const unsigned int thresh_exit_128x128_part = FORCE_ZMV_SKIP_128X128_BLK_DIFF;
1795
0
  const int num_128x128_pix =
1796
0
      block_size_wide[BLOCK_128X128] * block_size_high[BLOCK_128X128];
1797
1798
0
  for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
1799
0
    const int num_block_pix = block_size_wide[bsize] * block_size_high[bsize];
1800
1801
    // Calculate the threshold for zeromv-skip decision based on area of the
1802
    // partition
1803
0
    unsigned int thresh_exit_part_blk =
1804
0
        (unsigned int)(thresh_exit_128x128_part *
1805
0
                           sqrt((double)num_block_pix / num_128x128_pix) +
1806
0
                       0.5);
1807
0
    thresh_exit_part_blk = AOMMIN(
1808
0
        thresh_exit_part_blk,
1809
0
        (unsigned int)(FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF * num_block_pix));
1810
0
    cpi->zeromv_skip_thresh_exit_part[bsize] = thresh_exit_part_blk;
1811
0
  }
1812
0
}
1813
1814
0
static void free_block_hash_buffers(uint32_t *block_hash_values[2]) {
1815
0
  for (int j = 0; j < 2; ++j) {
1816
0
    aom_free(block_hash_values[j]);
1817
0
  }
1818
0
}
1819
1820
/*!\brief Determines delta_q_res value for Variance Boost modulation.
1821
 */
1822
0
static int aom_get_variance_boost_delta_q_res(int qindex) {
1823
  // Signaling delta_q changes across superblocks comes with inherent syntax
1824
  // element overhead, which adds up to total payload size. This overhead
1825
  // becomes proportionally bigger the higher the base qindex (i.e. lower
1826
  // quality, smaller file size), so a balance needs to be struck.
1827
  // - Smaller delta_q_res: more granular delta_q control, more bits spent
1828
  // signaling deltas.
1829
  // - Larger delta_q_res: coarser delta_q control, less bits spent signaling
1830
  // deltas.
1831
  //
1832
  // At the same time, SB qindex fluctuations become larger the higher
1833
  // the base qindex (between lowest and highest-variance regions):
1834
  // - For QP 5: up to 8 qindexes
1835
  // - For QP 60: up to 52 qindexes
1836
  //
1837
  // With these factors in mind, it was found that the best strategy that
1838
  // maximizes quality per bitrate is by having very finely-grained delta_q
1839
  // values for the lowest picture qindexes (to preserve tiny qindex SB deltas),
1840
  // and progressively making them coarser as base qindex increases (to reduce
1841
  // total signaling overhead).
1842
0
  int delta_q_res = 1;
1843
1844
0
  if (qindex >= 160) {
1845
0
    delta_q_res = 8;
1846
0
  } else if (qindex >= 120) {
1847
0
    delta_q_res = 4;
1848
0
  } else if (qindex >= 80) {
1849
0
    delta_q_res = 2;
1850
0
  } else {
1851
0
    delta_q_res = 1;
1852
0
  }
1853
1854
0
  return delta_q_res;
1855
0
}
1856
1857
#if !CONFIG_REALTIME_ONLY
1858
0
static float get_thresh_based_on_q(int qindex, int speed) {
1859
0
  const float min_threshold_arr[2] = { 0.06f, 0.09f };
1860
0
  const float max_threshold_arr[2] = { 0.10f, 0.13f };
1861
1862
0
  const float min_thresh = min_threshold_arr[speed >= 3];
1863
0
  const float max_thresh = max_threshold_arr[speed >= 3];
1864
0
  const float thresh = min_thresh + (max_thresh - min_thresh) *
1865
0
                                        ((float)MAXQ - (float)qindex) /
1866
0
                                        (float)(MAXQ - MINQ);
1867
0
  return thresh;
1868
0
}
1869
1870
0
static int get_mv_err(MV cur_mv, MV ref_mv) {
1871
0
  const MV diff = { cur_mv.row - ref_mv.row, cur_mv.col - ref_mv.col };
1872
0
  const MV abs_diff = { abs(diff.row), abs(diff.col) };
1873
0
  const int mv_err = (abs_diff.row + abs_diff.col);
1874
0
  return mv_err;
1875
0
}
1876
1877
0
static void check_mv_err_and_update(MV cur_mv, MV ref_mv, int *best_mv_err) {
1878
0
  const int mv_err = get_mv_err(cur_mv, ref_mv);
1879
0
  *best_mv_err = AOMMIN(mv_err, *best_mv_err);
1880
0
}
1881
1882
static int is_inside_frame_border(int mi_row, int mi_col, int row_offset,
1883
                                  int col_offset, int num_mi_rows,
1884
0
                                  int num_mi_cols) {
1885
0
  if (mi_row + row_offset < 0 || mi_row + row_offset >= num_mi_rows ||
1886
0
      mi_col + col_offset < 0 || mi_col + col_offset >= num_mi_cols)
1887
0
    return 0;
1888
1889
0
  return 1;
1890
0
}
1891
1892
// Compute the minimum MV error between current MV and spatial MV predictors.
1893
static int get_spatial_mvpred_err(AV1_COMMON *cm, TplParams *const tpl_data,
1894
                                  int tpl_idx, int mi_row, int mi_col,
1895
                                  int ref_idx, int_mv cur_mv, int allow_hp,
1896
0
                                  int is_integer) {
1897
0
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
1898
0
  TplDepStats *tpl_ptr = tpl_frame->tpl_stats_ptr;
1899
0
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
1900
1901
0
  int mv_err = INT32_MAX;
1902
0
  const int step = 1 << block_mis_log2;
1903
0
  const int mv_pred_pos_in_mis[6][2] = {
1904
0
    { -step, 0 },     { 0, -step },     { -step, step },
1905
0
    { -step, -step }, { -2 * step, 0 }, { 0, -2 * step },
1906
0
  };
1907
1908
0
  for (int i = 0; i < 6; i++) {
1909
0
    int row_offset = mv_pred_pos_in_mis[i][0];
1910
0
    int col_offset = mv_pred_pos_in_mis[i][1];
1911
0
    if (!is_inside_frame_border(mi_row, mi_col, row_offset, col_offset,
1912
0
                                tpl_frame->mi_rows, tpl_frame->mi_cols)) {
1913
0
      continue;
1914
0
    }
1915
1916
0
    const TplDepStats *tpl_stats =
1917
0
        &tpl_ptr[av1_tpl_ptr_pos(mi_row + row_offset, mi_col + col_offset,
1918
0
                                 tpl_frame->stride, block_mis_log2)];
1919
0
    int_mv this_refmv = tpl_stats->mv[ref_idx];
1920
0
    lower_mv_precision(&this_refmv.as_mv, allow_hp, is_integer);
1921
0
    check_mv_err_and_update(cur_mv.as_mv, this_refmv.as_mv, &mv_err);
1922
0
  }
1923
1924
  // Check MV error w.r.t. Global MV / Zero MV
1925
0
  int_mv gm_mv = { 0 };
1926
0
  if (cm->global_motion[ref_idx + LAST_FRAME].wmtype > TRANSLATION) {
1927
0
    const BLOCK_SIZE bsize = convert_length_to_bsize(tpl_data->tpl_bsize_1d);
1928
0
    gm_mv = gm_get_motion_vector(&cm->global_motion[ref_idx + LAST_FRAME],
1929
0
                                 allow_hp, bsize, mi_col, mi_row, is_integer);
1930
0
  }
1931
0
  check_mv_err_and_update(cur_mv.as_mv, gm_mv.as_mv, &mv_err);
1932
1933
0
  return mv_err;
1934
0
}
1935
1936
// Compute the minimum MV error between current MV and temporal MV predictors.
1937
static int get_temporal_mvpred_err(AV1_COMMON *cm, int mi_row, int mi_col,
1938
                                   int num_mi_rows, int num_mi_cols,
1939
                                   int ref_idx, int_mv cur_mv, int allow_hp,
1940
0
                                   int is_integer) {
1941
0
  const RefCntBuffer *ref_buf = get_ref_frame_buf(cm, ref_idx + LAST_FRAME);
1942
0
  if (ref_buf == NULL) return INT32_MAX;
1943
0
  int cur_to_ref_dist =
1944
0
      get_relative_dist(&cm->seq_params->order_hint_info,
1945
0
                        cm->cur_frame->order_hint, ref_buf->order_hint);
1946
1947
0
  int mv_err = INT32_MAX;
1948
0
  const int mv_pred_pos_in_mis[7][2] = {
1949
0
    { 0, 0 }, { 0, 2 }, { 2, 0 }, { 2, 2 }, { 4, -2 }, { 4, 4 }, { 2, 4 },
1950
0
  };
1951
1952
0
  for (int i = 0; i < 7; i++) {
1953
0
    int row_offset = mv_pred_pos_in_mis[i][0];
1954
0
    int col_offset = mv_pred_pos_in_mis[i][1];
1955
0
    if (!is_inside_frame_border(mi_row, mi_col, row_offset, col_offset,
1956
0
                                num_mi_rows, num_mi_cols)) {
1957
0
      continue;
1958
0
    }
1959
0
    const TPL_MV_REF *ref_mvs =
1960
0
        cm->tpl_mvs +
1961
0
        ((mi_row + row_offset) >> 1) * (cm->mi_params.mi_stride >> 1) +
1962
0
        ((mi_col + col_offset) >> 1);
1963
0
    if (ref_mvs->mfmv0.as_int == INVALID_MV) continue;
1964
1965
0
    int_mv this_refmv;
1966
0
    av1_get_mv_projection(&this_refmv.as_mv, ref_mvs->mfmv0.as_mv,
1967
0
                          cur_to_ref_dist, ref_mvs->ref_frame_offset);
1968
0
    lower_mv_precision(&this_refmv.as_mv, allow_hp, is_integer);
1969
0
    check_mv_err_and_update(cur_mv.as_mv, this_refmv.as_mv, &mv_err);
1970
0
  }
1971
1972
0
  return mv_err;
1973
0
}
1974
1975
// Determine whether to disable temporal MV prediction for the current frame
1976
// based on TPL and motion field data. Temporal MV prediction is disabled if the
1977
// reduction in MV error by including temporal MVs as MV predictors is small.
1978
110k
static void check_to_disable_ref_frame_mvs(AV1_COMP *cpi) {
1979
110k
  AV1_COMMON *cm = &cpi->common;
1980
110k
  if (!cm->features.allow_ref_frame_mvs || cpi->sf.hl_sf.ref_frame_mvs_lvl != 1)
1981
110k
    return;
1982
1983
0
  const int tpl_idx = cpi->gf_frame_index;
1984
0
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
1985
0
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
1986
1987
0
  const SUBPEL_FORCE_STOP tpl_subpel_precision =
1988
0
      cpi->sf.tpl_sf.subpel_force_stop;
1989
0
  const int allow_high_precision_mv = tpl_subpel_precision == EIGHTH_PEL &&
1990
0
                                      cm->features.allow_high_precision_mv;
1991
0
  const int force_integer_mv = tpl_subpel_precision == FULL_PEL ||
1992
0
                               cm->features.cur_frame_force_integer_mv;
1993
1994
0
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
1995
0
  TplDepStats *tpl_ptr = tpl_frame->tpl_stats_ptr;
1996
0
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
1997
0
  const int step = 1 << block_mis_log2;
1998
1999
0
  uint64_t accum_spatial_mvpred_err = 0;
2000
0
  uint64_t accum_best_err = 0;
2001
2002
0
  for (int mi_row = 0; mi_row < tpl_frame->mi_rows; mi_row += step) {
2003
0
    for (int mi_col = 0; mi_col < tpl_frame->mi_cols; mi_col += step) {
2004
0
      TplDepStats *tpl_stats_ptr = &tpl_ptr[av1_tpl_ptr_pos(
2005
0
          mi_row, mi_col, tpl_frame->stride, block_mis_log2)];
2006
0
      const int cur_best_ref_idx = tpl_stats_ptr->ref_frame_index[0];
2007
0
      if (cur_best_ref_idx == NONE_FRAME) continue;
2008
2009
0
      int_mv cur_mv = tpl_stats_ptr->mv[cur_best_ref_idx];
2010
0
      lower_mv_precision(&cur_mv.as_mv, allow_high_precision_mv,
2011
0
                         force_integer_mv);
2012
2013
0
      const int cur_spatial_mvpred_err = get_spatial_mvpred_err(
2014
0
          cm, tpl_data, tpl_idx, mi_row, mi_col, cur_best_ref_idx, cur_mv,
2015
0
          allow_high_precision_mv, force_integer_mv);
2016
2017
0
      const int cur_temporal_mvpred_err = get_temporal_mvpred_err(
2018
0
          cm, mi_row, mi_col, tpl_frame->mi_rows, tpl_frame->mi_cols,
2019
0
          cur_best_ref_idx, cur_mv, allow_high_precision_mv, force_integer_mv);
2020
2021
0
      const int cur_best_err =
2022
0
          AOMMIN(cur_spatial_mvpred_err, cur_temporal_mvpred_err);
2023
0
      accum_spatial_mvpred_err += cur_spatial_mvpred_err;
2024
0
      accum_best_err += cur_best_err;
2025
0
    }
2026
0
  }
2027
2028
0
  const float threshold =
2029
0
      get_thresh_based_on_q(cm->quant_params.base_qindex, cpi->oxcf.speed);
2030
0
  const float mv_err_reduction =
2031
0
      (float)(accum_spatial_mvpred_err - accum_best_err);
2032
2033
0
  if (mv_err_reduction <= threshold * accum_spatial_mvpred_err)
2034
0
    cm->features.allow_ref_frame_mvs = 0;
2035
0
}
2036
#endif  // !CONFIG_REALTIME_ONLY
2037
2038
/*!\brief Encoder setup(only for the current frame), encoding, and recontruction
2039
 * for a single frame
2040
 *
2041
 * \ingroup high_level_algo
2042
 */
2043
110k
static inline void encode_frame_internal(AV1_COMP *cpi) {
2044
110k
  ThreadData *const td = &cpi->td;
2045
110k
  MACROBLOCK *const x = &td->mb;
2046
110k
  AV1_COMMON *const cm = &cpi->common;
2047
110k
  CommonModeInfoParams *const mi_params = &cm->mi_params;
2048
110k
  FeatureFlags *const features = &cm->features;
2049
110k
  MACROBLOCKD *const xd = &x->e_mbd;
2050
110k
  RD_COUNTS *const rdc = &cpi->td.rd_counts;
2051
#if CONFIG_FPMT_TEST
2052
  FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
2053
  FrameProbInfo *const temp_frame_probs_simulation =
2054
      &cpi->ppi->temp_frame_probs_simulation;
2055
#endif
2056
110k
  FrameProbInfo *const frame_probs = &cpi->ppi->frame_probs;
2057
110k
  IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
2058
110k
  MultiThreadInfo *const mt_info = &cpi->mt_info;
2059
110k
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
2060
110k
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
2061
110k
  const DELTAQ_MODE deltaq_mode = oxcf->q_cfg.deltaq_mode;
2062
110k
  int i;
2063
2064
110k
  if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
2065
74.7k
    mi_params->setup_mi(mi_params);
2066
74.7k
  }
2067
2068
110k
  set_mi_offsets(mi_params, xd, 0, 0);
2069
2070
110k
  av1_zero(*td->counts);
2071
110k
  av1_zero(rdc->tx_type_used);
2072
110k
  av1_zero(rdc->obmc_used);
2073
110k
  av1_zero(rdc->warped_used);
2074
110k
  av1_zero(rdc->seg_tmp_pred_cost);
2075
2076
  // Reset the flag.
2077
110k
  cpi->intrabc_used = 0;
2078
  // Need to disable intrabc when superres is selected
2079
110k
  if (av1_superres_scaled(cm)) {
2080
0
    features->allow_intrabc = 0;
2081
0
  }
2082
2083
110k
  features->allow_intrabc &= (oxcf->kf_cfg.enable_intrabc);
2084
2085
110k
  if (features->allow_warped_motion &&
2086
27.5k
      cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
2087
10.3k
    const FRAME_UPDATE_TYPE update_type =
2088
10.3k
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2089
10.3k
    int warped_probability =
2090
#if CONFIG_FPMT_TEST
2091
        cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE
2092
            ? temp_frame_probs->warped_probs[update_type]
2093
            :
2094
#endif  // CONFIG_FPMT_TEST
2095
10.3k
            frame_probs->warped_probs[update_type];
2096
10.3k
    if (warped_probability < cpi->sf.inter_sf.prune_warped_prob_thresh)
2097
0
      features->allow_warped_motion = 0;
2098
10.3k
  }
2099
2100
110k
  int hash_table_created = 0;
2101
110k
  if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
2102
0
      !cpi->sf.rt_sf.use_nonrd_pick_mode) {
2103
    // TODO(any): move this outside of the recoding loop to avoid recalculating
2104
    // the hash table.
2105
    // add to hash table
2106
0
    const int pic_width = cpi->source->y_crop_width;
2107
0
    const int pic_height = cpi->source->y_crop_height;
2108
0
    uint32_t *block_hash_values[2] = { NULL };  // two buffers used ping-pong
2109
0
    bool error = false;
2110
2111
0
    for (int j = 0; j < 2; ++j) {
2112
0
      block_hash_values[j] = (uint32_t *)aom_malloc(
2113
0
          sizeof(*block_hash_values[j]) * pic_width * pic_height);
2114
0
      if (!block_hash_values[j]) {
2115
0
        error = true;
2116
0
        break;
2117
0
      }
2118
0
    }
2119
2120
0
    av1_hash_table_init(intrabc_hash_info);
2121
0
    if (error ||
2122
0
        !av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table)) {
2123
0
      free_block_hash_buffers(block_hash_values);
2124
0
      aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
2125
0
                         "Error allocating intrabc_hash_table and buffers");
2126
0
    }
2127
0
    hash_table_created = 1;
2128
0
    av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0]);
2129
    // Hash data generated for screen contents is used for intraBC ME
2130
0
    const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
2131
0
    int max_sb_size = (1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2));
2132
2133
0
    if (cpi->sf.mv_sf.hash_max_8x8_intrabc_blocks) {
2134
0
      max_sb_size = AOMMIN(8, max_sb_size);
2135
0
    }
2136
2137
0
    int src_idx = 0;
2138
0
    for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
2139
0
      const int dst_idx = !src_idx;
2140
0
      av1_generate_block_hash_value(intrabc_hash_info, cpi->source, size,
2141
0
                                    block_hash_values[src_idx],
2142
0
                                    block_hash_values[dst_idx]);
2143
0
      if (size >= min_alloc_size &&
2144
0
          !av1_add_to_hash_map_by_row_with_precal_data(
2145
0
              &intrabc_hash_info->intrabc_hash_table,
2146
0
              block_hash_values[dst_idx], pic_width, pic_height, size)) {
2147
0
        error = true;
2148
0
        break;
2149
0
      }
2150
0
    }
2151
2152
0
    free_block_hash_buffers(block_hash_values);
2153
2154
0
    if (error) {
2155
0
      aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
2156
0
                         "Error adding data to intrabc_hash_table");
2157
0
    }
2158
0
  }
2159
2160
110k
  const CommonQuantParams *quant_params = &cm->quant_params;
2161
993k
  for (i = 0; i < MAX_SEGMENTS; ++i) {
2162
883k
    const int qindex =
2163
883k
        cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
2164
883k
                        : quant_params->base_qindex;
2165
883k
    xd->lossless[i] =
2166
883k
        qindex == 0 && quant_params->y_dc_delta_q == 0 &&
2167
31.2k
        quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
2168
31.2k
        quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
2169
883k
    if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
2170
883k
    xd->qindex[i] = qindex;
2171
883k
    if (xd->lossless[i]) {
2172
31.2k
      cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
2173
851k
    } else {
2174
851k
      cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
2175
851k
    }
2176
883k
  }
2177
110k
  features->coded_lossless = is_coded_lossless(cm, xd);
2178
110k
  features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm);
2179
2180
  // Fix delta q resolution for the moment
2181
2182
110k
  cm->delta_q_info.delta_q_res = 0;
2183
110k
  if (cpi->use_ducky_encode) {
2184
0
    cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_DUCKY_ENCODE;
2185
110k
  } else if (cpi->oxcf.q_cfg.aq_mode != CYCLIC_REFRESH_AQ &&
2186
110k
             !cpi->roi.enabled) {
2187
110k
    if (deltaq_mode == DELTA_Q_OBJECTIVE)
2188
110k
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE;
2189
0
    else if (deltaq_mode == DELTA_Q_PERCEPTUAL)
2190
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
2191
0
    else if (deltaq_mode == DELTA_Q_PERCEPTUAL_AI)
2192
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
2193
0
    else if (deltaq_mode == DELTA_Q_USER_RATING_BASED)
2194
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
2195
0
    else if (deltaq_mode == DELTA_Q_HDR)
2196
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
2197
0
    else if (deltaq_mode == DELTA_Q_VARIANCE_BOOST)
2198
0
      cm->delta_q_info.delta_q_res =
2199
0
          aom_get_variance_boost_delta_q_res(quant_params->base_qindex);
2200
    // Set delta_q_present_flag before it is used for the first time
2201
110k
    cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
2202
110k
    cm->delta_q_info.delta_q_present_flag = deltaq_mode != NO_DELTA_Q;
2203
2204
    // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q
2205
    // is used for ineligible frames. That effectively will turn off row_mt
2206
    // usage. Note objective delta_q and tpl eligible frames are only altref
2207
    // frames currently.
2208
110k
    const GF_GROUP *gf_group = &cpi->ppi->gf_group;
2209
110k
    if (cm->delta_q_info.delta_q_present_flag) {
2210
110k
      if (deltaq_mode == DELTA_Q_OBJECTIVE &&
2211
110k
          gf_group->update_type[cpi->gf_frame_index] == LF_UPDATE)
2212
17.8k
        cm->delta_q_info.delta_q_present_flag = 0;
2213
2214
110k
      if (deltaq_mode == DELTA_Q_OBJECTIVE &&
2215
110k
          cm->delta_q_info.delta_q_present_flag) {
2216
92.5k
        cm->delta_q_info.delta_q_present_flag &= allow_deltaq_mode(cpi);
2217
92.5k
      }
2218
110k
    }
2219
2220
    // Reset delta_q_used flag
2221
110k
    cpi->deltaq_used = 0;
2222
2223
110k
    cm->delta_q_info.delta_lf_present_flag =
2224
110k
        cm->delta_q_info.delta_q_present_flag &&
2225
585
        oxcf->tool_cfg.enable_deltalf_mode;
2226
110k
    cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
2227
2228
    // update delta_q_present_flag and delta_lf_present_flag based on
2229
    // base_qindex
2230
110k
    cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
2231
110k
    cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
2232
110k
  } else if (cpi->cyclic_refresh->apply_cyclic_refresh ||
2233
0
             cpi->svc.number_temporal_layers == 1) {
2234
0
    cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
2235
0
    cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
2236
0
  }
2237
110k
  cpi->rc.cnt_zeromv = 0;
2238
2239
110k
  av1_frame_init_quantizer(cpi);
2240
110k
  init_encode_frame_mb_context(cpi);
2241
110k
  set_default_interp_skip_flags(cm, &cpi->interp_search_flags);
2242
2243
110k
  if (cm->prev_frame && cm->prev_frame->seg.enabled &&
2244
0
      cpi->svc.number_spatial_layers == 1)
2245
0
    cm->last_frame_seg_map = cm->prev_frame->seg_map;
2246
110k
  else
2247
110k
    cm->last_frame_seg_map = NULL;
2248
110k
  if (features->allow_intrabc || features->coded_lossless) {
2249
3.90k
    av1_set_default_ref_deltas(cm->lf.ref_deltas);
2250
3.90k
    av1_set_default_mode_deltas(cm->lf.mode_deltas);
2251
106k
  } else if (cm->prev_frame) {
2252
20.4k
    memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
2253
20.4k
    memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
2254
20.4k
  }
2255
110k
  memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
2256
110k
  memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
2257
2258
110k
  cpi->all_one_sided_refs =
2259
110k
      frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm);
2260
2261
110k
  cpi->prune_ref_frame_mask = 0;
2262
  // Figure out which ref frames can be skipped at frame level.
2263
110k
  setup_prune_ref_frame_mask(cpi);
2264
2265
110k
  x->txfm_search_info.txb_split_count = 0;
2266
#if CONFIG_SPEED_STATS
2267
  x->txfm_search_info.tx_search_count = 0;
2268
#endif  // CONFIG_SPEED_STATS
2269
2270
110k
#if !CONFIG_REALTIME_ONLY
2271
#if CONFIG_COLLECT_COMPONENT_TIMING
2272
  start_timing(cpi, av1_compute_global_motion_time);
2273
#endif
2274
110k
  av1_compute_global_motion_facade(cpi);
2275
#if CONFIG_COLLECT_COMPONENT_TIMING
2276
  end_timing(cpi, av1_compute_global_motion_time);
2277
#endif
2278
110k
#endif  // !CONFIG_REALTIME_ONLY
2279
2280
#if CONFIG_COLLECT_COMPONENT_TIMING
2281
  start_timing(cpi, av1_setup_motion_field_time);
2282
#endif
2283
110k
  av1_calculate_ref_frame_side(cm);
2284
2285
110k
  features->allow_ref_frame_mvs &= !(cpi->sf.hl_sf.ref_frame_mvs_lvl == 2);
2286
110k
  if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
2287
110k
#if !CONFIG_REALTIME_ONLY
2288
110k
  check_to_disable_ref_frame_mvs(cpi);
2289
110k
#endif  // !CONFIG_REALTIME_ONLY
2290
2291
#if CONFIG_COLLECT_COMPONENT_TIMING
2292
  end_timing(cpi, av1_setup_motion_field_time);
2293
#endif
2294
2295
110k
  cm->current_frame.skip_mode_info.skip_mode_flag =
2296
110k
      check_skip_mode_enabled(cpi);
2297
2298
  // Initialization of skip mode cost depends on the value of
2299
  // 'skip_mode_flag'. This initialization happens in the function
2300
  // av1_fill_mode_rates(), which is in turn called in
2301
  // av1_initialize_rd_consts(). Thus, av1_initialize_rd_consts()
2302
  // has to be called after 'skip_mode_flag' is initialized.
2303
110k
  av1_initialize_rd_consts(cpi);
2304
110k
  av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
2305
110k
  populate_thresh_to_force_zeromv_skip(cpi);
2306
2307
110k
  enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
2308
110k
  enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
2309
110k
  mt_info->row_mt_enabled = 0;
2310
110k
  mt_info->pack_bs_mt_enabled = AOMMIN(mt_info->num_mod_workers[MOD_PACK_BS],
2311
110k
                                       cm->tiles.cols * cm->tiles.rows) > 1;
2312
2313
110k
  if (oxcf->row_mt && (mt_info->num_workers > 1)) {
2314
67.9k
    mt_info->row_mt_enabled = 1;
2315
67.9k
    enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
2316
67.9k
    enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
2317
67.9k
    av1_encode_tiles_row_mt(cpi);
2318
67.9k
  } else {
2319
42.4k
    if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1) {
2320
0
      av1_encode_tiles_mt(cpi);
2321
42.4k
    } else {
2322
      // Preallocate the pc_tree for realtime coding to reduce the cost of
2323
      // memory allocation.
2324
42.4k
      const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
2325
42.4k
      if (use_nonrd_mode) {
2326
13.6k
        td->pc_root = av1_alloc_pc_tree_node(cm->seq_params->sb_size);
2327
13.6k
        if (!td->pc_root)
2328
0
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2329
0
                             "Failed to allocate PC_TREE");
2330
28.8k
      } else {
2331
28.8k
        td->pc_root = NULL;
2332
28.8k
      }
2333
2334
42.4k
      encode_tiles(cpi);
2335
42.4k
      av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
2336
42.4k
                                 cpi->sf.part_sf.partition_search_type);
2337
42.4k
      td->pc_root = NULL;
2338
42.4k
    }
2339
42.4k
  }
2340
2341
  // If intrabc is allowed but never selected, reset the allow_intrabc flag.
2342
110k
  if (features->allow_intrabc && !cpi->intrabc_used) {
2343
0
    features->allow_intrabc = 0;
2344
0
  }
2345
110k
  if (features->allow_intrabc) {
2346
0
    cm->delta_q_info.delta_lf_present_flag = 0;
2347
0
  }
2348
2349
110k
  if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
2350
266
    cm->delta_q_info.delta_q_present_flag = 0;
2351
266
  }
2352
2353
  // Set the transform size appropriately before bitstream creation
2354
110k
  const MODE_EVAL_TYPE eval_type =
2355
110k
      cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch
2356
110k
          ? WINNER_MODE_EVAL
2357
110k
          : DEFAULT_EVAL;
2358
110k
  const TX_SIZE_SEARCH_METHOD tx_search_type =
2359
110k
      cpi->winner_mode_params.tx_size_search_methods[eval_type];
2360
110k
  assert(oxcf->txfm_cfg.enable_tx64 || tx_search_type != USE_LARGESTALL);
2361
110k
  features->tx_mode = select_tx_mode(cm, tx_search_type);
2362
2363
  // Retain the frame level probability update conditions for parallel frames.
2364
  // These conditions will be consumed during postencode stage to update the
2365
  // probability.
2366
110k
  if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2367
0
    cpi->do_update_frame_probs_txtype[cpi->num_frame_recode] =
2368
0
        cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats;
2369
0
    cpi->do_update_frame_probs_obmc[cpi->num_frame_recode] =
2370
0
        (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2371
0
         cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX);
2372
0
    cpi->do_update_frame_probs_warp[cpi->num_frame_recode] =
2373
0
        (features->allow_warped_motion &&
2374
0
         cpi->sf.inter_sf.prune_warped_prob_thresh > 0);
2375
0
    cpi->do_update_frame_probs_interpfilter[cpi->num_frame_recode] =
2376
0
        (cm->current_frame.frame_type != KEY_FRAME &&
2377
0
         cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2378
0
         features->interp_filter == SWITCHABLE);
2379
0
  }
2380
2381
110k
  if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats ||
2382
110k
      ((cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh !=
2383
110k
        INT_MAX) &&
2384
64.9k
       (cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh != 0))) {
2385
43.3k
    const FRAME_UPDATE_TYPE update_type =
2386
43.3k
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2387
866k
    for (i = 0; i < TX_SIZES_ALL; i++) {
2388
823k
      int sum = 0;
2389
823k
      int j;
2390
823k
      int left = MAX_TX_TYPE_PROB;
2391
2392
13.9M
      for (j = 0; j < TX_TYPES; j++)
2393
13.1M
        sum += cpi->td.rd_counts.tx_type_used[i][j];
2394
2395
13.9M
      for (j = TX_TYPES - 1; j >= 0; j--) {
2396
13.1M
        int update_txtype_frameprobs = 1;
2397
13.1M
        const int new_prob =
2398
13.1M
            sum ? (int)((int64_t)MAX_TX_TYPE_PROB *
2399
1.51M
                        cpi->td.rd_counts.tx_type_used[i][j] / sum)
2400
13.1M
                : (j ? 0 : MAX_TX_TYPE_PROB);
2401
#if CONFIG_FPMT_TEST
2402
        if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2403
          if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2404
              0) {
2405
            int prob =
2406
                (temp_frame_probs_simulation->tx_type_probs[update_type][i][j] +
2407
                 new_prob) >>
2408
                1;
2409
            left -= prob;
2410
            if (j == 0) prob += left;
2411
            temp_frame_probs_simulation->tx_type_probs[update_type][i][j] =
2412
                prob;
2413
            // Copy temp_frame_probs_simulation to temp_frame_probs
2414
            for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2415
                 update_type_idx++) {
2416
              temp_frame_probs->tx_type_probs[update_type_idx][i][j] =
2417
                  temp_frame_probs_simulation
2418
                      ->tx_type_probs[update_type_idx][i][j];
2419
            }
2420
          }
2421
          update_txtype_frameprobs = 0;
2422
        }
2423
#endif  // CONFIG_FPMT_TEST
2424
        // Track the frame probabilities of parallel encode frames to update
2425
        // during postencode stage.
2426
13.1M
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2427
0
          update_txtype_frameprobs = 0;
2428
0
          cpi->frame_new_probs[cpi->num_frame_recode]
2429
0
              .tx_type_probs[update_type][i][j] = new_prob;
2430
0
        }
2431
13.1M
        if (update_txtype_frameprobs) {
2432
13.1M
          int prob =
2433
13.1M
              (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
2434
13.1M
          left -= prob;
2435
13.1M
          if (j == 0) prob += left;
2436
13.1M
          frame_probs->tx_type_probs[update_type][i][j] = prob;
2437
13.1M
        }
2438
13.1M
      }
2439
823k
    }
2440
43.3k
  }
2441
2442
110k
  if (cm->seg.enabled) {
2443
0
    cm->seg.temporal_update = 1;
2444
0
    if (rdc->seg_tmp_pred_cost[0] < rdc->seg_tmp_pred_cost[1])
2445
0
      cm->seg.temporal_update = 0;
2446
0
  }
2447
2448
110k
  if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2449
43.1k
      cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
2450
0
    const FRAME_UPDATE_TYPE update_type =
2451
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2452
2453
0
    for (i = 0; i < BLOCK_SIZES_ALL; i++) {
2454
0
      int sum = 0;
2455
0
      int update_obmc_frameprobs = 1;
2456
0
      for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];
2457
2458
0
      const int new_prob =
2459
0
          sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
2460
#if CONFIG_FPMT_TEST
2461
      if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2462
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2463
          temp_frame_probs_simulation->obmc_probs[update_type][i] =
2464
              (temp_frame_probs_simulation->obmc_probs[update_type][i] +
2465
               new_prob) >>
2466
              1;
2467
          // Copy temp_frame_probs_simulation to temp_frame_probs
2468
          for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2469
               update_type_idx++) {
2470
            temp_frame_probs->obmc_probs[update_type_idx][i] =
2471
                temp_frame_probs_simulation->obmc_probs[update_type_idx][i];
2472
          }
2473
        }
2474
        update_obmc_frameprobs = 0;
2475
      }
2476
#endif  // CONFIG_FPMT_TEST
2477
      // Track the frame probabilities of parallel encode frames to update
2478
      // during postencode stage.
2479
0
      if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2480
0
        update_obmc_frameprobs = 0;
2481
0
        cpi->frame_new_probs[cpi->num_frame_recode].obmc_probs[update_type][i] =
2482
0
            new_prob;
2483
0
      }
2484
0
      if (update_obmc_frameprobs) {
2485
0
        frame_probs->obmc_probs[update_type][i] =
2486
0
            (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
2487
0
      }
2488
0
    }
2489
0
  }
2490
2491
110k
  if (features->allow_warped_motion &&
2492
27.5k
      cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
2493
10.3k
    const FRAME_UPDATE_TYPE update_type =
2494
10.3k
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2495
10.3k
    int update_warp_frameprobs = 1;
2496
10.3k
    int sum = 0;
2497
31.0k
    for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
2498
10.3k
    const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
2499
#if CONFIG_FPMT_TEST
2500
    if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2501
      if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2502
        temp_frame_probs_simulation->warped_probs[update_type] =
2503
            (temp_frame_probs_simulation->warped_probs[update_type] +
2504
             new_prob) >>
2505
            1;
2506
        // Copy temp_frame_probs_simulation to temp_frame_probs
2507
        for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2508
             update_type_idx++) {
2509
          temp_frame_probs->warped_probs[update_type_idx] =
2510
              temp_frame_probs_simulation->warped_probs[update_type_idx];
2511
        }
2512
      }
2513
      update_warp_frameprobs = 0;
2514
    }
2515
#endif  // CONFIG_FPMT_TEST
2516
    // Track the frame probabilities of parallel encode frames to update
2517
    // during postencode stage.
2518
10.3k
    if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2519
0
      update_warp_frameprobs = 0;
2520
0
      cpi->frame_new_probs[cpi->num_frame_recode].warped_probs[update_type] =
2521
0
          new_prob;
2522
0
    }
2523
10.3k
    if (update_warp_frameprobs) {
2524
10.3k
      frame_probs->warped_probs[update_type] =
2525
10.3k
          (frame_probs->warped_probs[update_type] + new_prob) >> 1;
2526
10.3k
    }
2527
10.3k
  }
2528
2529
110k
  if (cm->current_frame.frame_type != KEY_FRAME &&
2530
27.5k
      cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2531
17.2k
      features->interp_filter == SWITCHABLE) {
2532
17.2k
    const FRAME_UPDATE_TYPE update_type =
2533
17.2k
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2534
2535
292k
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
2536
275k
      int sum = 0;
2537
275k
      int j;
2538
275k
      int left = 1536;
2539
2540
1.10M
      for (j = 0; j < SWITCHABLE_FILTERS; j++) {
2541
826k
        sum += cpi->td.counts->switchable_interp[i][j];
2542
826k
      }
2543
2544
1.10M
      for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
2545
826k
        int update_interpfilter_frameprobs = 1;
2546
826k
        const int new_prob =
2547
826k
            sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
2548
826k
                : (j ? 0 : 1536);
2549
#if CONFIG_FPMT_TEST
2550
        if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2551
          if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2552
              0) {
2553
            int prob = (temp_frame_probs_simulation
2554
                            ->switchable_interp_probs[update_type][i][j] +
2555
                        new_prob) >>
2556
                       1;
2557
            left -= prob;
2558
            if (j == 0) prob += left;
2559
            temp_frame_probs_simulation
2560
                ->switchable_interp_probs[update_type][i][j] = prob;
2561
            // Copy temp_frame_probs_simulation to temp_frame_probs
2562
            for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2563
                 update_type_idx++) {
2564
              temp_frame_probs->switchable_interp_probs[update_type_idx][i][j] =
2565
                  temp_frame_probs_simulation
2566
                      ->switchable_interp_probs[update_type_idx][i][j];
2567
            }
2568
          }
2569
          update_interpfilter_frameprobs = 0;
2570
        }
2571
#endif  // CONFIG_FPMT_TEST
2572
        // Track the frame probabilities of parallel encode frames to update
2573
        // during postencode stage.
2574
826k
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2575
0
          update_interpfilter_frameprobs = 0;
2576
0
          cpi->frame_new_probs[cpi->num_frame_recode]
2577
0
              .switchable_interp_probs[update_type][i][j] = new_prob;
2578
0
        }
2579
826k
        if (update_interpfilter_frameprobs) {
2580
826k
          int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
2581
826k
                      new_prob) >>
2582
826k
                     1;
2583
826k
          left -= prob;
2584
826k
          if (j == 0) prob += left;
2585
826k
          frame_probs->switchable_interp_probs[update_type][i][j] = prob;
2586
826k
        }
2587
826k
      }
2588
275k
    }
2589
17.2k
  }
2590
110k
  if (hash_table_created) {
2591
0
    av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
2592
0
  }
2593
110k
}
2594
2595
/*!\brief Setup reference frame buffers and encode a frame
2596
 *
2597
 * \ingroup high_level_algo
2598
 * \callgraph
2599
 * \callergraph
2600
 *
2601
 * \param[in]    cpi    Top-level encoder structure
2602
 */
2603
110k
void av1_encode_frame(AV1_COMP *cpi) {
2604
110k
  AV1_COMMON *const cm = &cpi->common;
2605
110k
  CurrentFrame *const current_frame = &cm->current_frame;
2606
110k
  FeatureFlags *const features = &cm->features;
2607
110k
  RD_COUNTS *const rdc = &cpi->td.rd_counts;
2608
110k
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
2609
  // Indicates whether or not to use a default reduced set for ext-tx
2610
  // rather than the potential full set of 16 transforms
2611
110k
  features->reduced_tx_set_used = oxcf->txfm_cfg.reduced_tx_type_set;
2612
2613
  // Make sure segment_id is no larger than last_active_segid.
2614
110k
  if (cm->seg.enabled && cm->seg.update_map) {
2615
0
    const int mi_rows = cm->mi_params.mi_rows;
2616
0
    const int mi_cols = cm->mi_params.mi_cols;
2617
0
    const int last_active_segid = cm->seg.last_active_segid;
2618
0
    uint8_t *map = cpi->enc_seg.map;
2619
0
    for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
2620
0
      for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
2621
0
        map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
2622
0
      }
2623
0
      map += mi_cols;
2624
0
    }
2625
0
  }
2626
2627
110k
  av1_setup_frame_buf_refs(cm);
2628
110k
  enforce_max_ref_frames(cpi, &cpi->ref_frame_flags,
2629
110k
                         cm->cur_frame->ref_display_order_hint,
2630
110k
                         cm->current_frame.display_order_hint);
2631
110k
  set_rel_frame_dist(&cpi->common, &cpi->ref_frame_dist_info,
2632
110k
                     cpi->ref_frame_flags);
2633
110k
  av1_setup_frame_sign_bias(cm);
2634
2635
  // If global motion is enabled, then every buffer which is used as either
2636
  // a source or a ref frame should have an image pyramid allocated.
2637
  // Check here so that issues can be caught early in debug mode
2638
#if !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
2639
  if (cpi->alloc_pyramid) {
2640
    assert(cpi->source->y_pyramid);
2641
    for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
2642
      const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
2643
      if (buf != NULL) {
2644
        assert(buf->buf.y_pyramid);
2645
      }
2646
    }
2647
  }
2648
#endif  // !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
2649
2650
#if CONFIG_MISMATCH_DEBUG
2651
  mismatch_reset_frame(av1_num_planes(cm));
2652
#endif
2653
2654
110k
  rdc->newmv_or_intra_blocks = 0;
2655
110k
  cpi->palette_pixel_num = 0;
2656
2657
110k
  if (cpi->sf.hl_sf.frame_parameter_update ||
2658
101k
      cpi->sf.rt_sf.use_comp_ref_nonrd) {
2659
101k
    if (frame_is_intra_only(cm))
2660
77.9k
      current_frame->reference_mode = SINGLE_REFERENCE;
2661
23.3k
    else
2662
23.3k
      current_frame->reference_mode = REFERENCE_MODE_SELECT;
2663
2664
101k
    features->interp_filter = SWITCHABLE;
2665
101k
    if (cm->tiles.large_scale) features->interp_filter = EIGHTTAP_REGULAR;
2666
2667
101k
    features->switchable_motion_mode = is_switchable_motion_mode_allowed(
2668
101k
        features->allow_warped_motion, oxcf->motion_mode_cfg.enable_obmc);
2669
2670
101k
    rdc->compound_ref_used_flag = 0;
2671
101k
    rdc->skip_mode_used_flag = 0;
2672
2673
101k
    encode_frame_internal(cpi);
2674
2675
101k
    if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
2676
      // Use a flag that includes 4x4 blocks
2677
23.3k
      if (rdc->compound_ref_used_flag == 0) {
2678
23.3k
        current_frame->reference_mode = SINGLE_REFERENCE;
2679
#if CONFIG_ENTROPY_STATS
2680
        av1_zero(cpi->td.counts->comp_inter);
2681
#endif  // CONFIG_ENTROPY_STATS
2682
23.3k
      }
2683
23.3k
    }
2684
    // Re-check on the skip mode status as reference mode may have been
2685
    // changed.
2686
101k
    SkipModeInfo *const skip_mode_info = &current_frame->skip_mode_info;
2687
101k
    if (frame_is_intra_only(cm) ||
2688
101k
        current_frame->reference_mode == SINGLE_REFERENCE) {
2689
101k
      skip_mode_info->skip_mode_allowed = 0;
2690
101k
      skip_mode_info->skip_mode_flag = 0;
2691
101k
    }
2692
101k
    if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
2693
0
      skip_mode_info->skip_mode_flag = 0;
2694
2695
101k
    if (!cm->tiles.large_scale) {
2696
101k
      if (features->tx_mode == TX_MODE_SELECT &&
2697
97.9k
          cpi->td.mb.txfm_search_info.txb_split_count == 0)
2698
49.7k
        features->tx_mode = TX_MODE_LARGEST;
2699
101k
    }
2700
101k
  } else {
2701
    // This is needed if real-time speed setting is changed on the fly
2702
    // from one using compound prediction to one using single reference.
2703
9.08k
    if (current_frame->reference_mode == REFERENCE_MODE_SELECT)
2704
0
      current_frame->reference_mode = SINGLE_REFERENCE;
2705
9.08k
    encode_frame_internal(cpi);
2706
9.08k
  }
2707
110k
}