Coverage Report

Created: 2025-11-24 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/aom/av1/encoder/encodeframe.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <limits.h>
13
#include <float.h>
14
#include <math.h>
15
#include <stdbool.h>
16
#include <stdio.h>
17
18
#include "config/aom_config.h"
19
#include "config/aom_dsp_rtcd.h"
20
#include "config/av1_rtcd.h"
21
22
#include "aom_dsp/aom_dsp_common.h"
23
#include "aom_dsp/binary_codes_writer.h"
24
#include "aom_ports/mem.h"
25
#include "aom_ports/aom_timer.h"
26
#include "aom_util/aom_pthread.h"
27
#if CONFIG_MISMATCH_DEBUG
28
#include "aom_util/debug_util.h"
29
#endif  // CONFIG_MISMATCH_DEBUG
30
31
#include "av1/common/cfl.h"
32
#include "av1/common/common.h"
33
#include "av1/common/common_data.h"
34
#include "av1/common/entropy.h"
35
#include "av1/common/entropymode.h"
36
#include "av1/common/idct.h"
37
#include "av1/common/mv.h"
38
#include "av1/common/mvref_common.h"
39
#include "av1/common/pred_common.h"
40
#include "av1/common/quant_common.h"
41
#include "av1/common/reconintra.h"
42
#include "av1/common/reconinter.h"
43
#include "av1/common/seg_common.h"
44
#include "av1/common/tile_common.h"
45
#include "av1/common/warped_motion.h"
46
47
#include "av1/encoder/allintra_vis.h"
48
#include "av1/encoder/aq_complexity.h"
49
#include "av1/encoder/aq_cyclicrefresh.h"
50
#include "av1/encoder/aq_variance.h"
51
#include "av1/encoder/av1_quantize.h"
52
#include "av1/encoder/global_motion_facade.h"
53
#include "av1/encoder/encodeframe.h"
54
#include "av1/encoder/encodeframe_utils.h"
55
#include "av1/encoder/encodemb.h"
56
#include "av1/encoder/encodemv.h"
57
#include "av1/encoder/encodetxb.h"
58
#include "av1/encoder/ethread.h"
59
#include "av1/encoder/extend.h"
60
#include "av1/encoder/intra_mode_search_utils.h"
61
#include "av1/encoder/ml.h"
62
#include "av1/encoder/motion_search_facade.h"
63
#include "av1/encoder/partition_strategy.h"
64
#if !CONFIG_REALTIME_ONLY
65
#include "av1/encoder/partition_model_weights.h"
66
#endif
67
#include "av1/encoder/partition_search.h"
68
#include "av1/encoder/rd.h"
69
#include "av1/encoder/rdopt.h"
70
#include "av1/encoder/reconinter_enc.h"
71
#include "av1/encoder/segmentation.h"
72
#include "av1/encoder/tokenize.h"
73
#include "av1/encoder/tpl_model.h"
74
#include "av1/encoder/var_based_part.h"
75
76
#if CONFIG_TUNE_VMAF
77
#include "av1/encoder/tune_vmaf.h"
78
#endif
79
80
/*!\cond */
81
// This is used as a reference when computing the source variance for the
82
//  purposes of activity masking.
83
// Eventually this should be replaced by custom no-reference routines,
84
//  which will be faster.
85
static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
86
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
87
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
88
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
89
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
90
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
91
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
92
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
93
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
94
  128, 128, 128, 128, 128, 128, 128, 128
95
};
96
97
#if CONFIG_AV1_HIGHBITDEPTH
98
static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
99
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
100
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
101
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
102
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
103
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
104
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
105
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
106
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
107
  128, 128, 128, 128, 128, 128, 128, 128
108
};
109
110
static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
111
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
112
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
113
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
114
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
115
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
116
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
117
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
118
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
119
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
120
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
121
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
122
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
123
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
124
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
125
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
126
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
127
};
128
129
static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
130
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
131
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
132
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
133
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
134
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
135
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
136
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
137
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
138
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
139
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
140
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
141
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
142
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
143
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
144
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
145
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
146
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
147
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
148
  128 * 16, 128 * 16
149
};
150
#endif  // CONFIG_AV1_HIGHBITDEPTH
151
/*!\endcond */
152
153
// For the given bit depth, returns a constant array used to assist the
154
// calculation of source block variance, which will then be used to decide
155
// adaptive quantizers.
156
24.7M
static const uint8_t *get_var_offs(int use_hbd, int bd) {
157
24.7M
#if CONFIG_AV1_HIGHBITDEPTH
158
24.7M
  if (use_hbd) {
159
3.32M
    assert(bd == 8 || bd == 10 || bd == 12);
160
3.32M
    const int off_index = (bd - 8) >> 1;
161
3.32M
    static const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
162
3.32M
                                                AV1_HIGH_VAR_OFFS_10,
163
3.32M
                                                AV1_HIGH_VAR_OFFS_12 };
164
3.32M
    return CONVERT_TO_BYTEPTR(high_var_offs[off_index]);
165
3.32M
  }
166
#else
167
  (void)use_hbd;
168
  (void)bd;
169
  assert(!use_hbd);
170
#endif
171
24.7M
  assert(bd == 8);
172
21.3M
  return AV1_VAR_OFFS;
173
24.7M
}
174
175
244k
void av1_init_rtc_counters(MACROBLOCK *const x) {
176
244k
  av1_init_cyclic_refresh_counters(x);
177
244k
  x->cnt_zeromv = 0;
178
244k
}
179
180
51.0k
void av1_accumulate_rtc_counters(AV1_COMP *cpi, const MACROBLOCK *const x) {
181
51.0k
  if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ)
182
0
    av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh, x);
183
51.0k
  cpi->rc.cnt_zeromv += x->cnt_zeromv;
184
51.0k
  cpi->rc.num_col_blscroll_last_tl0 += x->sb_col_scroll;
185
51.0k
  cpi->rc.num_row_blscroll_last_tl0 += x->sb_row_scroll;
186
51.0k
}
187
188
unsigned int av1_get_perpixel_variance(const AV1_COMP *cpi,
189
                                       const MACROBLOCKD *xd,
190
                                       const struct buf_2d *ref,
191
                                       BLOCK_SIZE bsize, int plane,
192
24.7M
                                       int use_hbd) {
193
24.7M
  const int subsampling_x = xd->plane[plane].subsampling_x;
194
24.7M
  const int subsampling_y = xd->plane[plane].subsampling_y;
195
24.7M
  const BLOCK_SIZE plane_bsize =
196
24.7M
      get_plane_block_size(bsize, subsampling_x, subsampling_y);
197
24.7M
  unsigned int sse;
198
24.7M
  const unsigned int var = cpi->ppi->fn_ptr[plane_bsize].vf(
199
24.7M
      ref->buf, ref->stride, get_var_offs(use_hbd, xd->bd), 0, &sse);
200
24.7M
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[plane_bsize]);
201
24.7M
}
202
203
unsigned int av1_get_perpixel_variance_facade(const AV1_COMP *cpi,
204
                                              const MACROBLOCKD *xd,
205
                                              const struct buf_2d *ref,
206
24.7M
                                              BLOCK_SIZE bsize, int plane) {
207
24.7M
  const int use_hbd = is_cur_buf_hbd(xd);
208
24.7M
  return av1_get_perpixel_variance(cpi, xd, ref, bsize, plane, use_hbd);
209
24.7M
}
210
211
void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
212
                          int mi_row, int mi_col, const int num_planes,
213
43.5M
                          BLOCK_SIZE bsize) {
214
  // Set current frame pointer.
215
43.5M
  x->e_mbd.cur_buf = src;
216
217
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
218
  // the static analysis warnings.
219
122M
  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
220
78.4M
    const int is_uv = i > 0;
221
78.4M
    setup_pred_plane(
222
78.4M
        &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv],
223
78.4M
        src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL,
224
78.4M
        x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y);
225
78.4M
  }
226
43.5M
}
227
228
#if !CONFIG_REALTIME_ONLY
229
/*!\brief Assigns different quantization parameters to each superblock
230
 * based on statistics relevant to the selected delta-q mode (variance).
231
 * This is the non-rd version.
232
 *
233
 * \param[in]     cpi         Top level encoder instance structure
234
 * \param[in,out] td          Thread data structure
235
 * \param[in,out] x           Superblock level data for this block.
236
 * \param[in]     tile_info   Tile information / identification
237
 * \param[in]     mi_row      Block row (in "MI_SIZE" units) index
238
 * \param[in]     mi_col      Block column (in "MI_SIZE" units) index
239
 * \param[out]    num_planes  Number of image planes (e.g. Y,U,V)
240
 *
241
 * \remark No return value but updates superblock and thread data
242
 * related to the q / q delta to be used.
243
 */
244
static inline void setup_delta_q_nonrd(AV1_COMP *const cpi, ThreadData *td,
245
                                       MACROBLOCK *const x,
246
                                       const TileInfo *const tile_info,
247
0
                                       int mi_row, int mi_col, int num_planes) {
248
0
  AV1_COMMON *const cm = &cpi->common;
249
0
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
250
0
  assert(delta_q_info->delta_q_present_flag);
251
252
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
253
0
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
254
255
0
  const int delta_q_res = delta_q_info->delta_q_res;
256
0
  int current_qindex = cm->quant_params.base_qindex;
257
258
0
  if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_VARIANCE_BOOST) {
259
0
    current_qindex = av1_get_sbq_variance_boost(cpi, x);
260
0
  }
261
262
0
  x->rdmult_cur_qindex = current_qindex;
263
0
  MACROBLOCKD *const xd = &x->e_mbd;
264
0
  current_qindex = av1_adjust_q_from_delta_q_res(
265
0
      delta_q_res, xd->current_base_qindex, current_qindex);
266
267
0
  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
268
0
  x->rdmult_delta_qindex = x->delta_qindex;
269
270
0
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
271
0
  xd->mi[0]->current_qindex = current_qindex;
272
0
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);
273
274
  // keep track of any non-zero delta-q used
275
0
  td->deltaq_used |= (x->delta_qindex != 0);
276
0
}
277
278
/*!\brief Assigns different quantization parameters to each superblock
279
 * based on statistics relevant to the selected delta-q mode (TPL weight,
280
 * variance, HDR, etc).
281
 *
282
 * \ingroup tpl_modelling
283
 *
284
 * \param[in]     cpi         Top level encoder instance structure
285
 * \param[in,out] td          Thread data structure
286
 * \param[in,out] x           Superblock level data for this block.
287
 * \param[in]     tile_info   Tile information / identification
288
 * \param[in]     mi_row      Block row (in "MI_SIZE" units) index
289
 * \param[in]     mi_col      Block column (in "MI_SIZE" units) index
290
 * \param[out]    num_planes  Number of image planes (e.g. Y,U,V)
291
 *
292
 * \remark No return value but updates superblock and thread data
293
 * related to the q / q delta to be used.
294
 */
295
static inline void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
296
                                 MACROBLOCK *const x,
297
                                 const TileInfo *const tile_info, int mi_row,
298
2.24k
                                 int mi_col, int num_planes) {
299
2.24k
  AV1_COMMON *const cm = &cpi->common;
300
2.24k
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
301
2.24k
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
302
2.24k
  assert(delta_q_info->delta_q_present_flag);
303
304
2.24k
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
305
2.24k
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
306
307
2.24k
  const int delta_q_res = delta_q_info->delta_q_res;
308
2.24k
  int current_qindex = cm->quant_params.base_qindex;
309
2.24k
  if (cpi->use_ducky_encode && cpi->ducky_encode_info.frame_info.qp_mode ==
310
0
                                   DUCKY_ENCODE_FRAME_MODE_QINDEX) {
311
0
    const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
312
0
    const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
313
0
    const int sb_cols =
314
0
        CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, cm->seq_params->mib_size_log2);
315
0
    const int sb_index = sb_row * sb_cols + sb_col;
316
0
    current_qindex =
317
0
        cpi->ducky_encode_info.frame_info.superblock_encode_qindex[sb_index];
318
2.24k
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL) {
319
0
    if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
320
0
      const int block_wavelet_energy_level =
321
0
          av1_block_wavelet_energy_level(cpi, x, sb_size);
322
0
      x->sb_energy_level = block_wavelet_energy_level;
323
0
      current_qindex = av1_compute_q_from_energy_level_deltaq_mode(
324
0
          cpi, block_wavelet_energy_level);
325
0
    } else {
326
0
      const int block_var_level = av1_log_block_var(cpi, x, sb_size);
327
0
      x->sb_energy_level = block_var_level;
328
0
      current_qindex =
329
0
          av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level);
330
0
    }
331
2.24k
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE &&
332
2.24k
             cpi->oxcf.algo_cfg.enable_tpl_model) {
333
    // Setup deltaq based on tpl stats
334
2.24k
    current_qindex =
335
2.24k
        av1_get_q_for_deltaq_objective(cpi, td, NULL, sb_size, mi_row, mi_col);
336
2.24k
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL_AI) {
337
0
    current_qindex = av1_get_sbq_perceptual_ai(cpi, sb_size, mi_row, mi_col);
338
1
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) {
339
0
    current_qindex = av1_get_sbq_user_rating_based(cpi, mi_row, mi_col);
340
1
  } else if (cpi->oxcf.q_cfg.enable_hdr_deltaq) {
341
0
    current_qindex = av1_get_q_for_hdr(cpi, x, sb_size, mi_row, mi_col);
342
1
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_VARIANCE_BOOST) {
343
0
    current_qindex = av1_get_sbq_variance_boost(cpi, x);
344
0
  }
345
346
2.24k
  x->rdmult_cur_qindex = current_qindex;
347
2.24k
  MACROBLOCKD *const xd = &x->e_mbd;
348
2.24k
  const int adjusted_qindex = av1_adjust_q_from_delta_q_res(
349
2.24k
      delta_q_res, xd->current_base_qindex, current_qindex);
350
2.24k
  if (cpi->use_ducky_encode) {
351
0
    assert(adjusted_qindex == current_qindex);
352
0
  }
353
2.24k
  current_qindex = adjusted_qindex;
354
355
2.24k
  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
356
2.24k
  x->rdmult_delta_qindex = x->delta_qindex;
357
358
2.24k
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
359
2.24k
  xd->mi[0]->current_qindex = current_qindex;
360
2.24k
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);
361
362
  // keep track of any non-zero delta-q used
363
2.24k
  td->deltaq_used |= (x->delta_qindex != 0);
364
365
2.24k
  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
366
0
    const int delta_lf_res = delta_q_info->delta_lf_res;
367
0
    const int lfmask = ~(delta_lf_res - 1);
368
0
    const int delta_lf_from_base =
369
0
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
370
0
    const int8_t delta_lf =
371
0
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
372
0
    const int frame_lf_count =
373
0
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
374
0
    const int mib_size = cm->seq_params->mib_size;
375
376
    // pre-set the delta lf for loop filter. Note that this value is set
377
    // before mi is assigned for each block in current superblock
378
0
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
379
0
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
380
0
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
381
0
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
382
0
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
383
0
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
384
0
        }
385
0
      }
386
0
    }
387
0
  }
388
2.24k
}
389
390
static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
391
223k
                                 int mi_col) {
392
223k
  const AV1_COMMON *cm = &cpi->common;
393
223k
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
394
223k
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
395
223k
  MACROBLOCK *x = &td->mb;
396
223k
  const int frame_idx = cpi->gf_frame_index;
397
223k
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
398
223k
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
399
400
223k
  av1_zero(x->tpl_keep_ref_frame);
401
402
223k
  if (!av1_tpl_stats_ready(tpl_data, frame_idx)) return;
403
43.6k
  if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return;
404
26.1k
  if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;
405
406
26.1k
  const int is_overlay =
407
26.1k
      cpi->ppi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
408
26.1k
  if (is_overlay) {
409
0
    memset(x->tpl_keep_ref_frame, 1, sizeof(x->tpl_keep_ref_frame));
410
0
    return;
411
0
  }
412
413
26.1k
  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
414
26.1k
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
415
26.1k
  const int tpl_stride = tpl_frame->stride;
416
26.1k
  int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
417
26.1k
  const int step = 1 << block_mis_log2;
418
26.1k
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
419
420
26.1k
  const int mi_row_end =
421
26.1k
      AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
422
26.1k
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
423
26.1k
  const int mi_col_sr =
424
26.1k
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
425
26.1k
  const int mi_col_end_sr =
426
26.1k
      AOMMIN(coded_to_superres_mi(mi_col + mi_size_wide[sb_size],
427
26.1k
                                  cm->superres_scale_denominator),
428
26.1k
             mi_cols_sr);
429
26.1k
  const int row_step = step;
430
26.1k
  const int col_step_sr =
431
26.1k
      coded_to_superres_mi(step, cm->superres_scale_denominator);
432
90.1k
  for (int row = mi_row; row < mi_row_end; row += row_step) {
433
224k
    for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
434
160k
      const TplDepStats *this_stats =
435
160k
          &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
436
160k
      int64_t tpl_pred_error[INTER_REFS_PER_FRAME] = { 0 };
437
      // Find the winner ref frame idx for the current block
438
160k
      int64_t best_inter_cost = this_stats->pred_error[0];
439
160k
      int best_rf_idx = 0;
440
1.12M
      for (int idx = 1; idx < INTER_REFS_PER_FRAME; ++idx) {
441
960k
        if ((this_stats->pred_error[idx] < best_inter_cost) &&
442
0
            (this_stats->pred_error[idx] != 0)) {
443
0
          best_inter_cost = this_stats->pred_error[idx];
444
0
          best_rf_idx = idx;
445
0
        }
446
960k
      }
447
      // tpl_pred_error is the pred_error reduction of best_ref w.r.t.
448
      // LAST_FRAME.
449
160k
      tpl_pred_error[best_rf_idx] = this_stats->pred_error[best_rf_idx] -
450
160k
                                    this_stats->pred_error[LAST_FRAME - 1];
451
452
1.12M
      for (int rf_idx = 1; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx)
453
961k
        inter_cost[rf_idx] += tpl_pred_error[rf_idx];
454
160k
    }
455
63.9k
  }
456
457
26.1k
  int rank_index[INTER_REFS_PER_FRAME - 1];
458
182k
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
459
156k
    rank_index[idx] = idx + 1;
460
548k
    for (int i = idx; i > 0; --i) {
461
391k
      if (inter_cost[rank_index[i - 1]] > inter_cost[rank_index[i]]) {
462
0
        const int tmp = rank_index[i - 1];
463
0
        rank_index[i - 1] = rank_index[i];
464
0
        rank_index[i] = tmp;
465
0
      }
466
391k
    }
467
156k
  }
468
469
26.1k
  x->tpl_keep_ref_frame[INTRA_FRAME] = 1;
470
26.1k
  x->tpl_keep_ref_frame[LAST_FRAME] = 1;
471
472
26.1k
  int cutoff_ref = 0;
473
182k
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
474
156k
    x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 1;
475
156k
    if (idx > 2) {
476
78.3k
      if (!cutoff_ref) {
477
        // If the predictive coding gains are smaller than the previous more
478
        // relevant frame over certain amount, discard this frame and all the
479
        // frames afterwards.
480
26.1k
        if (llabs(inter_cost[rank_index[idx]]) <
481
26.1k
                llabs(inter_cost[rank_index[idx - 1]]) / 8 ||
482
26.1k
            inter_cost[rank_index[idx]] == 0)
483
26.1k
          cutoff_ref = 1;
484
26.1k
      }
485
486
78.3k
      if (cutoff_ref) x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 0;
487
78.3k
    }
488
156k
  }
489
26.1k
}
490
491
static inline void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
492
0
                                           int mi_row, int mi_col) {
493
0
  const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
494
0
  const int orig_rdmult = cpi->rd.RDMULT;
495
496
0
  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
497
0
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
498
0
  const int gf_group_index = cpi->gf_frame_index;
499
0
  if (cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ &&
500
0
      cpi->oxcf.q_cfg.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 &&
501
0
      cpi->ppi->gf_group.update_type[gf_group_index] == ARF_UPDATE) {
502
0
    const int dr =
503
0
        av1_get_rdmult_delta(cpi, sb_size, mi_row, mi_col, orig_rdmult);
504
0
    x->rdmult = dr;
505
0
  }
506
0
}
507
#endif  // !CONFIG_REALTIME_ONLY
508
509
#if CONFIG_RT_ML_PARTITIONING
510
// Get a prediction(stored in x->est_pred) for the whole superblock.
511
static void get_estimated_pred(AV1_COMP *cpi, const TileInfo *const tile,
512
                               MACROBLOCK *x, int mi_row, int mi_col) {
513
  AV1_COMMON *const cm = &cpi->common;
514
  const int is_key_frame = frame_is_intra_only(cm);
515
  MACROBLOCKD *xd = &x->e_mbd;
516
517
  // TODO(kyslov) Extend to 128x128
518
  assert(cm->seq_params->sb_size == BLOCK_64X64);
519
520
  av1_set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
521
522
  if (!is_key_frame) {
523
    MB_MODE_INFO *mi = xd->mi[0];
524
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
525
526
    assert(yv12 != NULL);
527
528
    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
529
                         get_ref_scale_factors(cm, LAST_FRAME), 1);
530
    mi->ref_frame[0] = LAST_FRAME;
531
    mi->ref_frame[1] = NONE;
532
    mi->bsize = BLOCK_64X64;
533
    mi->mv[0].as_int = 0;
534
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
535
536
    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
537
538
    xd->plane[0].dst.buf = x->est_pred;
539
    xd->plane[0].dst.stride = 64;
540
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
541
  } else {
542
#if CONFIG_AV1_HIGHBITDEPTH
543
    switch (xd->bd) {
544
      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
545
      case 10:
546
        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
547
        break;
548
      case 12:
549
        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
550
        break;
551
    }
552
#else
553
    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
554
#endif  // CONFIG_VP9_HIGHBITDEPTH
555
  }
556
}
557
#endif  // CONFIG_RT_ML_PARTITIONING
558
559
6.42k
#define AVG_CDF_WEIGHT_LEFT 3
560
6.42k
#define AVG_CDF_WEIGHT_TOP_RIGHT 1
561
562
/*!\brief Encode a superblock (minimal RD search involved)
563
 *
564
 * \ingroup partition_search
565
 * Encodes the superblock by a pre-determined partition pattern, only minor
566
 * rd-based searches are allowed to adjust the initial pattern. It is only used
567
 * by realtime encoding.
568
 */
569
static inline void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
570
                                   TileDataEnc *tile_data, TokenExtra **tp,
571
                                   const int mi_row, const int mi_col,
572
118k
                                   const int seg_skip) {
573
118k
  AV1_COMMON *const cm = &cpi->common;
574
118k
  MACROBLOCK *const x = &td->mb;
575
118k
  const SPEED_FEATURES *const sf = &cpi->sf;
576
118k
  const TileInfo *const tile_info = &tile_data->tile_info;
577
118k
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
578
118k
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
579
118k
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
580
118k
  PC_TREE *const pc_root = td->pc_root;
581
582
118k
#if !CONFIG_REALTIME_ONLY
583
118k
  if (cm->delta_q_info.delta_q_present_flag) {
584
0
    const int num_planes = av1_num_planes(cm);
585
586
0
    setup_delta_q_nonrd(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
587
0
  }
588
118k
#endif
589
#if CONFIG_RT_ML_PARTITIONING
590
  if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) {
591
    RD_STATS dummy_rdc;
592
    get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
593
    av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
594
                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, pc_root);
595
    return;
596
  }
597
#endif
598
  // Set the partition
599
118k
  if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
600
118k
      (sf->rt_sf.use_fast_fixed_part && x->sb_force_fixed_part == 1 &&
601
0
       (!frame_is_intra_only(cm) &&
602
0
        (!cpi->ppi->use_svc ||
603
0
         !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)))) {
604
    // set a fixed-size partition
605
0
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
606
0
    BLOCK_SIZE bsize_select = sf->part_sf.fixed_partition_size;
607
0
    if (sf->rt_sf.use_fast_fixed_part &&
608
0
        x->content_state_sb.source_sad_nonrd < kLowSad) {
609
0
      bsize_select = cm->seq_params->sb_size;
610
0
    }
611
0
    if (cpi->sf.rt_sf.skip_encoding_non_reference_slide_change &&
612
0
        cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) {
613
0
      bsize_select = cm->seq_params->sb_size;
614
0
      x->force_zeromv_skip_for_sb = 1;
615
0
    }
616
0
    const BLOCK_SIZE bsize = seg_skip ? sb_size : bsize_select;
617
0
    if (x->content_state_sb.source_sad_nonrd > kZeroSad)
618
0
      x->force_color_check_block_level = 1;
619
0
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
620
118k
  } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
621
    // set a variance-based partition
622
118k
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
623
118k
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
624
118k
  }
625
118k
  assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
626
118k
         sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
627
118k
  set_cb_offsets(td->mb.cb_offset, 0, 0);
628
629
  // Initialize the flag to skip cdef to 1.
630
118k
  if (sf->rt_sf.skip_cdef_sb) {
631
0
    const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
632
    // If 128x128 block is used, we need to set the flag for all 4 64x64 sub
633
    // "blocks".
634
0
    for (int r = 0; r < block64_in_sb; ++r) {
635
0
      for (int c = 0; c < block64_in_sb; ++c) {
636
0
        const int idx_in_sb =
637
0
            r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
638
0
        if (mi[idx_in_sb]) mi[idx_in_sb]->cdef_strength = 1;
639
0
      }
640
0
    }
641
0
  }
642
643
#if CONFIG_COLLECT_COMPONENT_TIMING
644
  start_timing(cpi, nonrd_use_partition_time);
645
#endif
646
118k
  av1_nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
647
118k
                          pc_root);
648
#if CONFIG_COLLECT_COMPONENT_TIMING
649
  end_timing(cpi, nonrd_use_partition_time);
650
#endif
651
118k
}
652
653
// This function initializes the stats for encode_rd_sb.
//
// Per-superblock setup performed before the RD partition search:
//  - optionally seeds the simple-motion-search MV tree for this SB,
//  - (non-realtime builds) initializes reference-frame space and, when
//    gathering TPL data, the delta-q / TPL rdmult state for the SB,
//  - resets per-SB search bookkeeping in the MACROBLOCK and marks
//    *rd_cost invalid so the caller starts from a clean slate.
static inline void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                     const TileDataEnc *tile_data,
                                     SIMPLE_MOTION_DATA_TREE *sms_root,
                                     RD_STATS *rd_cost, int mi_row, int mi_col,
                                     int gather_tpl_data) {
  const AV1_COMMON *cm = &cpi->common;
  const TileInfo *tile_info = &tile_data->tile_info;
  MACROBLOCK *x = &td->mb;

  const SPEED_FEATURES *sf = &cpi->sf;
  // Simple-motion-search MVs are only needed when at least one of the
  // partition-pruning features that consume them is enabled, and never on
  // intra-only frames.
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_root,
                                             mi_row, mi_col);
  }

#if !CONFIG_REALTIME_ONLY
  // Skip the stats-driven setup for the no-stats realtime configuration
  // (single pass, zero lag).
  if (!(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
        cpi->oxcf.gf_cfg.lag_in_frames == 0)) {
    init_ref_frame_space(cpi, td, mi_row, mi_col);
    x->sb_energy_level = 0;
    x->part_search_info.cnn_output_valid = 0;
    if (gather_tpl_data) {
      if (cm->delta_q_info.delta_q_present_flag) {
        const int num_planes = av1_num_planes(cm);
        const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
        setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
        av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
      }

      // TODO(jingning): revisit this function.
      // NOTE(review): the `&& (0)` makes this branch intentionally dead;
      // kept as-is pending the TODO above.
      if (cpi->oxcf.algo_cfg.enable_tpl_model && (0)) {
        adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
      }
    }
  }
#else
  (void)tile_info;
  (void)mi_row;
  (void)mi_col;
  (void)gather_tpl_data;
#endif

  // Reset per-SB mode-search state and invalidate the output RD stats.
  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}
708
709
#if !CONFIG_REALTIME_ONLY
710
// Re-initializes the quantizers for one trial point of the superblock
// delta-q sweep (see sb_qp_sweep).
//
// delta_qp_ofs is the qindex offset being evaluated relative to
// x->rdmult_cur_qindex. The chosen qindex is snapped to the stream's
// delta-q resolution, the plane quantizers are re-initialized, and when
// delta-lf mode is enabled the per-MI loop-filter deltas for the whole
// superblock are pre-set from the q delta. *rd_cost is reset to the
// invalid RD stats, mirroring init_encode_rd_sb.
static void sb_qp_sweep_init_quantizers(AV1_COMP *cpi, ThreadData *td,
                                        const TileDataEnc *tile_data,
                                        SIMPLE_MOTION_DATA_TREE *sms_tree,
                                        RD_STATS *rd_cost, int mi_row,
                                        int mi_col, int delta_qp_ofs) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const TileInfo *tile_info = &tile_data->tile_info;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  // The sweep is only entered when delta-q signaling is on (see caller).
  assert(delta_q_info->delta_q_present_flag);
  const int delta_q_res = delta_q_info->delta_q_res;

  const SPEED_FEATURES *sf = &cpi->sf;
  // Same gating as init_encode_rd_sb: seed simple-motion-search MVs only
  // when a consumer feature is enabled on a non-intra frame.
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_tree,
                                             mi_row, mi_col);
  }

  int current_qindex = x->rdmult_cur_qindex + delta_qp_ofs;

  MACROBLOCKD *const xd = &x->e_mbd;
  // Snap the trial qindex to a value representable at the stream's delta-q
  // resolution, relative to the current base qindex.
  current_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;

  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);

  // keep track of any non-zero delta-q used
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    // Derive a loop-filter delta from the q delta, quantized to the
    // delta-lf resolution and clamped to the legal loop-filter range.
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // pre-set the delta lf for loop filter. Note that this value is set
    // before mi is assigned for each block in current superblock
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }

  // Reset per-SB mode-search state and invalidate the output RD stats.
  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}
781
782
static int sb_qp_sweep(AV1_COMP *const cpi, ThreadData *td,
783
                       TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
784
                       int mi_col, BLOCK_SIZE bsize,
785
                       SIMPLE_MOTION_DATA_TREE *sms_tree,
786
0
                       SB_FIRST_PASS_STATS *sb_org_stats) {
787
0
  AV1_COMMON *const cm = &cpi->common;
788
0
  MACROBLOCK *const x = &td->mb;
789
0
  RD_STATS rdc_winner, cur_rdc;
790
0
  av1_invalid_rd_stats(&rdc_winner);
791
792
0
  int best_qindex = td->mb.rdmult_delta_qindex;
793
0
  const int start = cm->current_frame.frame_type == KEY_FRAME ? -20 : -12;
794
0
  const int end = cm->current_frame.frame_type == KEY_FRAME ? 20 : 12;
795
0
  const int step = cm->delta_q_info.delta_q_res;
796
797
0
  for (int sweep_qp_delta = start; sweep_qp_delta <= end;
798
0
       sweep_qp_delta += step) {
799
0
    sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_tree, &cur_rdc, mi_row,
800
0
                                mi_col, sweep_qp_delta);
801
802
0
    const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
803
0
    const int backup_current_qindex =
804
0
        cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;
805
806
0
    av1_reset_mbmi(&cm->mi_params, bsize, mi_row, mi_col);
807
0
    av1_restore_sb_state(sb_org_stats, cpi, td, tile_data, mi_row, mi_col);
808
0
    cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex = backup_current_qindex;
809
810
0
    td->pc_root = av1_alloc_pc_tree_node(bsize);
811
0
    if (!td->pc_root)
812
0
      aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
813
0
                         "Failed to allocate PC_TREE");
814
0
    av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize,
815
0
                          &cur_rdc, cur_rdc, td->pc_root, sms_tree, NULL,
816
0
                          SB_DRY_PASS, NULL);
817
818
0
    if ((rdc_winner.rdcost > cur_rdc.rdcost) ||
819
0
        (abs(sweep_qp_delta) < abs(best_qindex - x->rdmult_delta_qindex) &&
820
0
         rdc_winner.rdcost == cur_rdc.rdcost)) {
821
0
      rdc_winner = cur_rdc;
822
0
      best_qindex = x->rdmult_delta_qindex + sweep_qp_delta;
823
0
    }
824
0
  }
825
826
0
  return best_qindex;
827
0
}
828
#endif  //! CONFIG_REALTIME_ONLY
829
830
/*!\brief Encode a superblock (RD-search-based)
 *
 * \ingroup partition_search
 * Conducts partition search for a superblock, based on rate-distortion costs,
 * from scratch or adjusting from a pre-calculated partition pattern.
 */
static inline void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, TokenExtra **tp,
                                const int mi_row, const int mi_col,
                                const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  // Grid pointer for the top-left MI of this superblock.
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int num_planes = av1_num_planes(cm);
  // Rate/dist/RD outputs from the partition search are not consumed here.
  int dummy_rate;
  int64_t dummy_dist;
  RD_STATS dummy_rdc;
  SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root;

#if CONFIG_REALTIME_ONLY
  (void)seg_skip;
#endif  // CONFIG_REALTIME_ONLY

  init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
                    1);

  // Encode the superblock
  if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // partition search starting from a variance-based partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_use_partition_time);
#endif
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    // The PC tree is only needed for the duration of the search.
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_use_partition_time);
#endif
  }
#if !CONFIG_REALTIME_ONLY
  else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
    // partition search by adjusting a fixed-size partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    // Segments with SEG_LVL_SKIP use the whole SB as one block.
    const BLOCK_SIZE bsize =
        seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
  } else {
    // The most exhaustive recursive partition search
    SuperBlockEnc *sb_enc = &x->sb_enc;
    // No stats for overlay frames. Exclude key frame.
    av1_get_tpl_stats_sb(cpi, sb_size, mi_row, mi_col, sb_enc);

    // Reset the tree for simple motion search data
    av1_reset_simple_motion_tree_partition(sms_root, sb_size);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_pick_partition_time);
#endif

    // Estimate the maximum square partition block size, which will be used
    // as the starting block size for partitioning the sb
    set_max_min_partition_size(sb_enc, cpi, x, sf, sb_size, mi_row, mi_col);

    // The superblock can be searched only once, or twice consecutively for
    // better quality. Note that the meaning of passes here is different from
    // the general concept of 1-pass/2-pass encoders.
    const int num_passes =
        cpi->oxcf.unit_test_cfg.sb_multipass_unit_test ? 2 : 1;

    // Optional per-SB delta-q sweep: try a range of qindex offsets with
    // dry-pass searches, then re-install the winning quantizers before the
    // real encode below. Requires delta-q signaling and frame stats.
    if (cpi->oxcf.sb_qp_sweep &&
        !(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
          cpi->oxcf.gf_cfg.lag_in_frames == 0) &&
        cm->delta_q_info.delta_q_present_flag) {
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_stats_cache,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_stats_cache)));
      // Snapshot the SB state so each sweep trial (and the final encode)
      // starts from identical conditions.
      av1_backup_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                          mi_col);
      assert(x->rdmult_delta_qindex == x->delta_qindex);

      const int best_qp_diff =
          sb_qp_sweep(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, sms_root,
                      td->mb.sb_stats_cache) -
          x->rdmult_delta_qindex;

      // Install the winning delta-q for the actual encode.
      sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_root, &dummy_rdc,
                                  mi_row, mi_col, best_qp_diff);

      const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
      const int backup_current_qindex =
          cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

      // Restore the pre-sweep SB state, keeping the chosen qindex.
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_restore_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                           mi_col);

      cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex =
          backup_current_qindex;
      aom_free(td->mb.sb_stats_cache);
      td->mb.sb_stats_cache = NULL;
    }
    if (num_passes == 1) {
#if CONFIG_PARTITION_SEARCH_ORDER
      // External partition model (if ready) drives the search order on
      // inter frames; otherwise fall back to the built-in recursive search.
      if (cpi->ext_part_controller.ready && !frame_is_intra_only(cm)) {
        av1_reset_part_sf(&cpi->sf.part_sf);
        av1_reset_sf_for_ext_part(cpi);
        RD_STATS this_rdc;
        av1_rd_partition_search(cpi, td, tile_data, tp, sms_root, mi_row,
                                mi_col, sb_size, &this_rdc);
      } else {
        td->pc_root = av1_alloc_pc_tree_node(sb_size);
        if (!td->pc_root)
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                             "Failed to allocate PC_TREE");
        av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                              &dummy_rdc, dummy_rdc, td->pc_root, sms_root,
                              NULL, SB_SINGLE_PASS, NULL);
      }
#else
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_SINGLE_PASS, NULL);
#endif  // CONFIG_PARTITION_SEARCH_ORDER
    } else {
      // First pass
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_fp_stats,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_fp_stats)));
      // Snapshot SB state so the second (wet) pass can restart from it.
      av1_backup_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                          mi_col);
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_DRY_PASS, NULL);

      // Second pass
      init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
                        mi_col, 0);
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_reset_simple_motion_tree_partition(sms_root, sb_size);

      av1_restore_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                           mi_col);

      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_WET_PASS, NULL);
      aom_free(td->mb.sb_fp_stats);
      td->mb.sb_fp_stats = NULL;
    }

    // Reset to 0 so that it wouldn't be used elsewhere mistakenly.
    sb_enc->tpl_data_count = 0;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_pick_partition_time);
#endif
  }
#endif  // !CONFIG_REALTIME_ONLY

  // Update the inter rd model
  // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
  if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
      cm->tiles.cols == 1 && cm->tiles.rows == 1) {
    av1_inter_mode_data_fit(tile_data, x->rdmult);
  }
}
1030
1031
// Check if the cost update of symbols mode, coeff and dv are tile or off.
1032
static inline int is_mode_coeff_dv_upd_freq_tile_or_off(
1033
415k
    const AV1_COMP *const cpi) {
1034
415k
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;
1035
1036
415k
  return (inter_sf->coeff_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
1037
61.6k
          inter_sf->mode_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
1038
61.6k
          cpi->sf.intra_sf.dv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
1039
415k
}
1040
1041
// When row-mt is enabled and cost update frequencies are set to off/tile,
1042
// processing of current SB can start even before processing of top-right SB
1043
// is finished. This function checks if it is sufficient to wait for top SB
1044
// to finish processing before current SB starts processing.
1045
652k
static inline int delay_wait_for_top_right_sb(const AV1_COMP *const cpi) {
1046
652k
  const MODE mode = cpi->oxcf.mode;
1047
652k
  if (mode == GOOD) return 0;
1048
1049
415k
  if (mode == ALLINTRA)
1050
316k
    return is_mode_coeff_dv_upd_freq_tile_or_off(cpi);
1051
98.4k
  else if (mode == REALTIME)
1052
98.5k
    return (is_mode_coeff_dv_upd_freq_tile_or_off(cpi) &&
1053
0
            cpi->sf.inter_sf.mv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
1054
18.4E
  else
1055
18.4E
    return 0;
1056
415k
}
1057
1058
/*!\brief Calculate source SAD at superblock level using 64x64 block source SAD
1059
 *
1060
 * \ingroup partition_search
1061
 * \callgraph
1062
 * \callergraph
1063
 */
1064
static inline uint64_t get_sb_source_sad(const AV1_COMP *cpi, int mi_row,
1065
27.3k
                                         int mi_col) {
1066
27.3k
  if (cpi->src_sad_blk_64x64 == NULL) return UINT64_MAX;
1067
1068
27.3k
  const AV1_COMMON *const cm = &cpi->common;
1069
27.3k
  const int blk_64x64_in_mis = (cm->seq_params->sb_size == BLOCK_128X128)
1070
27.3k
                                   ? (cm->seq_params->mib_size >> 1)
1071
27.3k
                                   : cm->seq_params->mib_size;
1072
27.3k
  const int num_blk_64x64_cols =
1073
27.3k
      (cm->mi_params.mi_cols + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
1074
27.3k
  const int num_blk_64x64_rows =
1075
27.3k
      (cm->mi_params.mi_rows + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
1076
27.3k
  const int blk_64x64_col_index = mi_col / blk_64x64_in_mis;
1077
27.3k
  const int blk_64x64_row_index = mi_row / blk_64x64_in_mis;
1078
27.3k
  uint64_t curr_sb_sad = UINT64_MAX;
1079
  // Avoid the border as sad_blk_64x64 may not be set for the border
1080
  // in the scene detection.
1081
27.3k
  if ((blk_64x64_row_index >= num_blk_64x64_rows - 1) ||
1082
22.5k
      (blk_64x64_col_index >= num_blk_64x64_cols - 1)) {
1083
22.5k
    return curr_sb_sad;
1084
22.5k
  }
1085
4.80k
  const uint64_t *const src_sad_blk_64x64_data =
1086
4.80k
      &cpi->src_sad_blk_64x64[blk_64x64_col_index +
1087
4.80k
                              blk_64x64_row_index * num_blk_64x64_cols];
1088
4.80k
  if (cm->seq_params->sb_size == BLOCK_128X128) {
1089
    // Calculate SB source SAD by accumulating source SAD of 64x64 blocks in the
1090
    // superblock
1091
0
    curr_sb_sad = src_sad_blk_64x64_data[0] + src_sad_blk_64x64_data[1] +
1092
0
                  src_sad_blk_64x64_data[num_blk_64x64_cols] +
1093
0
                  src_sad_blk_64x64_data[num_blk_64x64_cols + 1];
1094
4.83k
  } else if (cm->seq_params->sb_size == BLOCK_64X64) {
1095
4.83k
    curr_sb_sad = src_sad_blk_64x64_data[0];
1096
4.83k
  }
1097
4.80k
  return curr_sb_sad;
1098
27.3k
}
1099
1100
/*!\brief Determine whether grading content can be skipped based on sad stat
1101
 *
1102
 * \ingroup partition_search
1103
 * \callgraph
1104
 * \callergraph
1105
 */
1106
static inline bool is_calc_src_content_needed(AV1_COMP *cpi,
1107
                                              MACROBLOCK *const x, int mi_row,
1108
27.3k
                                              int mi_col) {
1109
27.3k
  if (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
1110
0
    return true;
1111
27.3k
  const uint64_t curr_sb_sad = get_sb_source_sad(cpi, mi_row, mi_col);
1112
27.3k
  if (curr_sb_sad == UINT64_MAX) return true;
1113
4.80k
  if (curr_sb_sad == 0) {
1114
0
    x->content_state_sb.source_sad_nonrd = kZeroSad;
1115
0
    return false;
1116
0
  }
1117
4.80k
  AV1_COMMON *const cm = &cpi->common;
1118
4.80k
  bool do_calc_src_content = true;
1119
1120
4.80k
  if (cpi->oxcf.speed < 9) return do_calc_src_content;
1121
1122
  // TODO(yunqing): Tune/validate the thresholds for 128x128 SB size.
1123
1.25k
  if (AOMMIN(cm->width, cm->height) < 360) {
1124
    // Derive Average 64x64 block source SAD from SB source SAD
1125
1.25k
    const uint64_t avg_64x64_blk_sad =
1126
1.25k
        (cm->seq_params->sb_size == BLOCK_128X128) ? ((curr_sb_sad + 2) >> 2)
1127
1.25k
                                                   : curr_sb_sad;
1128
1129
    // The threshold is determined based on kLowSad and kHighSad threshold and
1130
    // test results.
1131
1.25k
    uint64_t thresh_low = 15000;
1132
1.25k
    uint64_t thresh_high = 40000;
1133
1134
1.25k
    if (cpi->sf.rt_sf.increase_source_sad_thresh) {
1135
0
      thresh_low = thresh_low << 1;
1136
0
      thresh_high = thresh_high << 1;
1137
0
    }
1138
1139
1.25k
    if (avg_64x64_blk_sad > thresh_low && avg_64x64_blk_sad < thresh_high) {
1140
0
      do_calc_src_content = false;
1141
      // Note: set x->content_state_sb.source_sad_rd as well if this is extended
1142
      // to RTC rd path.
1143
0
      x->content_state_sb.source_sad_nonrd = kMedSad;
1144
0
    }
1145
1.25k
  }
1146
1147
1.22k
  return do_calc_src_content;
1148
4.80k
}
1149
1150
/*!\brief Determine whether grading content is needed based on sf and frame stat
1151
 *
1152
 * \ingroup partition_search
1153
 * \callgraph
1154
 * \callergraph
1155
 */
1156
// TODO(any): consolidate sfs to make interface cleaner
1157
static inline void grade_source_content_sb(AV1_COMP *cpi, MACROBLOCK *const x,
1158
                                           TileDataEnc *tile_data, int mi_row,
1159
342k
                                           int mi_col) {
1160
342k
  AV1_COMMON *const cm = &cpi->common;
1161
342k
  if (cm->current_frame.frame_type == KEY_FRAME ||
1162
69.4k
      (cpi->ppi->use_svc &&
1163
272k
       cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
1164
272k
    assert(x->content_state_sb.source_sad_nonrd == kMedSad);
1165
272k
    assert(x->content_state_sb.source_sad_rd == kMedSad);
1166
272k
    return;
1167
272k
  }
1168
342k
  bool calc_src_content = false;
1169
1170
69.4k
  if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
1171
27.4k
    if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0) {
1172
27.3k
      calc_src_content = is_calc_src_content_needed(cpi, x, mi_row, mi_col);
1173
27.3k
    } else {
1174
107
      x->content_state_sb.source_sad_nonrd = kZeroSad;
1175
107
    }
1176
42.0k
  } else if ((cpi->sf.rt_sf.var_part_based_on_qidx >= 1) &&
1177
0
             (cm->width * cm->height <= 352 * 288)) {
1178
0
    if (cpi->rc.frame_source_sad > 0)
1179
0
      calc_src_content = true;
1180
0
    else
1181
0
      x->content_state_sb.source_sad_rd = kZeroSad;
1182
0
  }
1183
69.4k
  if (calc_src_content)
1184
27.3k
    av1_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
1185
69.4k
}
1186
1187
/*!\brief Encode a superblock row by breaking it into superblocks
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 * Do partition and mode search for an sb row: one row of superblocks filling up
 * the width of the current tile.
 */
static inline void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
                                 TileDataEnc *tile_data, int mi_row,
                                 TokenExtra **tp) {
  AV1_COMMON *const cm = &cpi->common;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MultiThreadInfo *const mt_info = &cpi->mt_info;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
  AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
  bool row_mt_enabled = mt_info->row_mt_enabled;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int mib_size = cm->seq_params->mib_size;
  const int mib_size_log2 = cm->seq_params->mib_size_log2;
  // SB-row index within the tile, used for row-mt synchronization.
  const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
  const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, encode_sb_row_time);
#endif

  // Initialize the left context for the new SB row
  av1_zero_left_context(xd);

  // Reset delta for quantizer and loop filters at the beginning of every tile
  // (and of every row when row-mt is enabled).
  if (mi_row == tile_info->mi_row_start || row_mt_enabled) {
    if (cm->delta_q_info.delta_q_present_flag)
      xd->current_base_qindex = cm->quant_params.base_qindex;
    if (cm->delta_q_info.delta_lf_present_flag) {
      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
    }
  }

  reset_thresh_freq_fact(x);

  // Code each SB in the row
  for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
       mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
    // In realtime/allintra mode and when frequency of cost updates is off/tile,
    // wait for the top superblock to finish encoding. Otherwise, wait for the
    // top-right superblock to finish encoding.
    enc_row_mt->sync_read_ptr(
        row_mt_sync, sb_row, sb_col_in_tile - delay_wait_for_top_right_sb(cpi));

#if CONFIG_MULTITHREAD
    if (row_mt_enabled) {
      // Read the shared exit flag under the row-mt mutex.
      pthread_mutex_lock(enc_row_mt->mutex_);
      const bool row_mt_exit = enc_row_mt->row_mt_exit;
      pthread_mutex_unlock(enc_row_mt->mutex_);
      // Exit in case any worker has encountered an error.
      if (row_mt_exit) return;
    }
#endif

    const int update_cdf = tile_data->allow_update_cdf && row_mt_enabled;
    if (update_cdf && (tile_info->mi_row_start != mi_row)) {
      if ((tile_info->mi_col_start == mi_col)) {
        // restore frame context at the 1st column sb
        *xd->tile_ctx = *x->row_ctx;
      } else {
        // update context: average the left context with the saved top-right
        // (or rightmost) row context.
        int wt_left = AVG_CDF_WEIGHT_LEFT;
        int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
        if (tile_info->mi_col_end > (mi_col + mib_size))
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile,
                              wt_left, wt_tr);
        else
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
                              wt_left, wt_tr);
      }
    }

    // Update the rate cost tables for some symbols
    av1_set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);

    // Reset color coding related parameters
    av1_zero(x->color_sensitivity_sb);
    av1_zero(x->color_sensitivity_sb_g);
    av1_zero(x->color_sensitivity_sb_alt);
    av1_zero(x->color_sensitivity);
    // Per-SB content/search state defaults; refined by
    // grade_source_content_sb and the SB encoders below.
    x->content_state_sb.source_sad_nonrd = kMedSad;
    x->content_state_sb.source_sad_rd = kMedSad;
    x->content_state_sb.lighting_change = 0;
    x->content_state_sb.low_sumdiff = 0;
    x->force_zeromv_skip_for_sb = 0;
    x->sb_me_block = 0;
    x->sb_me_partition = 0;
    x->sb_me_mv.as_int = 0;
    x->sb_col_scroll = 0;
    x->sb_row_scroll = 0;
    x->sb_force_fixed_part = 1;
    x->color_palette_thresh = 64;
    x->force_color_check_block_level = 0;
    x->nonrd_prune_ref_frame_search =
        cpi->sf.rt_sf.nonrd_prune_ref_frame_search;

    if (cpi->oxcf.mode == ALLINTRA) {
      x->intra_sb_rdmult_modifier = 128;
    }

    xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
    x->source_variance = UINT_MAX;
    td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);

    // Get segment id and skip flag
    const struct segmentation *const seg = &cm->seg;
    int seg_skip = 0;
    if (seg->enabled) {
      const uint8_t *const map =
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
      const uint8_t segment_id =
          map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
              : 0;
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
    }

    produce_gradients_for_sb(cpi, x, sb_size, mi_row, mi_col);

    init_src_var_info_of_4x4_sub_blocks(cpi, x->src_var_info_of_4x4_sub_blocks,
                                        sb_size);

    // Grade the temporal variation of the sb, the grade will be used to decide
    // fast mode search strategy for coding blocks
    if (!seg_skip) grade_source_content_sb(cpi, x, tile_data, mi_row, mi_col);

    // encode the superblock
    if (use_nonrd_mode) {
      encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
    } else {
      encode_rd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
    }

    // Update the top-right context in row_mt coding
    if (update_cdf && (tile_info->mi_row_end > (mi_row + mib_size))) {
      if (sb_cols_in_tile == 1)
        x->row_ctx[0] = *xd->tile_ctx;
      else if (sb_col_in_tile >= 1)
        x->row_ctx[sb_col_in_tile - 1] = *xd->tile_ctx;
    }
    // Signal that this SB is done so dependent rows can proceed.
    enc_row_mt->sync_write_ptr(row_mt_sync, sb_row, sb_col_in_tile,
                               sb_cols_in_tile);
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, encode_sb_row_time);
#endif
}
1343
1344
108k
static inline void init_encode_frame_mb_context(AV1_COMP *cpi) {
1345
108k
  AV1_COMMON *const cm = &cpi->common;
1346
108k
  const int num_planes = av1_num_planes(cm);
1347
108k
  MACROBLOCK *const x = &cpi->td.mb;
1348
108k
  MACROBLOCKD *const xd = &x->e_mbd;
1349
1350
  // Copy data over into macro block data structures.
1351
108k
  av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
1352
108k
                       cm->seq_params->sb_size);
1353
1354
108k
  av1_setup_block_planes(xd, cm->seq_params->subsampling_x,
1355
108k
                         cm->seq_params->subsampling_y, num_planes);
1356
108k
}
1357
1358
74.0k
void av1_alloc_tile_data(AV1_COMP *cpi) {
1359
74.0k
  AV1_COMMON *const cm = &cpi->common;
1360
74.0k
  AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt;
1361
74.0k
  const int tile_cols = cm->tiles.cols;
1362
74.0k
  const int tile_rows = cm->tiles.rows;
1363
1364
74.0k
  av1_row_mt_mem_dealloc(cpi);
1365
1366
74.0k
  aom_free(cpi->tile_data);
1367
74.0k
  cpi->allocated_tiles = 0;
1368
74.0k
  enc_row_mt->allocated_tile_cols = 0;
1369
74.0k
  enc_row_mt->allocated_tile_rows = 0;
1370
1371
74.0k
  CHECK_MEM_ERROR(
1372
74.0k
      cm, cpi->tile_data,
1373
74.0k
      aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));
1374
1375
74.0k
  cpi->allocated_tiles = tile_cols * tile_rows;
1376
74.0k
  enc_row_mt->allocated_tile_cols = tile_cols;
1377
74.0k
  enc_row_mt->allocated_tile_rows = tile_rows;
1378
187k
  for (int tile_row = 0; tile_row < tile_rows; ++tile_row) {
1379
298k
    for (int tile_col = 0; tile_col < tile_cols; ++tile_col) {
1380
184k
      const int tile_index = tile_row * tile_cols + tile_col;
1381
184k
      TileDataEnc *const this_tile = &cpi->tile_data[tile_index];
1382
184k
      av1_zero(this_tile->row_mt_sync);
1383
184k
      this_tile->row_ctx = NULL;
1384
184k
    }
1385
113k
  }
1386
74.0k
}
1387
1388
139k
void av1_init_tile_data(AV1_COMP *cpi) {
1389
139k
  AV1_COMMON *const cm = &cpi->common;
1390
139k
  const int num_planes = av1_num_planes(cm);
1391
139k
  const int tile_cols = cm->tiles.cols;
1392
139k
  const int tile_rows = cm->tiles.rows;
1393
139k
  int tile_col, tile_row;
1394
139k
  TokenInfo *const token_info = &cpi->token_info;
1395
139k
  TokenExtra *pre_tok = token_info->tile_tok[0][0];
1396
139k
  TokenList *tplist = token_info->tplist[0][0];
1397
139k
  unsigned int tile_tok = 0;
1398
139k
  int tplist_count = 0;
1399
1400
139k
  if (!is_stat_generation_stage(cpi) &&
1401
108k
      cm->features.allow_screen_content_tools) {
1402
    // Number of tokens for which token info needs to be allocated.
1403
0
    unsigned int tokens_required =
1404
0
        get_token_alloc(cm->mi_params.mb_rows, cm->mi_params.mb_cols,
1405
0
                        MAX_SB_SIZE_LOG2, num_planes);
1406
    // Allocate/reallocate memory for token related info if the number of tokens
1407
    // required is more than the number of tokens already allocated. This could
1408
    // occur in case of the following:
1409
    // 1) If the memory is not yet allocated
1410
    // 2) If the frame dimensions have changed
1411
0
    const bool realloc_tokens = tokens_required > token_info->tokens_allocated;
1412
0
    if (realloc_tokens) {
1413
0
      free_token_info(token_info);
1414
0
      alloc_token_info(cm, token_info, tokens_required);
1415
0
      pre_tok = token_info->tile_tok[0][0];
1416
0
      tplist = token_info->tplist[0][0];
1417
0
    }
1418
0
  }
1419
1420
342k
  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1421
512k
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1422
309k
      TileDataEnc *const tile_data =
1423
309k
          &cpi->tile_data[tile_row * tile_cols + tile_col];
1424
309k
      TileInfo *const tile_info = &tile_data->tile_info;
1425
309k
      av1_tile_init(tile_info, cm, tile_row, tile_col);
1426
309k
      tile_data->firstpass_top_mv = kZeroMv;
1427
309k
      tile_data->abs_sum_level = 0;
1428
1429
309k
      if (is_token_info_allocated(token_info)) {
1430
0
        token_info->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
1431
0
        pre_tok = token_info->tile_tok[tile_row][tile_col];
1432
0
        tile_tok = allocated_tokens(
1433
0
            tile_info, cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1434
0
            num_planes);
1435
0
        token_info->tplist[tile_row][tile_col] = tplist + tplist_count;
1436
0
        tplist = token_info->tplist[tile_row][tile_col];
1437
0
        tplist_count = av1_get_sb_rows_in_tile(cm, tile_info);
1438
0
      }
1439
309k
      tile_data->allow_update_cdf = !cm->tiles.large_scale;
1440
309k
      tile_data->allow_update_cdf = tile_data->allow_update_cdf &&
1441
309k
                                    !cm->features.disable_cdf_update &&
1442
309k
                                    !delay_wait_for_top_right_sb(cpi);
1443
309k
      tile_data->tctx = *cm->fc;
1444
309k
    }
1445
202k
  }
1446
139k
}
1447
1448
// Populate the start palette token info prior to encoding an SB row.
1449
static inline void get_token_start(AV1_COMP *cpi, const TileInfo *tile_info,
1450
                                   int tile_row, int tile_col, int mi_row,
1451
292k
                                   TokenExtra **tp) {
1452
292k
  const TokenInfo *token_info = &cpi->token_info;
1453
292k
  if (!is_token_info_allocated(token_info)) return;
1454
1455
217
  const AV1_COMMON *cm = &cpi->common;
1456
217
  const int num_planes = av1_num_planes(cm);
1457
217
  TokenList *const tplist = cpi->token_info.tplist[tile_row][tile_col];
1458
217
  const int sb_row_in_tile =
1459
217
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1460
1461
217
  get_start_tok(cpi, tile_row, tile_col, mi_row, tp,
1462
217
                cm->seq_params->mib_size_log2 + MI_SIZE_LOG2, num_planes);
1463
217
  assert(tplist != NULL);
1464
217
  tplist[sb_row_in_tile].start = *tp;
1465
217
}
1466
1467
// Populate the token count after encoding an SB row.
1468
static inline void populate_token_count(AV1_COMP *cpi,
1469
                                        const TileInfo *tile_info, int tile_row,
1470
                                        int tile_col, int mi_row,
1471
293k
                                        TokenExtra *tok) {
1472
293k
  const TokenInfo *token_info = &cpi->token_info;
1473
293k
  if (!is_token_info_allocated(token_info)) return;
1474
1475
3
  const AV1_COMMON *cm = &cpi->common;
1476
3
  const int num_planes = av1_num_planes(cm);
1477
3
  TokenList *const tplist = token_info->tplist[tile_row][tile_col];
1478
3
  const int sb_row_in_tile =
1479
3
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1480
3
  const int tile_mb_cols =
1481
3
      (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
1482
3
  const int num_mb_rows_in_sb =
1483
3
      ((1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
1484
3
  tplist[sb_row_in_tile].count =
1485
3
      (unsigned int)(tok - tplist[sb_row_in_tile].start);
1486
1487
3
  assert((unsigned int)(tok - tplist[sb_row_in_tile].start) <=
1488
3
         get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
1489
3
                         cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1490
3
                         num_planes));
1491
1492
3
  (void)num_planes;
1493
3
  (void)tile_mb_cols;
1494
3
  (void)num_mb_rows_in_sb;
1495
3
}
1496
1497
/*!\brief Encode a superblock row
1498
 *
1499
 * \ingroup partition_search
1500
 */
1501
void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
1502
292k
                       int tile_col, int mi_row) {
1503
292k
  AV1_COMMON *const cm = &cpi->common;
1504
292k
  const int tile_cols = cm->tiles.cols;
1505
292k
  TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
1506
292k
  const TileInfo *const tile_info = &this_tile->tile_info;
1507
292k
  TokenExtra *tok = NULL;
1508
1509
292k
  get_token_start(cpi, tile_info, tile_row, tile_col, mi_row, &tok);
1510
1511
292k
  encode_sb_row(cpi, td, this_tile, mi_row, &tok);
1512
1513
292k
  populate_token_count(cpi, tile_info, tile_row, tile_col, mi_row, tok);
1514
292k
}
1515
1516
/*!\brief Encode a tile
1517
 *
1518
 * \ingroup partition_search
1519
 */
1520
void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
1521
57.5k
                     int tile_col) {
1522
57.5k
  AV1_COMMON *const cm = &cpi->common;
1523
57.5k
  TileDataEnc *const this_tile =
1524
57.5k
      &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1525
57.5k
  const TileInfo *const tile_info = &this_tile->tile_info;
1526
1527
57.5k
  if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile);
1528
1529
57.5k
  av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
1530
57.5k
                         tile_info->mi_col_end, tile_row);
1531
57.5k
  av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
1532
57.5k
                         &td->mb.e_mbd);
1533
1534
57.5k
#if !CONFIG_REALTIME_ONLY
1535
57.5k
  if (cpi->oxcf.intra_mode_cfg.enable_cfl_intra)
1536
57.5k
    cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);
1537
57.5k
#endif
1538
1539
57.5k
  if (td->mb.txfm_search_info.mb_rd_record != NULL) {
1540
22.7k
    av1_crc32c_calculator_init(
1541
22.7k
        &td->mb.txfm_search_info.mb_rd_record->crc_calculator);
1542
22.7k
  }
1543
1544
120k
  for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
1545
63.1k
       mi_row += cm->seq_params->mib_size) {
1546
63.1k
    av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
1547
63.1k
  }
1548
57.5k
  this_tile->abs_sum_level = td->abs_sum_level;
1549
57.5k
}
1550
1551
/*!\brief Break one frame into tiles and encode the tiles
1552
 *
1553
 * \ingroup partition_search
1554
 *
1555
 * \param[in]    cpi    Top-level encoder structure
1556
 */
1557
41.9k
static inline void encode_tiles(AV1_COMP *cpi) {
1558
41.9k
  AV1_COMMON *const cm = &cpi->common;
1559
41.9k
  const int tile_cols = cm->tiles.cols;
1560
41.9k
  const int tile_rows = cm->tiles.rows;
1561
41.9k
  int tile_col, tile_row;
1562
1563
41.9k
  MACROBLOCK *const mb = &cpi->td.mb;
1564
41.9k
  assert(IMPLIES(cpi->tile_data == NULL,
1565
41.9k
                 cpi->allocated_tiles < tile_cols * tile_rows));
1566
41.9k
  if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi);
1567
1568
41.9k
  av1_init_tile_data(cpi);
1569
41.9k
  av1_alloc_mb_data(cpi, mb);
1570
1571
89.6k
  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1572
105k
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1573
57.5k
      TileDataEnc *const this_tile =
1574
57.5k
          &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1575
57.5k
      cpi->td.intrabc_used = 0;
1576
57.5k
      cpi->td.deltaq_used = 0;
1577
57.5k
      cpi->td.abs_sum_level = 0;
1578
57.5k
      cpi->td.rd_counts.seg_tmp_pred_cost[0] = 0;
1579
57.5k
      cpi->td.rd_counts.seg_tmp_pred_cost[1] = 0;
1580
57.5k
      cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
1581
57.5k
      cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
1582
57.5k
      av1_init_rtc_counters(&cpi->td.mb);
1583
57.5k
      cpi->td.mb.palette_pixels = 0;
1584
57.5k
      av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
1585
57.5k
      if (!frame_is_intra_only(&cpi->common))
1586
13.2k
        av1_accumulate_rtc_counters(cpi, &cpi->td.mb);
1587
57.5k
      cpi->palette_pixel_num += cpi->td.mb.palette_pixels;
1588
57.5k
      cpi->intrabc_used |= cpi->td.intrabc_used;
1589
57.5k
      cpi->deltaq_used |= cpi->td.deltaq_used;
1590
57.5k
    }
1591
47.6k
  }
1592
1593
41.9k
  av1_dealloc_mb_data(mb, av1_num_planes(cm));
1594
41.9k
}
1595
1596
// Set the relative distance of a reference frame w.r.t. current frame
1597
static inline void set_rel_frame_dist(
1598
    const AV1_COMMON *const cm, RefFrameDistanceInfo *const ref_frame_dist_info,
1599
108k
    const int ref_frame_flags) {
1600
108k
  MV_REFERENCE_FRAME ref_frame;
1601
108k
  int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX;
1602
108k
  ref_frame_dist_info->nearest_past_ref = NONE_FRAME;
1603
108k
  ref_frame_dist_info->nearest_future_ref = NONE_FRAME;
1604
871k
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
1605
762k
    ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = 0;
1606
762k
    if (ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
1607
128k
      int dist = av1_encoder_get_relative_dist(
1608
128k
          cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME],
1609
128k
          cm->current_frame.display_order_hint);
1610
128k
      ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = dist;
1611
      // Get the nearest ref_frame in the past
1612
128k
      if (abs(dist) < min_past_dist && dist < 0) {
1613
32.7k
        ref_frame_dist_info->nearest_past_ref = ref_frame;
1614
32.7k
        min_past_dist = abs(dist);
1615
32.7k
      }
1616
      // Get the nearest ref_frame in the future
1617
128k
      if (dist < min_future_dist && dist > 0) {
1618
1.01k
        ref_frame_dist_info->nearest_future_ref = ref_frame;
1619
1.01k
        min_future_dist = dist;
1620
1.01k
      }
1621
128k
    }
1622
762k
  }
1623
108k
}
1624
1625
27.3k
static inline int refs_are_one_sided(const AV1_COMMON *cm) {
1626
27.3k
  assert(!frame_is_intra_only(cm));
1627
1628
27.3k
  int one_sided_refs = 1;
1629
27.3k
  const int cur_display_order_hint = cm->current_frame.display_order_hint;
1630
218k
  for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
1631
191k
    const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
1632
191k
    if (buf == NULL) continue;
1633
191k
    if (av1_encoder_get_relative_dist(buf->display_order_hint,
1634
191k
                                      cur_display_order_hint) > 0) {
1635
0
      one_sided_refs = 0;  // bwd reference
1636
0
      break;
1637
0
    }
1638
191k
  }
1639
27.3k
  return one_sided_refs;
1640
27.3k
}
1641
1642
static inline void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
1643
10.4k
                                             int ref_order_hint[2]) {
1644
10.4k
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
1645
10.4k
  ref_order_hint[0] = ref_order_hint[1] = 0;
1646
10.4k
  if (!skip_mode_info->skip_mode_allowed) return;
1647
1648
10.4k
  const RefCntBuffer *const buf_0 =
1649
10.4k
      get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
1650
10.4k
  const RefCntBuffer *const buf_1 =
1651
10.4k
      get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
1652
10.4k
  assert(buf_0 != NULL && buf_1 != NULL);
1653
1654
10.4k
  ref_order_hint[0] = buf_0->order_hint;
1655
10.4k
  ref_order_hint[1] = buf_1->order_hint;
1656
10.4k
}
1657
1658
108k
static int check_skip_mode_enabled(AV1_COMP *const cpi) {
1659
108k
  AV1_COMMON *const cm = &cpi->common;
1660
1661
108k
  av1_setup_skip_mode_allowed(cm);
1662
108k
  if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;
1663
1664
  // Turn off skip mode if the temporal distances of the reference pair to the
1665
  // current frame are different by more than 1 frame.
1666
10.4k
  const int cur_offset = (int)cm->current_frame.order_hint;
1667
10.4k
  int ref_offset[2];
1668
10.4k
  get_skip_mode_ref_offsets(cm, ref_offset);
1669
10.4k
  const int cur_to_ref0 = get_relative_dist(&cm->seq_params->order_hint_info,
1670
10.4k
                                            cur_offset, ref_offset[0]);
1671
10.4k
  const int cur_to_ref1 = abs(get_relative_dist(
1672
10.4k
      &cm->seq_params->order_hint_info, cur_offset, ref_offset[1]));
1673
10.4k
  if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;
1674
1675
  // High Latency: Turn off skip mode if all refs are fwd.
1676
9.46k
  if (cpi->all_one_sided_refs && cpi->oxcf.gf_cfg.lag_in_frames > 0) return 0;
1677
1678
6.34k
  const int ref_frame[2] = {
1679
6.34k
    cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME,
1680
6.34k
    cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME
1681
6.34k
  };
1682
6.34k
  if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[0]]) ||
1683
6.34k
      !(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[1]]))
1684
1.85k
    return 0;
1685
1686
4.49k
  return 1;
1687
6.34k
}
1688
1689
static inline void set_default_interp_skip_flags(
1690
108k
    const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
1691
108k
  const int num_planes = av1_num_planes(cm);
1692
108k
  interp_search_flags->default_interp_skip_flags =
1693
108k
      (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
1694
108k
                        : INTERP_SKIP_LUMA_SKIP_CHROMA;
1695
108k
}
1696
1697
108k
static inline void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
1698
108k
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
1699
108k
       cpi->sf.inter_sf.disable_onesided_comp) &&
1700
42.7k
      cpi->all_one_sided_refs) {
1701
    // Disable all compound references
1702
17.0k
    cpi->prune_ref_frame_mask = (1 << MODE_CTX_REF_FRAMES) - (1 << REF_FRAMES);
1703
91.8k
  } else if (!cpi->sf.rt_sf.use_nonrd_pick_mode &&
1704
56.6k
             cpi->sf.inter_sf.selective_ref_frame >= 2) {
1705
25.6k
    AV1_COMMON *const cm = &cpi->common;
1706
25.6k
    const int cur_frame_display_order_hint =
1707
25.6k
        cm->current_frame.display_order_hint;
1708
25.6k
    unsigned int *ref_display_order_hint =
1709
25.6k
        cm->cur_frame->ref_display_order_hint;
1710
25.6k
    const int arf2_dist = av1_encoder_get_relative_dist(
1711
25.6k
        ref_display_order_hint[ALTREF2_FRAME - LAST_FRAME],
1712
25.6k
        cur_frame_display_order_hint);
1713
25.6k
    const int bwd_dist = av1_encoder_get_relative_dist(
1714
25.6k
        ref_display_order_hint[BWDREF_FRAME - LAST_FRAME],
1715
25.6k
        cur_frame_display_order_hint);
1716
1717
563k
    for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
1718
538k
      MV_REFERENCE_FRAME rf[2];
1719
538k
      av1_set_ref_frame(rf, ref_idx);
1720
538k
      if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) ||
1721
538k
          !(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]])) {
1722
538k
        continue;
1723
538k
      }
1724
1725
0
      if (!cpi->all_one_sided_refs) {
1726
0
        int ref_dist[2];
1727
0
        for (int i = 0; i < 2; ++i) {
1728
0
          ref_dist[i] = av1_encoder_get_relative_dist(
1729
0
              ref_display_order_hint[rf[i] - LAST_FRAME],
1730
0
              cur_frame_display_order_hint);
1731
0
        }
1732
1733
        // One-sided compound is used only when all reference frames are
1734
        // one-sided.
1735
0
        if ((ref_dist[0] > 0) == (ref_dist[1] > 0)) {
1736
0
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
1737
0
        }
1738
0
      }
1739
1740
0
      if (cpi->sf.inter_sf.selective_ref_frame >= 4 &&
1741
0
          (rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME) &&
1742
0
          (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
1743
        // Check if both ALTREF2_FRAME and BWDREF_FRAME are future references.
1744
0
        if (arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist) {
1745
          // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
1746
          // reference to the current frame than ALTREF2_FRAME
1747
0
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
1748
0
        }
1749
0
      }
1750
0
    }
1751
25.6k
  }
1752
108k
}
1753
1754
91.2k
static int allow_deltaq_mode(AV1_COMP *cpi) {
1755
91.2k
#if !CONFIG_REALTIME_ONLY
1756
91.2k
  AV1_COMMON *const cm = &cpi->common;
1757
91.2k
  BLOCK_SIZE sb_size = cm->seq_params->sb_size;
1758
91.2k
  int sbs_wide = mi_size_wide[sb_size];
1759
91.2k
  int sbs_high = mi_size_high[sb_size];
1760
1761
91.2k
  int64_t delta_rdcost = 0;
1762
253k
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sbs_high) {
1763
460k
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += sbs_wide) {
1764
298k
      int64_t this_delta_rdcost = 0;
1765
298k
      av1_get_q_for_deltaq_objective(cpi, &cpi->td, &this_delta_rdcost, sb_size,
1766
298k
                                     mi_row, mi_col);
1767
298k
      delta_rdcost += this_delta_rdcost;
1768
298k
    }
1769
162k
  }
1770
91.2k
  return delta_rdcost < 0;
1771
#else
1772
  (void)cpi;
1773
  return 1;
1774
#endif  // !CONFIG_REALTIME_ONLY
1775
91.2k
}
1776
1777
0
#define FORCE_ZMV_SKIP_128X128_BLK_DIFF 10000
1778
#define FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF 4
1779
1780
// Populates block level thresholds for force zeromv-skip decision
1781
108k
static void populate_thresh_to_force_zeromv_skip(AV1_COMP *cpi) {
1782
108k
  if (cpi->sf.rt_sf.part_early_exit_zeromv == 0) return;
1783
1784
  // Threshold for forcing zeromv-skip decision is as below:
1785
  // For 128x128 blocks, threshold is 10000 and per pixel threshold is 0.6103.
1786
  // For 64x64 blocks, threshold is 5000 and per pixel threshold is 1.221
1787
  // allowing slightly higher error for smaller blocks.
1788
  // Per Pixel Threshold of 64x64 block        Area of 64x64 block         1  1
1789
  // ------------------------------------=sqrt(---------------------)=sqrt(-)=-
1790
  // Per Pixel Threshold of 128x128 block      Area of 128x128 block       4  2
1791
  // Thus, per pixel thresholds for blocks of size 32x32, 16x16,...  can be
1792
  // chosen as 2.442, 4.884,.... As the per pixel error tends to be higher for
1793
  // small blocks, the same is clipped to 4.
1794
0
  const unsigned int thresh_exit_128x128_part = FORCE_ZMV_SKIP_128X128_BLK_DIFF;
1795
0
  const int num_128x128_pix =
1796
0
      block_size_wide[BLOCK_128X128] * block_size_high[BLOCK_128X128];
1797
1798
0
  for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
1799
0
    const int num_block_pix = block_size_wide[bsize] * block_size_high[bsize];
1800
1801
    // Calculate the threshold for zeromv-skip decision based on area of the
1802
    // partition
1803
0
    unsigned int thresh_exit_part_blk =
1804
0
        (unsigned int)(thresh_exit_128x128_part *
1805
0
                           sqrt((double)num_block_pix / num_128x128_pix) +
1806
0
                       0.5);
1807
0
    thresh_exit_part_blk = AOMMIN(
1808
0
        thresh_exit_part_blk,
1809
0
        (unsigned int)(FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF * num_block_pix));
1810
0
    cpi->zeromv_skip_thresh_exit_part[bsize] = thresh_exit_part_blk;
1811
0
  }
1812
0
}
1813
1814
0
static void free_block_hash_buffers(uint32_t *block_hash_values[2]) {
1815
0
  for (int j = 0; j < 2; ++j) {
1816
0
    aom_free(block_hash_values[j]);
1817
0
  }
1818
0
}
1819
1820
/*!\brief Determines delta_q_res value for Variance Boost modulation.
1821
 */
1822
0
static int aom_get_variance_boost_delta_q_res(int qindex) {
1823
  // Signaling delta_q changes across superblocks comes with inherent syntax
1824
  // element overhead, which adds up to total payload size. This overhead
1825
  // becomes proportionally bigger the higher the base qindex (i.e. lower
1826
  // quality, smaller file size), so a balance needs to be struck.
1827
  // - Smaller delta_q_res: more granular delta_q control, more bits spent
1828
  // signaling deltas.
1829
  // - Larger delta_q_res: coarser delta_q control, less bits spent signaling
1830
  // deltas.
1831
  //
1832
  // At the same time, SB qindex fluctuations become larger the higher
1833
  // the base qindex (between lowest and highest-variance regions):
1834
  // - For QP 5: up to 8 qindexes
1835
  // - For QP 60: up to 52 qindexes
1836
  //
1837
  // With these factors in mind, it was found that the best strategy that
1838
  // maximizes quality per bitrate is by having very finely-grained delta_q
1839
  // values for the lowest picture qindexes (to preserve tiny qindex SB deltas),
1840
  // and progressively making them coarser as base qindex increases (to reduce
1841
  // total signaling overhead).
1842
0
  int delta_q_res = 1;
1843
1844
0
  if (qindex >= 160) {
1845
0
    delta_q_res = 8;
1846
0
  } else if (qindex >= 120) {
1847
0
    delta_q_res = 4;
1848
0
  } else if (qindex >= 80) {
1849
0
    delta_q_res = 2;
1850
0
  } else {
1851
0
    delta_q_res = 1;
1852
0
  }
1853
1854
0
  return delta_q_res;
1855
0
}
1856
1857
#if !CONFIG_REALTIME_ONLY
1858
0
static float get_thresh_based_on_q(int qindex, int speed) {
1859
0
  const float min_threshold_arr[2] = { 0.06f, 0.09f };
1860
0
  const float max_threshold_arr[2] = { 0.10f, 0.13f };
1861
1862
0
  const float min_thresh = min_threshold_arr[speed >= 3];
1863
0
  const float max_thresh = max_threshold_arr[speed >= 3];
1864
0
  const float thresh = min_thresh + (max_thresh - min_thresh) *
1865
0
                                        ((float)MAXQ - (float)qindex) /
1866
0
                                        (float)(MAXQ - MINQ);
1867
0
  return thresh;
1868
0
}
1869
1870
0
static int get_mv_err(MV cur_mv, MV ref_mv) {
1871
0
  const MV diff = { cur_mv.row - ref_mv.row, cur_mv.col - ref_mv.col };
1872
0
  const MV abs_diff = { abs(diff.row), abs(diff.col) };
1873
0
  const int mv_err = (abs_diff.row + abs_diff.col);
1874
0
  return mv_err;
1875
0
}
1876
1877
0
static void check_mv_err_and_update(MV cur_mv, MV ref_mv, int *best_mv_err) {
1878
0
  const int mv_err = get_mv_err(cur_mv, ref_mv);
1879
0
  *best_mv_err = AOMMIN(mv_err, *best_mv_err);
1880
0
}
1881
1882
static int is_inside_frame_border(int mi_row, int mi_col, int row_offset,
1883
                                  int col_offset, int num_mi_rows,
1884
0
                                  int num_mi_cols) {
1885
0
  if (mi_row + row_offset < 0 || mi_row + row_offset >= num_mi_rows ||
1886
0
      mi_col + col_offset < 0 || mi_col + col_offset >= num_mi_cols)
1887
0
    return 0;
1888
1889
0
  return 1;
1890
0
}
1891
1892
// Compute the minimum MV error between current MV and spatial MV predictors.
1893
static int get_spatial_mvpred_err(AV1_COMMON *cm, TplParams *const tpl_data,
1894
                                  int tpl_idx, int mi_row, int mi_col,
1895
                                  int ref_idx, int_mv cur_mv, int allow_hp,
1896
0
                                  int is_integer) {
1897
0
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
1898
0
  TplDepStats *tpl_ptr = tpl_frame->tpl_stats_ptr;
1899
0
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
1900
1901
0
  int mv_err = INT32_MAX;
1902
0
  const int step = 1 << block_mis_log2;
1903
0
  const int mv_pred_pos_in_mis[6][2] = {
1904
0
    { -step, 0 },     { 0, -step },     { -step, step },
1905
0
    { -step, -step }, { -2 * step, 0 }, { 0, -2 * step },
1906
0
  };
1907
1908
0
  for (int i = 0; i < 6; i++) {
1909
0
    int row_offset = mv_pred_pos_in_mis[i][0];
1910
0
    int col_offset = mv_pred_pos_in_mis[i][1];
1911
0
    if (!is_inside_frame_border(mi_row, mi_col, row_offset, col_offset,
1912
0
                                tpl_frame->mi_rows, tpl_frame->mi_cols)) {
1913
0
      continue;
1914
0
    }
1915
1916
0
    const TplDepStats *tpl_stats =
1917
0
        &tpl_ptr[av1_tpl_ptr_pos(mi_row + row_offset, mi_col + col_offset,
1918
0
                                 tpl_frame->stride, block_mis_log2)];
1919
0
    int_mv this_refmv = tpl_stats->mv[ref_idx];
1920
0
    lower_mv_precision(&this_refmv.as_mv, allow_hp, is_integer);
1921
0
    check_mv_err_and_update(cur_mv.as_mv, this_refmv.as_mv, &mv_err);
1922
0
  }
1923
1924
  // Check MV error w.r.t. Global MV / Zero MV
1925
0
  int_mv gm_mv = { 0 };
1926
0
  if (cm->global_motion[ref_idx + LAST_FRAME].wmtype > TRANSLATION) {
1927
0
    const BLOCK_SIZE bsize = convert_length_to_bsize(tpl_data->tpl_bsize_1d);
1928
0
    gm_mv = gm_get_motion_vector(&cm->global_motion[ref_idx + LAST_FRAME],
1929
0
                                 allow_hp, bsize, mi_col, mi_row, is_integer);
1930
0
  }
1931
0
  check_mv_err_and_update(cur_mv.as_mv, gm_mv.as_mv, &mv_err);
1932
1933
0
  return mv_err;
1934
0
}
1935
1936
// Compute the minimum MV error between current MV and temporal MV predictors.
1937
static int get_temporal_mvpred_err(AV1_COMMON *cm, int mi_row, int mi_col,
1938
                                   int num_mi_rows, int num_mi_cols,
1939
                                   int ref_idx, int_mv cur_mv, int allow_hp,
1940
0
                                   int is_integer) {
1941
0
  const RefCntBuffer *ref_buf = get_ref_frame_buf(cm, ref_idx + LAST_FRAME);
1942
0
  if (ref_buf == NULL) return INT32_MAX;
1943
0
  int cur_to_ref_dist =
1944
0
      get_relative_dist(&cm->seq_params->order_hint_info,
1945
0
                        cm->cur_frame->order_hint, ref_buf->order_hint);
1946
1947
0
  int mv_err = INT32_MAX;
1948
0
  const int mv_pred_pos_in_mis[7][2] = {
1949
0
    { 0, 0 }, { 0, 2 }, { 2, 0 }, { 2, 2 }, { 4, -2 }, { 4, 4 }, { 2, 4 },
1950
0
  };
1951
1952
0
  for (int i = 0; i < 7; i++) {
1953
0
    int row_offset = mv_pred_pos_in_mis[i][0];
1954
0
    int col_offset = mv_pred_pos_in_mis[i][1];
1955
0
    if (!is_inside_frame_border(mi_row, mi_col, row_offset, col_offset,
1956
0
                                num_mi_rows, num_mi_cols)) {
1957
0
      continue;
1958
0
    }
1959
0
    const TPL_MV_REF *ref_mvs =
1960
0
        cm->tpl_mvs +
1961
0
        ((mi_row + row_offset) >> 1) * (cm->mi_params.mi_stride >> 1) +
1962
0
        ((mi_col + col_offset) >> 1);
1963
0
    if (ref_mvs->mfmv0.as_int == INVALID_MV) continue;
1964
1965
0
    int_mv this_refmv;
1966
0
    av1_get_mv_projection(&this_refmv.as_mv, ref_mvs->mfmv0.as_mv,
1967
0
                          cur_to_ref_dist, ref_mvs->ref_frame_offset);
1968
0
    lower_mv_precision(&this_refmv.as_mv, allow_hp, is_integer);
1969
0
    check_mv_err_and_update(cur_mv.as_mv, this_refmv.as_mv, &mv_err);
1970
0
  }
1971
1972
0
  return mv_err;
1973
0
}
1974
1975
// Determine whether to disable temporal MV prediction for the current frame
1976
// based on TPL and motion field data. Temporal MV prediction is disabled if the
1977
// reduction in MV error by including temporal MVs as MV predictors is small.
1978
108k
static void check_to_disable_ref_frame_mvs(AV1_COMP *cpi) {
1979
108k
  AV1_COMMON *cm = &cpi->common;
1980
108k
  if (!cm->features.allow_ref_frame_mvs || cpi->sf.hl_sf.ref_frame_mvs_lvl != 1)
1981
108k
    return;
1982
1983
0
  const int tpl_idx = cpi->gf_frame_index;
1984
0
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
1985
0
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
1986
1987
0
  const SUBPEL_FORCE_STOP tpl_subpel_precision =
1988
0
      cpi->sf.tpl_sf.subpel_force_stop;
1989
0
  const int allow_high_precision_mv = tpl_subpel_precision == EIGHTH_PEL &&
1990
0
                                      cm->features.allow_high_precision_mv;
1991
0
  const int force_integer_mv = tpl_subpel_precision == FULL_PEL ||
1992
0
                               cm->features.cur_frame_force_integer_mv;
1993
1994
0
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
1995
0
  TplDepStats *tpl_ptr = tpl_frame->tpl_stats_ptr;
1996
0
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
1997
0
  const int step = 1 << block_mis_log2;
1998
1999
0
  uint64_t accum_spatial_mvpred_err = 0;
2000
0
  uint64_t accum_best_err = 0;
2001
2002
0
  for (int mi_row = 0; mi_row < tpl_frame->mi_rows; mi_row += step) {
2003
0
    for (int mi_col = 0; mi_col < tpl_frame->mi_cols; mi_col += step) {
2004
0
      TplDepStats *tpl_stats_ptr = &tpl_ptr[av1_tpl_ptr_pos(
2005
0
          mi_row, mi_col, tpl_frame->stride, block_mis_log2)];
2006
0
      const int cur_best_ref_idx = tpl_stats_ptr->ref_frame_index[0];
2007
0
      if (cur_best_ref_idx == NONE_FRAME) continue;
2008
2009
0
      int_mv cur_mv = tpl_stats_ptr->mv[cur_best_ref_idx];
2010
0
      lower_mv_precision(&cur_mv.as_mv, allow_high_precision_mv,
2011
0
                         force_integer_mv);
2012
2013
0
      const int cur_spatial_mvpred_err = get_spatial_mvpred_err(
2014
0
          cm, tpl_data, tpl_idx, mi_row, mi_col, cur_best_ref_idx, cur_mv,
2015
0
          allow_high_precision_mv, force_integer_mv);
2016
2017
0
      const int cur_temporal_mvpred_err = get_temporal_mvpred_err(
2018
0
          cm, mi_row, mi_col, tpl_frame->mi_rows, tpl_frame->mi_cols,
2019
0
          cur_best_ref_idx, cur_mv, allow_high_precision_mv, force_integer_mv);
2020
2021
0
      const int cur_best_err =
2022
0
          AOMMIN(cur_spatial_mvpred_err, cur_temporal_mvpred_err);
2023
0
      accum_spatial_mvpred_err += cur_spatial_mvpred_err;
2024
0
      accum_best_err += cur_best_err;
2025
0
    }
2026
0
  }
2027
2028
0
  const float threshold =
2029
0
      get_thresh_based_on_q(cm->quant_params.base_qindex, cpi->oxcf.speed);
2030
0
  const float mv_err_reduction =
2031
0
      (float)(accum_spatial_mvpred_err - accum_best_err);
2032
2033
0
  if (mv_err_reduction <= threshold * accum_spatial_mvpred_err)
2034
0
    cm->features.allow_ref_frame_mvs = 0;
2035
0
}
2036
#endif  // !CONFIG_REALTIME_ONLY
2037
2038
/*!\brief Encoder setup(only for the current frame), encoding, and
 * reconstruction for a single frame
 *
 * Performs all per-frame initialization (mode-info grids, quantizer /
 * lossless state, delta-q configuration, intraBC hash tables, motion-field
 * setup), dispatches the actual tile encoding (row-multithreaded,
 * tile-multithreaded, or single-threaded), and then folds the frame's
 * statistics back into the frame-level probability models used to prune
 * tx types, OBMC, warped motion, and interpolation filters on later frames.
 *
 * \ingroup high_level_algo
 */
static inline void encode_frame_internal(AV1_COMP *cpi) {
  ThreadData *const td = &cpi->td;
  MACROBLOCK *const x = &td->mb;
  AV1_COMMON *const cm = &cpi->common;
  CommonModeInfoParams *const mi_params = &cm->mi_params;
  FeatureFlags *const features = &cm->features;
  MACROBLOCKD *const xd = &x->e_mbd;
  RD_COUNTS *const rdc = &cpi->td.rd_counts;
#if CONFIG_FPMT_TEST
  // Simulation-mode probability tables used only by the frame-parallel
  // multithreading unit-test configuration.
  FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
  FrameProbInfo *const temp_frame_probs_simulation =
      &cpi->ppi->temp_frame_probs_simulation;
#endif
  FrameProbInfo *const frame_probs = &cpi->ppi->frame_probs;
  IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
  MultiThreadInfo *const mt_info = &cpi->mt_info;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
  const DELTAQ_MODE deltaq_mode = oxcf->q_cfg.deltaq_mode;
  int i;

  // The non-RD (real-time) path skips the full mode-info grid setup.
  if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
    mi_params->setup_mi(mi_params);
  }

  set_mi_offsets(mi_params, xd, 0, 0);

  // Clear per-frame counters accumulated during encoding.
  av1_zero(*td->counts);
  av1_zero(rdc->tx_type_used);
  av1_zero(rdc->obmc_used);
  av1_zero(rdc->warped_used);
  av1_zero(rdc->seg_tmp_pred_cost);

  // Reset the flag.
  cpi->intrabc_used = 0;
  // Need to disable intrabc when superres is selected
  if (av1_superres_scaled(cm)) {
    features->allow_intrabc = 0;
  }

  features->allow_intrabc &= (oxcf->kf_cfg.enable_intrabc);

  // Optionally disable warped motion for this frame when the running
  // warped-motion probability has dropped below the speed-feature threshold.
  if (features->allow_warped_motion &&
      cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    int warped_probability =
#if CONFIG_FPMT_TEST
        cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE
            ? temp_frame_probs->warped_probs[update_type]
            :
#endif  // CONFIG_FPMT_TEST
            frame_probs->warped_probs[update_type];
    if (warped_probability < cpi->sf.inter_sf.prune_warped_prob_thresh)
      features->allow_warped_motion = 0;
  }

  // Build the intraBC hash table for hash-based motion estimation, when
  // enabled. On allocation failure, aom_internal_error() reports
  // AOM_CODEC_MEM_ERROR (note: it does not return normally).
  int hash_table_created = 0;
  if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
      !cpi->sf.rt_sf.use_nonrd_pick_mode) {
    // TODO(any): move this outside of the recoding loop to avoid recalculating
    // the hash table.
    // add to hash table
    const int pic_width = cpi->source->y_crop_width;
    const int pic_height = cpi->source->y_crop_height;
    uint32_t *block_hash_values[2] = { NULL };  // two buffers used ping-pong
    bool error = false;

    for (int j = 0; j < 2; ++j) {
      block_hash_values[j] = (uint32_t *)aom_malloc(
          sizeof(*block_hash_values[j]) * pic_width * pic_height);
      if (!block_hash_values[j]) {
        error = true;
        break;
      }
    }

    av1_hash_table_init(intrabc_hash_info);
    if (error ||
        !av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table)) {
      free_block_hash_buffers(block_hash_values);
      aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                         "Error allocating intrabc_hash_table and buffers");
    }
    hash_table_created = 1;
    av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0]);
    // Hash data generated for screen contents is used for intraBC ME
    const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
    int max_sb_size = (1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2));

    if (cpi->sf.mv_sf.hash_max_8x8_intrabc_blocks) {
      max_sb_size = AOMMIN(8, max_sb_size);
    }

    // Build hash values bottom-up, doubling the block size each iteration
    // and ping-ponging between the two buffers.
    int src_idx = 0;
    for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
      const int dst_idx = !src_idx;
      av1_generate_block_hash_value(intrabc_hash_info, cpi->source, size,
                                    block_hash_values[src_idx],
                                    block_hash_values[dst_idx]);
      if (size >= min_alloc_size &&
          !av1_add_to_hash_map_by_row_with_precal_data(
              &intrabc_hash_info->intrabc_hash_table,
              block_hash_values[dst_idx], pic_width, pic_height, size)) {
        error = true;
        break;
      }
    }

    free_block_hash_buffers(block_hash_values);

    if (error) {
      aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                         "Error adding data to intrabc_hash_table");
    }
  }

  // Per-segment quantizer setup: a segment is lossless iff its qindex and
  // all DC/AC delta-q values are zero.
  const CommonQuantParams *quant_params = &cm->quant_params;
  for (i = 0; i < MAX_SEGMENTS; ++i) {
    const int qindex =
        cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
                        : quant_params->base_qindex;
    xd->lossless[i] =
        qindex == 0 && quant_params->y_dc_delta_q == 0 &&
        quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
        quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
    if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
    xd->qindex[i] = qindex;
    if (xd->lossless[i]) {
      // Trellis optimization is meaningless for lossless segments.
      cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
    } else {
      cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
    }
  }
  features->coded_lossless = is_coded_lossless(cm, xd);
  features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm);

  // Fix delta q resolution for the moment

  cm->delta_q_info.delta_q_res = 0;
  if (cpi->use_ducky_encode) {
    cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_DUCKY_ENCODE;
  } else if (cpi->oxcf.q_cfg.aq_mode != CYCLIC_REFRESH_AQ &&
             !cpi->roi.enabled) {
    // Choose the delta-q step resolution based on the configured delta-q mode.
    if (deltaq_mode == DELTA_Q_OBJECTIVE)
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE;
    else if (deltaq_mode == DELTA_Q_PERCEPTUAL)
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
    else if (deltaq_mode == DELTA_Q_PERCEPTUAL_AI)
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
    else if (deltaq_mode == DELTA_Q_USER_RATING_BASED)
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
    else if (deltaq_mode == DELTA_Q_HDR)
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
    else if (deltaq_mode == DELTA_Q_VARIANCE_BOOST)
      cm->delta_q_info.delta_q_res =
          aom_get_variance_boost_delta_q_res(quant_params->base_qindex);
    // Set delta_q_present_flag before it is used for the first time
    cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
    cm->delta_q_info.delta_q_present_flag = deltaq_mode != NO_DELTA_Q;

    // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q
    // is used for ineligible frames. That effectively will turn off row_mt
    // usage. Note objective delta_q and tpl eligible frames are only altref
    // frames currently.
    const GF_GROUP *gf_group = &cpi->ppi->gf_group;
    if (cm->delta_q_info.delta_q_present_flag) {
      if (deltaq_mode == DELTA_Q_OBJECTIVE &&
          gf_group->update_type[cpi->gf_frame_index] == LF_UPDATE)
        cm->delta_q_info.delta_q_present_flag = 0;

      if (deltaq_mode == DELTA_Q_OBJECTIVE &&
          cm->delta_q_info.delta_q_present_flag) {
        cm->delta_q_info.delta_q_present_flag &= allow_deltaq_mode(cpi);
      }
    }

    // Reset delta_q_used flag
    cpi->deltaq_used = 0;

    cm->delta_q_info.delta_lf_present_flag =
        cm->delta_q_info.delta_q_present_flag &&
        oxcf->tool_cfg.enable_deltalf_mode;
    cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;

    // update delta_q_present_flag and delta_lf_present_flag based on
    // base_qindex
    cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
    cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
  } else if (cpi->cyclic_refresh->apply_cyclic_refresh ||
             cpi->svc.number_temporal_layers == 1) {
    cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
    cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
  }
  cpi->rc.cnt_zeromv = 0;

  av1_frame_init_quantizer(cpi);
  init_encode_frame_mb_context(cpi);
  set_default_interp_skip_flags(cm, &cpi->interp_search_flags);

  // Reuse the previous frame's segmentation map for temporal prediction
  // only when segmentation was enabled on it (single spatial layer only).
  if (cm->prev_frame && cm->prev_frame->seg.enabled &&
      cpi->svc.number_spatial_layers == 1)
    cm->last_frame_seg_map = cm->prev_frame->seg_map;
  else
    cm->last_frame_seg_map = NULL;
  // Loop-filter deltas: reset to defaults for intraBC/lossless frames,
  // otherwise inherit from the previous frame when available.
  if (features->allow_intrabc || features->coded_lossless) {
    av1_set_default_ref_deltas(cm->lf.ref_deltas);
    av1_set_default_mode_deltas(cm->lf.mode_deltas);
  } else if (cm->prev_frame) {
    memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
    memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
  }
  memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
  memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);

  cpi->all_one_sided_refs =
      frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm);

  cpi->prune_ref_frame_mask = 0;
  // Figure out which ref frames can be skipped at frame level.
  setup_prune_ref_frame_mask(cpi);

  x->txfm_search_info.txb_split_count = 0;
#if CONFIG_SPEED_STATS
  x->txfm_search_info.tx_search_count = 0;
#endif  // CONFIG_SPEED_STATS

#if !CONFIG_REALTIME_ONLY
#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, av1_compute_global_motion_time);
#endif
  av1_compute_global_motion_facade(cpi);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, av1_compute_global_motion_time);
#endif
#endif  // !CONFIG_REALTIME_ONLY

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, av1_setup_motion_field_time);
#endif
  av1_calculate_ref_frame_side(cm);

  // ref_frame_mvs_lvl == 2 force-disables temporal MV prediction.
  features->allow_ref_frame_mvs &= !(cpi->sf.hl_sf.ref_frame_mvs_lvl == 2);
  if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
#if !CONFIG_REALTIME_ONLY
  check_to_disable_ref_frame_mvs(cpi);
#endif  // !CONFIG_REALTIME_ONLY

#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, av1_setup_motion_field_time);
#endif

  cm->current_frame.skip_mode_info.skip_mode_flag =
      check_skip_mode_enabled(cpi);

  // Initialization of skip mode cost depends on the value of
  // 'skip_mode_flag'. This initialization happens in the function
  // av1_fill_mode_rates(), which is in turn called in
  // av1_initialize_rd_consts(). Thus, av1_initialize_rd_consts()
  // has to be called after 'skip_mode_flag' is initialized.
  av1_initialize_rd_consts(cpi);
  av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
  populate_thresh_to_force_zeromv_skip(cpi);

  // Dispatch the tile encoding: row-multithreaded, tile-multithreaded, or
  // single-threaded, depending on configuration and worker availability.
  enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
  enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
  mt_info->row_mt_enabled = 0;
  mt_info->pack_bs_mt_enabled = AOMMIN(mt_info->num_mod_workers[MOD_PACK_BS],
                                       cm->tiles.cols * cm->tiles.rows) > 1;

  if (oxcf->row_mt && (mt_info->num_workers > 1)) {
    mt_info->row_mt_enabled = 1;
    enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
    enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
    av1_encode_tiles_row_mt(cpi);
  } else {
    if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1) {
      av1_encode_tiles_mt(cpi);
    } else {
      // Preallocate the pc_tree for realtime coding to reduce the cost of
      // memory allocation.
      const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
      if (use_nonrd_mode) {
        td->pc_root = av1_alloc_pc_tree_node(cm->seq_params->sb_size);
        if (!td->pc_root)
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                             "Failed to allocate PC_TREE");
      } else {
        td->pc_root = NULL;
      }

      encode_tiles(cpi);
      av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
                                 cpi->sf.part_sf.partition_search_type);
      td->pc_root = NULL;
    }
  }

  // If intrabc is allowed but never selected, reset the allow_intrabc flag.
  if (features->allow_intrabc && !cpi->intrabc_used) {
    features->allow_intrabc = 0;
  }
  if (features->allow_intrabc) {
    cm->delta_q_info.delta_lf_present_flag = 0;
  }

  // Likewise drop the delta-q signaling if no block actually used a delta-q.
  if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
    cm->delta_q_info.delta_q_present_flag = 0;
  }

  // Set the transform size appropriately before bitstream creation
  const MODE_EVAL_TYPE eval_type =
      cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch
          ? WINNER_MODE_EVAL
          : DEFAULT_EVAL;
  const TX_SIZE_SEARCH_METHOD tx_search_type =
      cpi->winner_mode_params.tx_size_search_methods[eval_type];
  assert(oxcf->txfm_cfg.enable_tx64 || tx_search_type != USE_LARGESTALL);
  features->tx_mode = select_tx_mode(cm, tx_search_type);

  // Retain the frame level probability update conditions for parallel frames.
  // These conditions will be consumed during postencode stage to update the
  // probability.
  if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
    cpi->do_update_frame_probs_txtype[cpi->num_frame_recode] =
        cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats;
    cpi->do_update_frame_probs_obmc[cpi->num_frame_recode] =
        (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
         cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX);
    cpi->do_update_frame_probs_warp[cpi->num_frame_recode] =
        (features->allow_warped_motion &&
         cpi->sf.inter_sf.prune_warped_prob_thresh > 0);
    cpi->do_update_frame_probs_interpfilter[cpi->num_frame_recode] =
        (cm->current_frame.frame_type != KEY_FRAME &&
         cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
         features->interp_filter == SWITCHABLE);
  }

  // Update frame-level tx-type probabilities from this frame's usage counts.
  // Each new probability is averaged with the running one; the iteration runs
  // from the last tx type down to 0 so any rounding remainder ("left") can be
  // folded into entry 0, keeping the row summing to MAX_TX_TYPE_PROB.
  if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats ||
      ((cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh !=
        INT_MAX) &&
       (cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh != 0))) {
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    for (i = 0; i < TX_SIZES_ALL; i++) {
      int sum = 0;
      int j;
      int left = MAX_TX_TYPE_PROB;

      for (j = 0; j < TX_TYPES; j++)
        sum += cpi->td.rd_counts.tx_type_used[i][j];

      for (j = TX_TYPES - 1; j >= 0; j--) {
        int update_txtype_frameprobs = 1;
        const int new_prob =
            sum ? (int)((int64_t)MAX_TX_TYPE_PROB *
                        cpi->td.rd_counts.tx_type_used[i][j] / sum)
                : (j ? 0 : MAX_TX_TYPE_PROB);
#if CONFIG_FPMT_TEST
        if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
          if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
              0) {
            int prob =
                (temp_frame_probs_simulation->tx_type_probs[update_type][i][j] +
                 new_prob) >>
                1;
            left -= prob;
            if (j == 0) prob += left;
            temp_frame_probs_simulation->tx_type_probs[update_type][i][j] =
                prob;
            // Copy temp_frame_probs_simulation to temp_frame_probs
            for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
                 update_type_idx++) {
              temp_frame_probs->tx_type_probs[update_type_idx][i][j] =
                  temp_frame_probs_simulation
                      ->tx_type_probs[update_type_idx][i][j];
            }
          }
          update_txtype_frameprobs = 0;
        }
#endif  // CONFIG_FPMT_TEST
        // Track the frame probabilities of parallel encode frames to update
        // during postencode stage.
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
          update_txtype_frameprobs = 0;
          cpi->frame_new_probs[cpi->num_frame_recode]
              .tx_type_probs[update_type][i][j] = new_prob;
        }
        if (update_txtype_frameprobs) {
          int prob =
              (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
          left -= prob;
          if (j == 0) prob += left;
          frame_probs->tx_type_probs[update_type][i][j] = prob;
        }
      }
    }
  }

  // Decide whether the segmentation map should be temporally predicted,
  // based on the accumulated RD cost of each alternative.
  if (cm->seg.enabled) {
    cm->seg.temporal_update = 1;
    if (rdc->seg_tmp_pred_cost[0] < rdc->seg_tmp_pred_cost[1])
      cm->seg.temporal_update = 0;
  }

  // Update frame-level OBMC probabilities (per block size) by averaging the
  // observed usage ratio into the running probability.
  if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
      cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);

    for (i = 0; i < BLOCK_SIZES_ALL; i++) {
      int sum = 0;
      int update_obmc_frameprobs = 1;
      for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];

      const int new_prob =
          sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
#if CONFIG_FPMT_TEST
      if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
          temp_frame_probs_simulation->obmc_probs[update_type][i] =
              (temp_frame_probs_simulation->obmc_probs[update_type][i] +
               new_prob) >>
              1;
          // Copy temp_frame_probs_simulation to temp_frame_probs
          for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
               update_type_idx++) {
            temp_frame_probs->obmc_probs[update_type_idx][i] =
                temp_frame_probs_simulation->obmc_probs[update_type_idx][i];
          }
        }
        update_obmc_frameprobs = 0;
      }
#endif  // CONFIG_FPMT_TEST
      // Track the frame probabilities of parallel encode frames to update
      // during postencode stage.
      if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
        update_obmc_frameprobs = 0;
        cpi->frame_new_probs[cpi->num_frame_recode].obmc_probs[update_type][i] =
            new_prob;
      }
      if (update_obmc_frameprobs) {
        frame_probs->obmc_probs[update_type][i] =
            (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
      }
    }
  }

  // Update the frame-level warped-motion probability the same way.
  if (features->allow_warped_motion &&
      cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    int update_warp_frameprobs = 1;
    int sum = 0;
    for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
    const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
#if CONFIG_FPMT_TEST
    if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
      if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
        temp_frame_probs_simulation->warped_probs[update_type] =
            (temp_frame_probs_simulation->warped_probs[update_type] +
             new_prob) >>
            1;
        // Copy temp_frame_probs_simulation to temp_frame_probs
        for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
             update_type_idx++) {
          temp_frame_probs->warped_probs[update_type_idx] =
              temp_frame_probs_simulation->warped_probs[update_type_idx];
        }
      }
      update_warp_frameprobs = 0;
    }
#endif  // CONFIG_FPMT_TEST
    // Track the frame probabilities of parallel encode frames to update
    // during postencode stage.
    if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
      update_warp_frameprobs = 0;
      cpi->frame_new_probs[cpi->num_frame_recode].warped_probs[update_type] =
          new_prob;
    }
    if (update_warp_frameprobs) {
      frame_probs->warped_probs[update_type] =
          (frame_probs->warped_probs[update_type] + new_prob) >> 1;
    }
  }

  // Update switchable interpolation-filter probabilities per filter context.
  // As with tx types, iterate top-down and fold the rounding remainder into
  // entry 0 so each context's probabilities keep summing to 1536.
  if (cm->current_frame.frame_type != KEY_FRAME &&
      cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
      features->interp_filter == SWITCHABLE) {
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);

    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
      int sum = 0;
      int j;
      int left = 1536;

      for (j = 0; j < SWITCHABLE_FILTERS; j++) {
        sum += cpi->td.counts->switchable_interp[i][j];
      }

      for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
        int update_interpfilter_frameprobs = 1;
        const int new_prob =
            sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
                : (j ? 0 : 1536);
#if CONFIG_FPMT_TEST
        if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
          if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
              0) {
            int prob = (temp_frame_probs_simulation
                            ->switchable_interp_probs[update_type][i][j] +
                        new_prob) >>
                       1;
            left -= prob;
            if (j == 0) prob += left;
            temp_frame_probs_simulation
                ->switchable_interp_probs[update_type][i][j] = prob;
            // Copy temp_frame_probs_simulation to temp_frame_probs
            for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
                 update_type_idx++) {
              temp_frame_probs->switchable_interp_probs[update_type_idx][i][j] =
                  temp_frame_probs_simulation
                      ->switchable_interp_probs[update_type_idx][i][j];
            }
          }
          update_interpfilter_frameprobs = 0;
        }
#endif  // CONFIG_FPMT_TEST
        // Track the frame probabilities of parallel encode frames to update
        // during postencode stage.
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
          update_interpfilter_frameprobs = 0;
          cpi->frame_new_probs[cpi->num_frame_recode]
              .switchable_interp_probs[update_type][i][j] = new_prob;
        }
        if (update_interpfilter_frameprobs) {
          int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
                      new_prob) >>
                     1;
          left -= prob;
          if (j == 0) prob += left;
          frame_probs->switchable_interp_probs[update_type][i][j] = prob;
        }
      }
    }
  }
  // Release the intraBC hash table if one was built above.
  if (hash_table_created) {
    av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
  }
}
2594
2595
/*!\brief Setup reference frame buffers and encode a frame
 *
 * Clamps the segmentation map, configures reference-frame bookkeeping and
 * sign bias, then runs encode_frame_internal(). Afterwards it downgrades
 * frame-level signaling (reference mode, skip mode, tx mode) that turned out
 * to be unused by any block, so the bitstream does not pay for unused tools.
 *
 * \ingroup high_level_algo
 * \callgraph
 * \callergraph
 *
 * \param[in]    cpi    Top-level encoder structure
 */
void av1_encode_frame(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  CurrentFrame *const current_frame = &cm->current_frame;
  FeatureFlags *const features = &cm->features;
  RD_COUNTS *const rdc = &cpi->td.rd_counts;
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
  // Indicates whether or not to use a default reduced set for ext-tx
  // rather than the potential full set of 16 transforms
  features->reduced_tx_set_used = oxcf->txfm_cfg.reduced_tx_type_set;

  // Make sure segment_id is no larger than last_active_segid.
  if (cm->seg.enabled && cm->seg.update_map) {
    const int mi_rows = cm->mi_params.mi_rows;
    const int mi_cols = cm->mi_params.mi_cols;
    const int last_active_segid = cm->seg.last_active_segid;
    uint8_t *map = cpi->enc_seg.map;
    // Clamp every mode-info unit's segment id, row by row.
    for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
      for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
        map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
      }
      map += mi_cols;
    }
  }

  av1_setup_frame_buf_refs(cm);
  enforce_max_ref_frames(cpi, &cpi->ref_frame_flags,
                         cm->cur_frame->ref_display_order_hint,
                         cm->current_frame.display_order_hint);
  set_rel_frame_dist(&cpi->common, &cpi->ref_frame_dist_info,
                     cpi->ref_frame_flags);
  av1_setup_frame_sign_bias(cm);

  // If global motion is enabled, then every buffer which is used as either
  // a source or a ref frame should have an image pyramid allocated.
  // Check here so that issues can be caught early in debug mode
#if !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
  if (cpi->alloc_pyramid) {
    assert(cpi->source->y_pyramid);
    for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
      const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
      if (buf != NULL) {
        assert(buf->buf.y_pyramid);
      }
    }
  }
#endif  // !defined(NDEBUG) && !CONFIG_REALTIME_ONLY

#if CONFIG_MISMATCH_DEBUG
  mismatch_reset_frame(av1_num_planes(cm));
#endif

  rdc->newmv_or_intra_blocks = 0;
  cpi->palette_pixel_num = 0;

  if (cpi->sf.hl_sf.frame_parameter_update ||
      cpi->sf.rt_sf.use_comp_ref_nonrd) {
    // Full parameter-update path: let the search choose between single and
    // compound reference for inter frames, then prune signaling afterwards.
    if (frame_is_intra_only(cm))
      current_frame->reference_mode = SINGLE_REFERENCE;
    else
      current_frame->reference_mode = REFERENCE_MODE_SELECT;

    features->interp_filter = SWITCHABLE;
    if (cm->tiles.large_scale) features->interp_filter = EIGHTTAP_REGULAR;

    features->switchable_motion_mode = is_switchable_motion_mode_allowed(
        features->allow_warped_motion, oxcf->motion_mode_cfg.enable_obmc);

    rdc->compound_ref_used_flag = 0;
    rdc->skip_mode_used_flag = 0;

    encode_frame_internal(cpi);

    if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
      // Use a flag that includes 4x4 blocks
      if (rdc->compound_ref_used_flag == 0) {
        // No block used compound prediction: signal single reference only.
        current_frame->reference_mode = SINGLE_REFERENCE;
#if CONFIG_ENTROPY_STATS
        av1_zero(cpi->td.counts->comp_inter);
#endif  // CONFIG_ENTROPY_STATS
      }
    }
    // Re-check on the skip mode status as reference mode may have been
    // changed.
    SkipModeInfo *const skip_mode_info = &current_frame->skip_mode_info;
    if (frame_is_intra_only(cm) ||
        current_frame->reference_mode == SINGLE_REFERENCE) {
      skip_mode_info->skip_mode_allowed = 0;
      skip_mode_info->skip_mode_flag = 0;
    }
    // Drop skip-mode signaling if no block actually used it.
    if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
      skip_mode_info->skip_mode_flag = 0;

    if (!cm->tiles.large_scale) {
      // If tx size selection was on but no transform block was ever split,
      // signal the cheaper TX_MODE_LARGEST instead.
      if (features->tx_mode == TX_MODE_SELECT &&
          cpi->td.mb.txfm_search_info.txb_split_count == 0)
        features->tx_mode = TX_MODE_LARGEST;
    }
  } else {
    // This is needed if real-time speed setting is changed on the fly
    // from one using compound prediction to one using single reference.
    if (current_frame->reference_mode == REFERENCE_MODE_SELECT)
      current_frame->reference_mode = SINGLE_REFERENCE;
    encode_frame_internal(cpi);
  }
}