Coverage Report

Created: 2026-01-16 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/aom/av1/encoder/encodeframe.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <limits.h>
13
#include <float.h>
14
#include <math.h>
15
#include <stdbool.h>
16
#include <stdio.h>
17
18
#include "config/aom_config.h"
19
#include "config/aom_dsp_rtcd.h"
20
#include "config/av1_rtcd.h"
21
22
#include "aom_dsp/aom_dsp_common.h"
23
#include "aom_dsp/binary_codes_writer.h"
24
#include "aom_ports/mem.h"
25
#include "aom_ports/aom_timer.h"
26
#include "aom_util/aom_pthread.h"
27
#if CONFIG_MISMATCH_DEBUG
28
#include "aom_util/debug_util.h"
29
#endif  // CONFIG_MISMATCH_DEBUG
30
31
#include "av1/common/cfl.h"
32
#include "av1/common/common.h"
33
#include "av1/common/common_data.h"
34
#include "av1/common/entropy.h"
35
#include "av1/common/entropymode.h"
36
#include "av1/common/idct.h"
37
#include "av1/common/mv.h"
38
#include "av1/common/mvref_common.h"
39
#include "av1/common/pred_common.h"
40
#include "av1/common/quant_common.h"
41
#include "av1/common/reconintra.h"
42
#include "av1/common/reconinter.h"
43
#include "av1/common/seg_common.h"
44
#include "av1/common/tile_common.h"
45
#include "av1/common/warped_motion.h"
46
47
#include "av1/encoder/allintra_vis.h"
48
#include "av1/encoder/aq_complexity.h"
49
#include "av1/encoder/aq_cyclicrefresh.h"
50
#include "av1/encoder/aq_variance.h"
51
#include "av1/encoder/av1_quantize.h"
52
#include "av1/encoder/global_motion_facade.h"
53
#include "av1/encoder/encodeframe.h"
54
#include "av1/encoder/encodeframe_utils.h"
55
#include "av1/encoder/encodemb.h"
56
#include "av1/encoder/encodemv.h"
57
#include "av1/encoder/encodetxb.h"
58
#include "av1/encoder/ethread.h"
59
#include "av1/encoder/extend.h"
60
#include "av1/encoder/intra_mode_search_utils.h"
61
#include "av1/encoder/ml.h"
62
#include "av1/encoder/motion_search_facade.h"
63
#include "av1/encoder/partition_strategy.h"
64
#if !CONFIG_REALTIME_ONLY
65
#include "av1/encoder/partition_model_weights.h"
66
#endif
67
#include "av1/encoder/partition_search.h"
68
#include "av1/encoder/rd.h"
69
#include "av1/encoder/rdopt.h"
70
#include "av1/encoder/reconinter_enc.h"
71
#include "av1/encoder/segmentation.h"
72
#include "av1/encoder/tokenize.h"
73
#include "av1/encoder/tpl_model.h"
74
#include "av1/encoder/var_based_part.h"
75
76
#if CONFIG_TUNE_VMAF
77
#include "av1/encoder/tune_vmaf.h"
78
#endif
79
80
/*!\cond */
81
// This is used as a reference when computing the source variance for the
82
//  purposes of activity masking.
83
// Eventually this should be replaced by custom no-reference routines,
84
//  which will be faster.
85
static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
86
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
87
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
88
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
89
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
90
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
91
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
92
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
93
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
94
  128, 128, 128, 128, 128, 128, 128, 128
95
};
96
97
#if CONFIG_AV1_HIGHBITDEPTH
98
static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
99
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
100
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
101
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
102
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
103
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
104
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
105
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
106
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
107
  128, 128, 128, 128, 128, 128, 128, 128
108
};
109
110
static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
111
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
112
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
113
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
114
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
115
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
116
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
117
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
118
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
119
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
120
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
121
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
122
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
123
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
124
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
125
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
126
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
127
};
128
129
static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
130
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
131
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
132
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
133
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
134
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
135
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
136
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
137
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
138
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
139
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
140
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
141
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
142
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
143
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
144
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
145
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
146
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
147
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
148
  128 * 16, 128 * 16
149
};
150
#endif  // CONFIG_AV1_HIGHBITDEPTH
151
/*!\endcond */
152
153
// For the given bit depth, returns a constant array used to assist the
154
// calculation of source block variance, which will then be used to decide
155
// adaptive quantizers.
156
32.3M
static const uint8_t *get_var_offs(int use_hbd, int bd) {
157
32.3M
#if CONFIG_AV1_HIGHBITDEPTH
158
32.3M
  if (use_hbd) {
159
4.43M
    assert(bd == 8 || bd == 10 || bd == 12);
160
4.43M
    const int off_index = (bd - 8) >> 1;
161
4.43M
    static const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
162
4.43M
                                                AV1_HIGH_VAR_OFFS_10,
163
4.43M
                                                AV1_HIGH_VAR_OFFS_12 };
164
4.43M
    return CONVERT_TO_BYTEPTR(high_var_offs[off_index]);
165
4.43M
  }
166
#else
167
  (void)use_hbd;
168
  (void)bd;
169
  assert(!use_hbd);
170
#endif
171
32.3M
  assert(bd == 8);
172
27.8M
  return AV1_VAR_OFFS;
173
32.3M
}
174
175
294k
void av1_init_rtc_counters(MACROBLOCK *const x) {
176
294k
  av1_init_cyclic_refresh_counters(x);
177
294k
  x->cnt_zeromv = 0;
178
294k
}
179
180
50.3k
void av1_accumulate_rtc_counters(AV1_COMP *cpi, const MACROBLOCK *const x) {
181
50.3k
  if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ)
182
0
    av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh, x);
183
50.3k
  cpi->rc.cnt_zeromv += x->cnt_zeromv;
184
50.3k
  cpi->rc.num_col_blscroll_last_tl0 += x->sb_col_scroll;
185
50.3k
  cpi->rc.num_row_blscroll_last_tl0 += x->sb_row_scroll;
186
50.3k
}
187
188
unsigned int av1_get_perpixel_variance(const AV1_COMP *cpi,
189
                                       const MACROBLOCKD *xd,
190
                                       const struct buf_2d *ref,
191
                                       BLOCK_SIZE bsize, int plane,
192
32.3M
                                       int use_hbd) {
193
32.3M
  const int subsampling_x = xd->plane[plane].subsampling_x;
194
32.3M
  const int subsampling_y = xd->plane[plane].subsampling_y;
195
32.3M
  const BLOCK_SIZE plane_bsize =
196
32.3M
      get_plane_block_size(bsize, subsampling_x, subsampling_y);
197
32.3M
  unsigned int sse;
198
32.3M
  const unsigned int var = cpi->ppi->fn_ptr[plane_bsize].vf(
199
32.3M
      ref->buf, ref->stride, get_var_offs(use_hbd, xd->bd), 0, &sse);
200
32.3M
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[plane_bsize]);
201
32.3M
}
202
203
unsigned int av1_get_perpixel_variance_facade(const AV1_COMP *cpi,
204
                                              const MACROBLOCKD *xd,
205
                                              const struct buf_2d *ref,
206
32.3M
                                              BLOCK_SIZE bsize, int plane) {
207
32.3M
  const int use_hbd = is_cur_buf_hbd(xd);
208
32.3M
  return av1_get_perpixel_variance(cpi, xd, ref, bsize, plane, use_hbd);
209
32.3M
}
210
211
void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
212
                          int mi_row, int mi_col, const int num_planes,
213
63.1M
                          BLOCK_SIZE bsize) {
214
  // Set current frame pointer.
215
63.1M
  x->e_mbd.cur_buf = src;
216
217
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
218
  // the static analysis warnings.
219
178M
  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
220
115M
    const int is_uv = i > 0;
221
115M
    setup_pred_plane(
222
115M
        &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv],
223
115M
        src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL,
224
115M
        x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y);
225
115M
  }
226
63.1M
}
227
228
#if !CONFIG_REALTIME_ONLY
229
/*!\brief Assigns different quantization parameters to each superblock
230
 * based on statistics relevant to the selected delta-q mode (variance).
231
 * This is the non-rd version.
232
 *
233
 * \param[in]     cpi         Top level encoder instance structure
234
 * \param[in,out] td          Thread data structure
235
 * \param[in,out] x           Superblock level data for this block.
236
 * \param[in]     tile_info   Tile information / identification
237
 * \param[in]     mi_row      Block row (in "MI_SIZE" units) index
238
 * \param[in]     mi_col      Block column (in "MI_SIZE" units) index
239
 * \param[out]    num_planes  Number of image planes (e.g. Y,U,V)
240
 *
241
 * \remark No return value but updates superblock and thread data
242
 * related to the q / q delta to be used.
243
 */
244
static inline void setup_delta_q_nonrd(AV1_COMP *const cpi, ThreadData *td,
245
                                       MACROBLOCK *const x,
246
                                       const TileInfo *const tile_info,
247
0
                                       int mi_row, int mi_col, int num_planes) {
248
0
  AV1_COMMON *const cm = &cpi->common;
249
0
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
250
0
  assert(delta_q_info->delta_q_present_flag);
251
252
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
253
0
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
254
255
0
  const int delta_q_res = delta_q_info->delta_q_res;
256
0
  int current_qindex = cm->quant_params.base_qindex;
257
258
0
  if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_VARIANCE_BOOST) {
259
0
    current_qindex = av1_get_sbq_variance_boost(cpi, x);
260
0
  }
261
262
0
  x->rdmult_cur_qindex = current_qindex;
263
0
  MACROBLOCKD *const xd = &x->e_mbd;
264
0
  current_qindex = av1_adjust_q_from_delta_q_res(
265
0
      delta_q_res, xd->current_base_qindex, current_qindex);
266
267
0
  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
268
0
  x->rdmult_delta_qindex = x->delta_qindex;
269
270
0
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
271
0
  xd->mi[0]->current_qindex = current_qindex;
272
0
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);
273
274
  // keep track of any non-zero delta-q used
275
0
  td->deltaq_used |= (x->delta_qindex != 0);
276
0
}
277
278
/*!\brief Assigns different quantization parameters to each superblock
279
 * based on statistics relevant to the selected delta-q mode (TPL weight,
280
 * variance, HDR, etc).
281
 *
282
 * \ingroup tpl_modelling
283
 *
284
 * \param[in]     cpi         Top level encoder instance structure
285
 * \param[in,out] td          Thread data structure
286
 * \param[in,out] x           Superblock level data for this block.
287
 * \param[in]     tile_info   Tile information / identification
288
 * \param[in]     mi_row      Block row (in "MI_SIZE" units) index
289
 * \param[in]     mi_col      Block column (in "MI_SIZE" units) index
290
 * \param[out]    num_planes  Number of image planes (e.g. Y,U,V)
291
 *
292
 * \remark No return value but updates superblock and thread data
293
 * related to the q / q delta to be used.
294
 */
295
static inline void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
296
                                 MACROBLOCK *const x,
297
                                 const TileInfo *const tile_info, int mi_row,
298
2.22k
                                 int mi_col, int num_planes) {
299
2.22k
  AV1_COMMON *const cm = &cpi->common;
300
2.22k
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
301
2.22k
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
302
2.22k
  assert(delta_q_info->delta_q_present_flag);
303
304
2.22k
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
305
2.22k
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
306
307
2.22k
  const int delta_q_res = delta_q_info->delta_q_res;
308
2.22k
  int current_qindex = cm->quant_params.base_qindex;
309
2.22k
  if (cpi->use_ducky_encode && cpi->ducky_encode_info.frame_info.qp_mode ==
310
0
                                   DUCKY_ENCODE_FRAME_MODE_QINDEX) {
311
0
    const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
312
0
    const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
313
0
    const int sb_cols =
314
0
        CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, cm->seq_params->mib_size_log2);
315
0
    const int sb_index = sb_row * sb_cols + sb_col;
316
0
    current_qindex =
317
0
        cpi->ducky_encode_info.frame_info.superblock_encode_qindex[sb_index];
318
2.22k
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL) {
319
0
    if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
320
0
      const int block_wavelet_energy_level =
321
0
          av1_block_wavelet_energy_level(cpi, x, sb_size);
322
0
      x->sb_energy_level = block_wavelet_energy_level;
323
0
      current_qindex = av1_compute_q_from_energy_level_deltaq_mode(
324
0
          cpi, block_wavelet_energy_level);
325
0
    } else {
326
0
      const int block_var_level = av1_log_block_var(cpi, x, sb_size);
327
0
      x->sb_energy_level = block_var_level;
328
0
      current_qindex =
329
0
          av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level);
330
0
    }
331
2.22k
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE &&
332
2.22k
             cpi->oxcf.algo_cfg.enable_tpl_model) {
333
    // Setup deltaq based on tpl stats
334
2.22k
    current_qindex =
335
2.22k
        av1_get_q_for_deltaq_objective(cpi, td, NULL, sb_size, mi_row, mi_col);
336
18.4E
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL_AI) {
337
0
    current_qindex = av1_get_sbq_perceptual_ai(cpi, sb_size, mi_row, mi_col);
338
18.4E
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) {
339
0
    current_qindex = av1_get_sbq_user_rating_based(cpi, mi_row, mi_col);
340
18.4E
  } else if (cpi->oxcf.q_cfg.enable_hdr_deltaq) {
341
0
    current_qindex = av1_get_q_for_hdr(cpi, x, sb_size, mi_row, mi_col);
342
18.4E
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_VARIANCE_BOOST) {
343
0
    current_qindex = av1_get_sbq_variance_boost(cpi, x);
344
0
  }
345
346
2.22k
  x->rdmult_cur_qindex = current_qindex;
347
2.22k
  MACROBLOCKD *const xd = &x->e_mbd;
348
2.22k
  const int adjusted_qindex = av1_adjust_q_from_delta_q_res(
349
2.22k
      delta_q_res, xd->current_base_qindex, current_qindex);
350
2.22k
  if (cpi->use_ducky_encode) {
351
0
    assert(adjusted_qindex == current_qindex);
352
0
  }
353
2.22k
  current_qindex = adjusted_qindex;
354
355
2.22k
  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
356
2.22k
  x->rdmult_delta_qindex = x->delta_qindex;
357
358
2.22k
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
359
2.22k
  xd->mi[0]->current_qindex = current_qindex;
360
2.22k
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);
361
362
  // keep track of any non-zero delta-q used
363
2.22k
  td->deltaq_used |= (x->delta_qindex != 0);
364
365
2.22k
  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
366
0
    const int delta_lf_res = delta_q_info->delta_lf_res;
367
0
    const int lfmask = ~(delta_lf_res - 1);
368
0
    const int delta_lf_from_base =
369
0
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
370
0
    const int8_t delta_lf =
371
0
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
372
0
    const int frame_lf_count =
373
0
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
374
0
    const int mib_size = cm->seq_params->mib_size;
375
376
    // pre-set the delta lf for loop filter. Note that this value is set
377
    // before mi is assigned for each block in current superblock
378
0
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
379
0
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
380
0
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
381
0
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
382
0
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
383
0
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
384
0
        }
385
0
      }
386
0
    }
387
0
  }
388
2.22k
}
389
390
static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
391
282k
                                 int mi_col) {
392
282k
  const AV1_COMMON *cm = &cpi->common;
393
282k
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
394
282k
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
395
282k
  MACROBLOCK *x = &td->mb;
396
282k
  const int frame_idx = cpi->gf_frame_index;
397
282k
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
398
282k
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
399
400
282k
  av1_zero(x->tpl_keep_ref_frame);
401
402
282k
  if (!av1_tpl_stats_ready(tpl_data, frame_idx)) return;
403
43.2k
  if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return;
404
25.8k
  if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;
405
406
25.8k
  const int is_overlay =
407
25.8k
      cpi->ppi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
408
25.8k
  if (is_overlay) {
409
0
    memset(x->tpl_keep_ref_frame, 1, sizeof(x->tpl_keep_ref_frame));
410
0
    return;
411
0
  }
412
413
25.8k
  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
414
25.8k
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
415
25.8k
  const int tpl_stride = tpl_frame->stride;
416
25.8k
  int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
417
25.8k
  const int step = 1 << block_mis_log2;
418
25.8k
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
419
420
25.8k
  const int mi_row_end =
421
25.8k
      AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
422
25.8k
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
423
25.8k
  const int mi_col_sr =
424
25.8k
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
425
25.8k
  const int mi_col_end_sr =
426
25.8k
      AOMMIN(coded_to_superres_mi(mi_col + mi_size_wide[sb_size],
427
25.8k
                                  cm->superres_scale_denominator),
428
25.8k
             mi_cols_sr);
429
25.8k
  const int row_step = step;
430
25.8k
  const int col_step_sr =
431
25.8k
      coded_to_superres_mi(step, cm->superres_scale_denominator);
432
89.2k
  for (int row = mi_row; row < mi_row_end; row += row_step) {
433
222k
    for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
434
159k
      const TplDepStats *this_stats =
435
159k
          &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
436
159k
      int64_t tpl_pred_error[INTER_REFS_PER_FRAME] = { 0 };
437
      // Find the winner ref frame idx for the current block
438
159k
      int64_t best_inter_cost = this_stats->pred_error[0];
439
159k
      int best_rf_idx = 0;
440
1.11M
      for (int idx = 1; idx < INTER_REFS_PER_FRAME; ++idx) {
441
954k
        if ((this_stats->pred_error[idx] < best_inter_cost) &&
442
0
            (this_stats->pred_error[idx] != 0)) {
443
0
          best_inter_cost = this_stats->pred_error[idx];
444
0
          best_rf_idx = idx;
445
0
        }
446
954k
      }
447
      // tpl_pred_error is the pred_error reduction of best_ref w.r.t.
448
      // LAST_FRAME.
449
159k
      tpl_pred_error[best_rf_idx] = this_stats->pred_error[best_rf_idx] -
450
159k
                                    this_stats->pred_error[LAST_FRAME - 1];
451
452
1.11M
      for (int rf_idx = 1; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx)
453
954k
        inter_cost[rf_idx] += tpl_pred_error[rf_idx];
454
159k
    }
455
63.4k
  }
456
457
25.8k
  int rank_index[INTER_REFS_PER_FRAME - 1];
458
180k
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
459
154k
    rank_index[idx] = idx + 1;
460
540k
    for (int i = idx; i > 0; --i) {
461
386k
      if (inter_cost[rank_index[i - 1]] > inter_cost[rank_index[i]]) {
462
0
        const int tmp = rank_index[i - 1];
463
0
        rank_index[i - 1] = rank_index[i];
464
0
        rank_index[i] = tmp;
465
0
      }
466
386k
    }
467
154k
  }
468
469
25.8k
  x->tpl_keep_ref_frame[INTRA_FRAME] = 1;
470
25.8k
  x->tpl_keep_ref_frame[LAST_FRAME] = 1;
471
472
25.8k
  int cutoff_ref = 0;
473
180k
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
474
154k
    x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 1;
475
154k
    if (idx > 2) {
476
77.3k
      if (!cutoff_ref) {
477
        // If the predictive coding gains are smaller than the previous more
478
        // relevant frame over certain amount, discard this frame and all the
479
        // frames afterwards.
480
25.7k
        if (llabs(inter_cost[rank_index[idx]]) <
481
25.7k
                llabs(inter_cost[rank_index[idx - 1]]) / 8 ||
482
25.8k
            inter_cost[rank_index[idx]] == 0)
483
25.7k
          cutoff_ref = 1;
484
25.7k
      }
485
486
77.3k
      if (cutoff_ref) x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 0;
487
77.3k
    }
488
154k
  }
489
25.8k
}
490
491
static inline void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
492
0
                                           int mi_row, int mi_col) {
493
0
  const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
494
0
  const int orig_rdmult = cpi->rd.RDMULT;
495
496
0
  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
497
0
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
498
0
  const int gf_group_index = cpi->gf_frame_index;
499
0
  if (cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ &&
500
0
      cpi->oxcf.q_cfg.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 &&
501
0
      cpi->ppi->gf_group.update_type[gf_group_index] == ARF_UPDATE) {
502
0
    const int dr =
503
0
        av1_get_rdmult_delta(cpi, sb_size, mi_row, mi_col, orig_rdmult);
504
0
    x->rdmult = dr;
505
0
  }
506
0
}
507
#endif  // !CONFIG_REALTIME_ONLY
508
509
#if CONFIG_RT_ML_PARTITIONING
510
// Get a prediction(stored in x->est_pred) for the whole superblock.
511
static void get_estimated_pred(AV1_COMP *cpi, const TileInfo *const tile,
512
                               MACROBLOCK *x, int mi_row, int mi_col) {
513
  AV1_COMMON *const cm = &cpi->common;
514
  const int is_key_frame = frame_is_intra_only(cm);
515
  MACROBLOCKD *xd = &x->e_mbd;
516
517
  // TODO(kyslov) Extend to 128x128
518
  assert(cm->seq_params->sb_size == BLOCK_64X64);
519
520
  av1_set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
521
522
  if (!is_key_frame) {
523
    MB_MODE_INFO *mi = xd->mi[0];
524
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
525
526
    assert(yv12 != NULL);
527
528
    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
529
                         get_ref_scale_factors(cm, LAST_FRAME), 1);
530
    mi->ref_frame[0] = LAST_FRAME;
531
    mi->ref_frame[1] = NONE;
532
    mi->bsize = BLOCK_64X64;
533
    mi->mv[0].as_int = 0;
534
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
535
536
    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
537
538
    xd->plane[0].dst.buf = x->est_pred;
539
    xd->plane[0].dst.stride = 64;
540
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
541
  } else {
542
#if CONFIG_AV1_HIGHBITDEPTH
543
    switch (xd->bd) {
544
      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
545
      case 10:
546
        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
547
        break;
548
      case 12:
549
        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
550
        break;
551
    }
552
#else
553
    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
554
#endif  // CONFIG_VP9_HIGHBITDEPTH
555
  }
556
}
557
#endif  // CONFIG_RT_ML_PARTITIONING
558
559
10.8k
#define AVG_CDF_WEIGHT_LEFT 3
560
10.8k
#define AVG_CDF_WEIGHT_TOP_RIGHT 1
561
562
/*!\brief Encode a superblock (minimal RD search involved)
563
 *
564
 * \ingroup partition_search
565
 * Encodes the superblock by a pre-determined partition pattern, only minor
566
 * rd-based searches are allowed to adjust the initial pattern. It is only used
567
 * by realtime encoding.
568
 */
569
static inline void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
570
                                   TileDataEnc *tile_data, TokenExtra **tp,
571
                                   const int mi_row, const int mi_col,
572
149k
                                   const int seg_skip) {
573
149k
  AV1_COMMON *const cm = &cpi->common;
574
149k
  MACROBLOCK *const x = &td->mb;
575
149k
  const SPEED_FEATURES *const sf = &cpi->sf;
576
149k
  const TileInfo *const tile_info = &tile_data->tile_info;
577
149k
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
578
149k
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
579
149k
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
580
149k
  PC_TREE *const pc_root = td->pc_root;
581
582
149k
#if !CONFIG_REALTIME_ONLY
583
149k
  if (cm->delta_q_info.delta_q_present_flag) {
584
0
    const int num_planes = av1_num_planes(cm);
585
586
0
    setup_delta_q_nonrd(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
587
0
  }
588
149k
#endif
589
#if CONFIG_RT_ML_PARTITIONING
590
  if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) {
591
    RD_STATS dummy_rdc;
592
    get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
593
    av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
594
                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, pc_root);
595
    return;
596
  }
597
#endif
598
  // Set the partition
599
149k
  if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
600
149k
      (sf->rt_sf.use_fast_fixed_part && x->sb_force_fixed_part == 1 &&
601
0
       (!frame_is_intra_only(cm) &&
602
0
        (!cpi->ppi->use_svc ||
603
0
         !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)))) {
604
    // set a fixed-size partition
605
0
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
606
0
    BLOCK_SIZE bsize_select = sf->part_sf.fixed_partition_size;
607
0
    if (sf->rt_sf.use_fast_fixed_part &&
608
0
        x->content_state_sb.source_sad_nonrd < kLowSad) {
609
0
      bsize_select = cm->seq_params->sb_size;
610
0
    }
611
0
    if (cpi->sf.rt_sf.skip_encoding_non_reference_slide_change &&
612
0
        cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) {
613
0
      bsize_select = cm->seq_params->sb_size;
614
0
      x->force_zeromv_skip_for_sb = 1;
615
0
    }
616
0
    const BLOCK_SIZE bsize = seg_skip ? sb_size : bsize_select;
617
0
    if (x->content_state_sb.source_sad_nonrd > kZeroSad)
618
0
      x->force_color_check_block_level = 1;
619
0
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
620
149k
  } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
621
    // set a variance-based partition
622
149k
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
623
149k
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
624
149k
  }
625
149k
  assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
626
149k
         sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
627
149k
  set_cb_offsets(td->mb.cb_offset, 0, 0);
628
629
  // Initialize the flag to skip cdef to 1.
630
149k
  if (sf->rt_sf.skip_cdef_sb) {
631
0
    const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
632
    // If 128x128 block is used, we need to set the flag for all 4 64x64 sub
633
    // "blocks".
634
0
    for (int r = 0; r < block64_in_sb; ++r) {
635
0
      for (int c = 0; c < block64_in_sb; ++c) {
636
0
        const int idx_in_sb =
637
0
            r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
638
0
        if (mi[idx_in_sb]) mi[idx_in_sb]->cdef_strength = 1;
639
0
      }
640
0
    }
641
0
  }
642
643
#if CONFIG_COLLECT_COMPONENT_TIMING
644
  start_timing(cpi, nonrd_use_partition_time);
645
#endif
646
149k
  av1_nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
647
149k
                          pc_root);
648
#if CONFIG_COLLECT_COMPONENT_TIMING
649
  end_timing(cpi, nonrd_use_partition_time);
650
#endif
651
149k
}
652
653
// This function initializes the stats for encode_rd_sb.
//
// Seeds the simple-motion-search MVs (when any SMS-based speed feature is
// active on an inter frame), gathers per-SB TPL/delta-q data unless running
// a stats-free real-time configuration, and resets the per-SB search state
// (mode-eval type, RD records, picked-ref mask) before the RD partition
// search. rd_cost is set to the "invalid" sentinel so the search starts
// from a clean slate.
static inline void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                     const TileDataEnc *tile_data,
                                     SIMPLE_MOTION_DATA_TREE *sms_root,
                                     RD_STATS *rd_cost, int mi_row, int mi_col,
                                     int gather_tpl_data) {
  const AV1_COMMON *cm = &cpi->common;
  const TileInfo *tile_info = &tile_data->tile_info;
  MACROBLOCK *x = &td->mb;

  const SPEED_FEATURES *sf = &cpi->sf;
  // Simple motion search is only useful on inter frames and only when at
  // least one of the SMS-driven pruning/termination features is enabled.
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_root,
                                             mi_row, mi_col);
  }

#if !CONFIG_REALTIME_ONLY
  // Skip stats gathering for a pure real-time, zero-lag, stats-free encode.
  if (!(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
        cpi->oxcf.gf_cfg.lag_in_frames == 0)) {
    init_ref_frame_space(cpi, td, mi_row, mi_col);
    x->sb_energy_level = 0;
    x->part_search_info.cnn_output_valid = 0;
    if (gather_tpl_data) {
      if (cm->delta_q_info.delta_q_present_flag) {
        const int num_planes = av1_num_planes(cm);
        const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
        setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
        av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
      }

      // TODO(jingning): revisit this function.
      // NOTE(review): the `&& (0)` makes this branch intentionally dead;
      // adjust_rdmult_tpl_model is currently disabled here.
      if (cpi->oxcf.algo_cfg.enable_tpl_model && (0)) {
        adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
      }
    }
  }
#else
  (void)tile_info;
  (void)mi_row;
  (void)mi_col;
  (void)gather_tpl_data;
#endif

  // Reset per-SB search state so this superblock's search starts clean.
  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}
708
709
#if !CONFIG_REALTIME_ONLY
710
// Sets up encoder state for evaluating one candidate delta-qp offset
// (delta_qp_ofs) during the superblock QP sweep: re-seeds simple motion
// search MVs, derives the candidate qindex snapped to the frame's delta-q
// resolution, initializes the plane quantizers and, when delta-lf mode is
// enabled, pre-sets per-block loop-filter deltas derived from the qp delta.
// Finally resets the per-SB search state (same fields as init_encode_rd_sb).
static void sb_qp_sweep_init_quantizers(AV1_COMP *cpi, ThreadData *td,
                                        const TileDataEnc *tile_data,
                                        SIMPLE_MOTION_DATA_TREE *sms_tree,
                                        RD_STATS *rd_cost, int mi_row,
                                        int mi_col, int delta_qp_ofs) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const TileInfo *tile_info = &tile_data->tile_info;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  // The sweep is only meaningful when delta-q signaling is on.
  assert(delta_q_info->delta_q_present_flag);
  const int delta_q_res = delta_q_info->delta_q_res;

  const SPEED_FEATURES *sf = &cpi->sf;
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_tree,
                                             mi_row, mi_col);
  }

  int current_qindex = x->rdmult_cur_qindex + delta_qp_ofs;

  MACROBLOCKD *const xd = &x->e_mbd;
  // Snap the candidate qindex so the resulting delta is codable at the
  // frame's delta-q resolution.
  current_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;

  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);

  // keep track of any non-zero delta-q used
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
    // Derive a loop-filter delta from the qp delta, rounded down to the
    // delta-lf resolution.
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // pre-set the delta lf for loop filter. Note that this value is set
    // before mi is assigned for each block in current superblock
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }

  // Reset per-SB search state so each sweep iteration starts clean.
  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}
781
782
// Sweeps delta-qp offsets around the SB's rdmult delta qindex, running a
// dry-pass RD partition search for each candidate, and returns the qindex
// giving the lowest RD cost. Ties are broken in favor of the candidate with
// the smaller absolute qp offset. sb_org_stats holds the pre-sweep SB state
// and is restored before every candidate so each pass starts identically.
static int sb_qp_sweep(AV1_COMP *const cpi, ThreadData *td,
                       TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
                       int mi_col, BLOCK_SIZE bsize,
                       SIMPLE_MOTION_DATA_TREE *sms_tree,
                       SB_FIRST_PASS_STATS *sb_org_stats) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  RD_STATS rdc_winner, cur_rdc;
  av1_invalid_rd_stats(&rdc_winner);

  int best_qindex = td->mb.rdmult_delta_qindex;
  // Key frames are swept over a wider delta-qp range than inter frames.
  const int start = cm->current_frame.frame_type == KEY_FRAME ? -20 : -12;
  const int end = cm->current_frame.frame_type == KEY_FRAME ? 20 : 12;
  const int step = cm->delta_q_info.delta_q_res;

  for (int sweep_qp_delta = start; sweep_qp_delta <= end;
       sweep_qp_delta += step) {
    sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_tree, &cur_rdc, mi_row,
                                mi_col, sweep_qp_delta);

    const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
    const int backup_current_qindex =
        cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

    // Restore the SB to its pre-sweep state, but keep the candidate qindex
    // that the quantizer setup above just wrote.
    av1_reset_mbmi(&cm->mi_params, bsize, mi_row, mi_col);
    av1_restore_sb_state(sb_org_stats, cpi, td, tile_data, mi_row, mi_col);
    cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex = backup_current_qindex;

    td->pc_root = av1_alloc_pc_tree_node(bsize);
    if (!td->pc_root)
      aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize,
                          &cur_rdc, cur_rdc, td->pc_root, sms_tree, NULL,
                          SB_DRY_PASS, NULL);

    // Prefer strictly lower RD cost; on an exact tie prefer the smaller
    // absolute qp offset.
    if ((rdc_winner.rdcost > cur_rdc.rdcost) ||
        (abs(sweep_qp_delta) < abs(best_qindex - x->rdmult_delta_qindex) &&
         rdc_winner.rdcost == cur_rdc.rdcost)) {
      rdc_winner = cur_rdc;
      best_qindex = x->rdmult_delta_qindex + sweep_qp_delta;
    }
  }

  return best_qindex;
}
828
#endif  //! CONFIG_REALTIME_ONLY
829
830
/*!\brief Encode a superblock (RD-search-based)
 *
 * \ingroup partition_search
 * Conducts partition search for a superblock, based on rate-distortion costs,
 * from scratch or adjusting from a pre-calculated partition pattern.
 *
 * Dispatches on the partition search type: variance-based seed partition,
 * fixed-size partition (also used when the segment signals skip), or the
 * full recursive RD partition search, optionally preceded by a per-SB QP
 * sweep and/or run as a two-pass (dry + wet) search for testing.
 */
static inline void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, TokenExtra **tp,
                                const int mi_row, const int mi_col,
                                const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int num_planes = av1_num_planes(cm);
  int dummy_rate;
  int64_t dummy_dist;
  RD_STATS dummy_rdc;
  SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root;

#if CONFIG_REALTIME_ONLY
  (void)seg_skip;
#endif  // CONFIG_REALTIME_ONLY

  init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
                    1);

  // Encode the superblock
  if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // partition search starting from a variance-based partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_use_partition_time);
#endif
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_use_partition_time);
#endif
  }
#if !CONFIG_REALTIME_ONLY
  else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
    // partition search by adjusting a fixed-size partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    // A skip segment forces whole-SB blocks; otherwise use the configured
    // fixed partition size.
    const BLOCK_SIZE bsize =
        seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
  } else {
    // The most exhaustive recursive partition search
    SuperBlockEnc *sb_enc = &x->sb_enc;
    // No stats for overlay frames. Exclude key frame.
    av1_get_tpl_stats_sb(cpi, sb_size, mi_row, mi_col, sb_enc);

    // Reset the tree for simple motion search data
    av1_reset_simple_motion_tree_partition(sms_root, sb_size);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_pick_partition_time);
#endif

    // Estimate the maximum square partition block size, which will be used
    // as the starting block size for partitioning the sb
    set_max_min_partition_size(sb_enc, cpi, x, sf, sb_size, mi_row, mi_col);

    // The superblock can be searched only once, or twice consecutively for
    // better quality. Note that the meaning of passes here is different from
    // the general concept of 1-pass/2-pass encoders.
    const int num_passes =
        cpi->oxcf.unit_test_cfg.sb_multipass_unit_test ? 2 : 1;

    // Optional per-SB QP sweep: find the best delta-qp via dry passes, then
    // restore the SB state and lock in the winning quantizer setup before
    // the real search below.
    if (cpi->oxcf.sb_qp_sweep &&
        !(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
          cpi->oxcf.gf_cfg.lag_in_frames == 0) &&
        cm->delta_q_info.delta_q_present_flag) {
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_stats_cache,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_stats_cache)));
      av1_backup_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                          mi_col);
      assert(x->rdmult_delta_qindex == x->delta_qindex);

      const int best_qp_diff =
          sb_qp_sweep(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, sms_root,
                      td->mb.sb_stats_cache) -
          x->rdmult_delta_qindex;

      sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_root, &dummy_rdc,
                                  mi_row, mi_col, best_qp_diff);

      const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
      const int backup_current_qindex =
          cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_restore_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                           mi_col);

      cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex =
          backup_current_qindex;
      aom_free(td->mb.sb_stats_cache);
      td->mb.sb_stats_cache = NULL;
    }
    if (num_passes == 1) {
#if CONFIG_PARTITION_SEARCH_ORDER
      if (cpi->ext_part_controller.ready && !frame_is_intra_only(cm)) {
        av1_reset_part_sf(&cpi->sf.part_sf);
        av1_reset_sf_for_ext_part(cpi);
        RD_STATS this_rdc;
        av1_rd_partition_search(cpi, td, tile_data, tp, sms_root, mi_row,
                                mi_col, sb_size, &this_rdc);
      } else {
        td->pc_root = av1_alloc_pc_tree_node(sb_size);
        if (!td->pc_root)
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                             "Failed to allocate PC_TREE");
        av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                              &dummy_rdc, dummy_rdc, td->pc_root, sms_root,
                              NULL, SB_SINGLE_PASS, NULL);
      }
#else
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_SINGLE_PASS, NULL);
#endif  // CONFIG_PARTITION_SEARCH_ORDER
    } else {
      // First pass
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_fp_stats,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_fp_stats)));
      av1_backup_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                          mi_col);
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_DRY_PASS, NULL);

      // Second pass
      init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
                        mi_col, 0);
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_reset_simple_motion_tree_partition(sms_root, sb_size);

      av1_restore_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                           mi_col);

      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_WET_PASS, NULL);
      aom_free(td->mb.sb_fp_stats);
      td->mb.sb_fp_stats = NULL;
    }

    // Reset to 0 so that it wouldn't be used elsewhere mistakenly.
    sb_enc->tpl_data_count = 0;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_pick_partition_time);
#endif
  }
#endif  // !CONFIG_REALTIME_ONLY

  // Update the inter rd model
  // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
  if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
      cm->tiles.cols == 1 && cm->tiles.rows == 1) {
    av1_inter_mode_data_fit(tile_data, x->rdmult);
  }
}
1030
1031
// Check if the cost update of symbols mode, coeff and dv are tile or off.
1032
static inline int is_mode_coeff_dv_upd_freq_tile_or_off(
1033
562k
    const AV1_COMP *const cpi) {
1034
562k
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;
1035
1036
562k
  return (inter_sf->coeff_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
1037
93.1k
          inter_sf->mode_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
1038
93.1k
          cpi->sf.intra_sf.dv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
1039
562k
}
1040
1041
// When row-mt is enabled and cost update frequencies are set to off/tile,
1042
// processing of current SB can start even before processing of top-right SB
1043
// is finished. This function checks if it is sufficient to wait for top SB
1044
// to finish processing before current SB starts processing.
1045
796k
static inline int delay_wait_for_top_right_sb(const AV1_COMP *const cpi) {
1046
796k
  const MODE mode = cpi->oxcf.mode;
1047
796k
  if (mode == GOOD) return 0;
1048
1049
562k
  if (mode == ALLINTRA)
1050
465k
    return is_mode_coeff_dv_upd_freq_tile_or_off(cpi);
1051
97.8k
  else if (mode == REALTIME)
1052
97.7k
    return (is_mode_coeff_dv_upd_freq_tile_or_off(cpi) &&
1053
0
            cpi->sf.inter_sf.mv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
1054
19
  else
1055
19
    return 0;
1056
562k
}
1057
1058
/*!\brief Calculate source SAD at superblock level using 64x64 block source SAD
1059
 *
1060
 * \ingroup partition_search
1061
 * \callgraph
1062
 * \callergraph
1063
 */
1064
static inline uint64_t get_sb_source_sad(const AV1_COMP *cpi, int mi_row,
1065
27.1k
                                         int mi_col) {
1066
27.1k
  if (cpi->src_sad_blk_64x64 == NULL) return UINT64_MAX;
1067
1068
27.1k
  const AV1_COMMON *const cm = &cpi->common;
1069
27.1k
  const int blk_64x64_in_mis = (cm->seq_params->sb_size == BLOCK_128X128)
1070
27.1k
                                   ? (cm->seq_params->mib_size >> 1)
1071
27.1k
                                   : cm->seq_params->mib_size;
1072
27.1k
  const int num_blk_64x64_cols =
1073
27.1k
      (cm->mi_params.mi_cols + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
1074
27.1k
  const int num_blk_64x64_rows =
1075
27.1k
      (cm->mi_params.mi_rows + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
1076
27.1k
  const int blk_64x64_col_index = mi_col / blk_64x64_in_mis;
1077
27.1k
  const int blk_64x64_row_index = mi_row / blk_64x64_in_mis;
1078
27.1k
  uint64_t curr_sb_sad = UINT64_MAX;
1079
  // Avoid the border as sad_blk_64x64 may not be set for the border
1080
  // in the scene detection.
1081
27.1k
  if ((blk_64x64_row_index >= num_blk_64x64_rows - 1) ||
1082
22.3k
      (blk_64x64_col_index >= num_blk_64x64_cols - 1)) {
1083
22.3k
    return curr_sb_sad;
1084
22.3k
  }
1085
4.81k
  const uint64_t *const src_sad_blk_64x64_data =
1086
4.81k
      &cpi->src_sad_blk_64x64[blk_64x64_col_index +
1087
4.81k
                              blk_64x64_row_index * num_blk_64x64_cols];
1088
4.81k
  if (cm->seq_params->sb_size == BLOCK_128X128) {
1089
    // Calculate SB source SAD by accumulating source SAD of 64x64 blocks in the
1090
    // superblock
1091
0
    curr_sb_sad = src_sad_blk_64x64_data[0] + src_sad_blk_64x64_data[1] +
1092
0
                  src_sad_blk_64x64_data[num_blk_64x64_cols] +
1093
0
                  src_sad_blk_64x64_data[num_blk_64x64_cols + 1];
1094
4.81k
  } else if (cm->seq_params->sb_size == BLOCK_64X64) {
1095
4.80k
    curr_sb_sad = src_sad_blk_64x64_data[0];
1096
4.80k
  }
1097
4.81k
  return curr_sb_sad;
1098
27.1k
}
1099
1100
/*!\brief Determine whether grading content can be skipped based on sad stat
1101
 *
1102
 * \ingroup partition_search
1103
 * \callgraph
1104
 * \callergraph
1105
 */
1106
static inline bool is_calc_src_content_needed(AV1_COMP *cpi,
1107
                                              MACROBLOCK *const x, int mi_row,
1108
27.1k
                                              int mi_col) {
1109
27.1k
  if (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
1110
0
    return true;
1111
27.1k
  const uint64_t curr_sb_sad = get_sb_source_sad(cpi, mi_row, mi_col);
1112
27.1k
  if (curr_sb_sad == UINT64_MAX) return true;
1113
4.80k
  if (curr_sb_sad == 0) {
1114
0
    x->content_state_sb.source_sad_nonrd = kZeroSad;
1115
0
    return false;
1116
0
  }
1117
4.80k
  AV1_COMMON *const cm = &cpi->common;
1118
4.80k
  bool do_calc_src_content = true;
1119
1120
4.80k
  if (cpi->oxcf.speed < 9) return do_calc_src_content;
1121
1122
  // TODO(yunqing): Tune/validate the thresholds for 128x128 SB size.
1123
1.26k
  if (AOMMIN(cm->width, cm->height) < 360) {
1124
    // Derive Average 64x64 block source SAD from SB source SAD
1125
1.25k
    const uint64_t avg_64x64_blk_sad =
1126
1.25k
        (cm->seq_params->sb_size == BLOCK_128X128) ? ((curr_sb_sad + 2) >> 2)
1127
1.25k
                                                   : curr_sb_sad;
1128
1129
    // The threshold is determined based on kLowSad and kHighSad threshold and
1130
    // test results.
1131
1.25k
    uint64_t thresh_low = 15000;
1132
1.25k
    uint64_t thresh_high = 40000;
1133
1134
1.25k
    if (cpi->sf.rt_sf.increase_source_sad_thresh) {
1135
0
      thresh_low = thresh_low << 1;
1136
0
      thresh_high = thresh_high << 1;
1137
0
    }
1138
1139
1.26k
    if (avg_64x64_blk_sad > thresh_low && avg_64x64_blk_sad < thresh_high) {
1140
0
      do_calc_src_content = false;
1141
      // Note: set x->content_state_sb.source_sad_rd as well if this is extended
1142
      // to RTC rd path.
1143
0
      x->content_state_sb.source_sad_nonrd = kMedSad;
1144
0
    }
1145
1.25k
  }
1146
1147
1.26k
  return do_calc_src_content;
1148
4.80k
}
1149
1150
/*!\brief Determine whether grading content is needed based on sf and frame stat
1151
 *
1152
 * \ingroup partition_search
1153
 * \callgraph
1154
 * \callergraph
1155
 */
1156
// TODO(any): consolidate sfs to make interface cleaner
1157
static inline void grade_source_content_sb(AV1_COMP *cpi, MACROBLOCK *const x,
1158
                                           TileDataEnc *tile_data, int mi_row,
1159
432k
                                           int mi_col) {
1160
432k
  AV1_COMMON *const cm = &cpi->common;
1161
432k
  if (cm->current_frame.frame_type == KEY_FRAME ||
1162
68.8k
      (cpi->ppi->use_svc &&
1163
363k
       cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
1164
363k
    assert(x->content_state_sb.source_sad_nonrd == kMedSad);
1165
363k
    assert(x->content_state_sb.source_sad_rd == kMedSad);
1166
363k
    return;
1167
363k
  }
1168
432k
  bool calc_src_content = false;
1169
1170
68.8k
  if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
1171
27.2k
    if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0) {
1172
27.1k
      calc_src_content = is_calc_src_content_needed(cpi, x, mi_row, mi_col);
1173
27.1k
    } else {
1174
101
      x->content_state_sb.source_sad_nonrd = kZeroSad;
1175
101
    }
1176
41.5k
  } else if ((cpi->sf.rt_sf.var_part_based_on_qidx >= 1) &&
1177
0
             (cm->width * cm->height <= 352 * 288)) {
1178
0
    if (cpi->rc.frame_source_sad > 0)
1179
0
      calc_src_content = true;
1180
0
    else
1181
0
      x->content_state_sb.source_sad_rd = kZeroSad;
1182
0
  }
1183
68.8k
  if (calc_src_content)
1184
27.1k
    av1_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
1185
68.8k
}
1186
1187
/*!\brief Encode a superblock row by breaking it into superblocks
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 * Do partition and mode search for an sb row: one row of superblocks filling up
 * the width of the current tile.
 */
static inline void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
                                 TileDataEnc *tile_data, int mi_row,
                                 TokenExtra **tp) {
  AV1_COMMON *const cm = &cpi->common;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MultiThreadInfo *const mt_info = &cpi->mt_info;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
  AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
  bool row_mt_enabled = mt_info->row_mt_enabled;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int mib_size = cm->seq_params->mib_size;
  const int mib_size_log2 = cm->seq_params->mib_size_log2;
  const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
  const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, encode_sb_row_time);
#endif

  // Initialize the left context for the new SB row
  av1_zero_left_context(xd);

  // Reset delta for quantizer and loop filters at the beginning of every tile
  // (and at every row when row multithreading is on, since rows may be
  // processed by different workers).
  if (mi_row == tile_info->mi_row_start || row_mt_enabled) {
    if (cm->delta_q_info.delta_q_present_flag)
      xd->current_base_qindex = cm->quant_params.base_qindex;
    if (cm->delta_q_info.delta_lf_present_flag) {
      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
    }
  }

  reset_thresh_freq_fact(x);

  // Code each SB in the row
  for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
       mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
    // In realtime/allintra mode and when frequency of cost updates is off/tile,
    // wait for the top superblock to finish encoding. Otherwise, wait for the
    // top-right superblock to finish encoding.
    enc_row_mt->sync_read_ptr(
        row_mt_sync, sb_row, sb_col_in_tile - delay_wait_for_top_right_sb(cpi));

#if CONFIG_MULTITHREAD
    if (row_mt_enabled) {
      pthread_mutex_lock(enc_row_mt->mutex_);
      const bool row_mt_exit = enc_row_mt->row_mt_exit;
      pthread_mutex_unlock(enc_row_mt->mutex_);
      // Exit in case any worker has encountered an error.
      if (row_mt_exit) return;
    }
#endif

    const int update_cdf = tile_data->allow_update_cdf && row_mt_enabled;
    if (update_cdf && (tile_info->mi_row_start != mi_row)) {
      if ((tile_info->mi_col_start == mi_col)) {
        // restore frame context at the 1st column sb
        *xd->tile_ctx = *x->row_ctx;
      } else {
        // update context: blend the left context with the saved top-right
        // (or top, at the tile's right edge) row context.
        int wt_left = AVG_CDF_WEIGHT_LEFT;
        int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
        if (tile_info->mi_col_end > (mi_col + mib_size))
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile,
                              wt_left, wt_tr);
        else
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
                              wt_left, wt_tr);
      }
    }

    // Update the rate cost tables for some symbols
    av1_set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);

    // Reset color coding related parameters
    av1_zero(x->color_sensitivity_sb);
    av1_zero(x->color_sensitivity_sb_g);
    av1_zero(x->color_sensitivity_sb_alt);
    av1_zero(x->color_sensitivity);
    // Reset per-SB content/search state to defaults before encoding this SB.
    x->content_state_sb.source_sad_nonrd = kMedSad;
    x->content_state_sb.source_sad_rd = kMedSad;
    x->content_state_sb.lighting_change = 0;
    x->content_state_sb.low_sumdiff = 0;
    x->force_zeromv_skip_for_sb = 0;
    x->sb_me_block = 0;
    x->sb_me_partition = 0;
    x->sb_me_mv.as_int = 0;
    x->sb_col_scroll = 0;
    x->sb_row_scroll = 0;
    x->sb_force_fixed_part = 1;
    x->color_palette_thresh = 64;
    x->force_color_check_block_level = 0;
    x->nonrd_prune_ref_frame_search =
        cpi->sf.rt_sf.nonrd_prune_ref_frame_search;

    if (cpi->oxcf.mode == ALLINTRA) {
      x->intra_sb_rdmult_modifier = 128;
    }

    xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
    x->source_variance = UINT_MAX;
    td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);

    // Get segment id and skip flag
    const struct segmentation *const seg = &cm->seg;
    int seg_skip = 0;
    if (seg->enabled) {
      const uint8_t *const map =
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
      const uint8_t segment_id =
          map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
              : 0;
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
    }

    produce_gradients_for_sb(cpi, x, sb_size, mi_row, mi_col);

    init_src_var_info_of_4x4_sub_blocks(cpi, x->src_var_info_of_4x4_sub_blocks,
                                        sb_size);

    // Grade the temporal variation of the sb, the grade will be used to decide
    // fast mode search strategy for coding blocks
    if (!seg_skip) grade_source_content_sb(cpi, x, tile_data, mi_row, mi_col);

    // encode the superblock
    if (use_nonrd_mode) {
      encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
    } else {
      encode_rd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
    }

    // Update the top-right context in row_mt coding
    if (update_cdf && (tile_info->mi_row_end > (mi_row + mib_size))) {
      if (sb_cols_in_tile == 1)
        x->row_ctx[0] = *xd->tile_ctx;
      else if (sb_col_in_tile >= 1)
        x->row_ctx[sb_col_in_tile - 1] = *xd->tile_ctx;
    }
    // Signal that this SB is done so dependent rows can proceed.
    enc_row_mt->sync_write_ptr(row_mt_sync, sb_row, sb_col_in_tile,
                               sb_cols_in_tile);
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, encode_sb_row_time);
#endif
}
1343
1344
117k
static inline void init_encode_frame_mb_context(AV1_COMP *cpi) {
1345
117k
  AV1_COMMON *const cm = &cpi->common;
1346
117k
  const int num_planes = av1_num_planes(cm);
1347
117k
  MACROBLOCK *const x = &cpi->td.mb;
1348
117k
  MACROBLOCKD *const xd = &x->e_mbd;
1349
1350
  // Copy data over into macro block data structures.
1351
117k
  av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
1352
117k
                       cm->seq_params->sb_size);
1353
1354
117k
  av1_setup_block_planes(xd, cm->seq_params->subsampling_x,
1355
117k
                         cm->seq_params->subsampling_y, num_planes);
1356
117k
}
1357
1358
83.4k
void av1_alloc_tile_data(AV1_COMP *cpi) {
1359
83.4k
  AV1_COMMON *const cm = &cpi->common;
1360
83.4k
  AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt;
1361
83.4k
  const int tile_cols = cm->tiles.cols;
1362
83.4k
  const int tile_rows = cm->tiles.rows;
1363
1364
83.4k
  av1_row_mt_mem_dealloc(cpi);
1365
1366
83.4k
  aom_free(cpi->tile_data);
1367
83.4k
  cpi->allocated_tiles = 0;
1368
83.4k
  enc_row_mt->allocated_tile_cols = 0;
1369
83.4k
  enc_row_mt->allocated_tile_rows = 0;
1370
1371
83.4k
  CHECK_MEM_ERROR(
1372
83.4k
      cm, cpi->tile_data,
1373
83.4k
      aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));
1374
1375
83.4k
  cpi->allocated_tiles = tile_cols * tile_rows;
1376
83.4k
  enc_row_mt->allocated_tile_cols = tile_cols;
1377
83.4k
  enc_row_mt->allocated_tile_rows = tile_rows;
1378
219k
  for (int tile_row = 0; tile_row < tile_rows; ++tile_row) {
1379
376k
    for (int tile_col = 0; tile_col < tile_cols; ++tile_col) {
1380
240k
      const int tile_index = tile_row * tile_cols + tile_col;
1381
240k
      TileDataEnc *const this_tile = &cpi->tile_data[tile_index];
1382
240k
      av1_zero(this_tile->row_mt_sync);
1383
240k
      this_tile->row_ctx = NULL;
1384
240k
    }
1385
136k
  }
1386
83.4k
}
1387
1388
148k
void av1_init_tile_data(AV1_COMP *cpi) {
1389
148k
  AV1_COMMON *const cm = &cpi->common;
1390
148k
  const int num_planes = av1_num_planes(cm);
1391
148k
  const int tile_cols = cm->tiles.cols;
1392
148k
  const int tile_rows = cm->tiles.rows;
1393
148k
  int tile_col, tile_row;
1394
148k
  TokenInfo *const token_info = &cpi->token_info;
1395
148k
  TokenExtra *pre_tok = token_info->tile_tok[0][0];
1396
148k
  TokenList *tplist = token_info->tplist[0][0];
1397
148k
  unsigned int tile_tok = 0;
1398
148k
  int tplist_count = 0;
1399
1400
148k
  if (!is_stat_generation_stage(cpi) &&
1401
117k
      cm->features.allow_screen_content_tools) {
1402
    // Number of tokens for which token info needs to be allocated.
1403
0
    unsigned int tokens_required =
1404
0
        get_token_alloc(cm->mi_params.mb_rows, cm->mi_params.mb_cols,
1405
0
                        MAX_SB_SIZE_LOG2, num_planes);
1406
    // Allocate/reallocate memory for token related info if the number of tokens
1407
    // required is more than the number of tokens already allocated. This could
1408
    // occur in case of the following:
1409
    // 1) If the memory is not yet allocated
1410
    // 2) If the frame dimensions have changed
1411
0
    const bool realloc_tokens = tokens_required > token_info->tokens_allocated;
1412
0
    if (realloc_tokens) {
1413
0
      free_token_info(token_info);
1414
0
      alloc_token_info(cm, token_info, tokens_required);
1415
0
      pre_tok = token_info->tile_tok[0][0];
1416
0
      tplist = token_info->tplist[0][0];
1417
0
    }
1418
0
  }
1419
1420
372k
  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1421
588k
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1422
364k
      TileDataEnc *const tile_data =
1423
364k
          &cpi->tile_data[tile_row * tile_cols + tile_col];
1424
364k
      TileInfo *const tile_info = &tile_data->tile_info;
1425
364k
      av1_tile_init(tile_info, cm, tile_row, tile_col);
1426
364k
      tile_data->firstpass_top_mv = kZeroMv;
1427
364k
      tile_data->abs_sum_level = 0;
1428
1429
364k
      if (is_token_info_allocated(token_info)) {
1430
0
        token_info->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
1431
0
        pre_tok = token_info->tile_tok[tile_row][tile_col];
1432
0
        tile_tok = allocated_tokens(
1433
0
            tile_info, cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1434
0
            num_planes);
1435
0
        token_info->tplist[tile_row][tile_col] = tplist + tplist_count;
1436
0
        tplist = token_info->tplist[tile_row][tile_col];
1437
0
        tplist_count = av1_get_sb_rows_in_tile(cm, tile_info);
1438
0
      }
1439
364k
      tile_data->allow_update_cdf = !cm->tiles.large_scale;
1440
364k
      tile_data->allow_update_cdf = tile_data->allow_update_cdf &&
1441
364k
                                    !cm->features.disable_cdf_update &&
1442
364k
                                    !delay_wait_for_top_right_sb(cpi);
1443
364k
      tile_data->tctx = *cm->fc;
1444
364k
    }
1445
224k
  }
1446
148k
}
1447
1448
// Populate the start palette token info prior to encoding an SB row.
1449
static inline void get_token_start(AV1_COMP *cpi, const TileInfo *tile_info,
1450
                                   int tile_row, int tile_col, int mi_row,
1451
363k
                                   TokenExtra **tp) {
1452
363k
  const TokenInfo *token_info = &cpi->token_info;
1453
363k
  if (!is_token_info_allocated(token_info)) return;
1454
1455
128
  const AV1_COMMON *cm = &cpi->common;
1456
128
  const int num_planes = av1_num_planes(cm);
1457
128
  TokenList *const tplist = cpi->token_info.tplist[tile_row][tile_col];
1458
128
  const int sb_row_in_tile =
1459
128
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1460
1461
128
  get_start_tok(cpi, tile_row, tile_col, mi_row, tp,
1462
128
                cm->seq_params->mib_size_log2 + MI_SIZE_LOG2, num_planes);
1463
128
  assert(tplist != NULL);
1464
128
  tplist[sb_row_in_tile].start = *tp;
1465
128
}
1466
1467
// Populate the token count after encoding an SB row.
1468
static inline void populate_token_count(AV1_COMP *cpi,
1469
                                        const TileInfo *tile_info, int tile_row,
1470
                                        int tile_col, int mi_row,
1471
363k
                                        TokenExtra *tok) {
1472
363k
  const TokenInfo *token_info = &cpi->token_info;
1473
363k
  if (!is_token_info_allocated(token_info)) return;
1474
1475
30
  const AV1_COMMON *cm = &cpi->common;
1476
30
  const int num_planes = av1_num_planes(cm);
1477
30
  TokenList *const tplist = token_info->tplist[tile_row][tile_col];
1478
30
  const int sb_row_in_tile =
1479
30
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1480
30
  const int tile_mb_cols =
1481
30
      (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
1482
30
  const int num_mb_rows_in_sb =
1483
30
      ((1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
1484
30
  tplist[sb_row_in_tile].count =
1485
30
      (unsigned int)(tok - tplist[sb_row_in_tile].start);
1486
1487
30
  assert((unsigned int)(tok - tplist[sb_row_in_tile].start) <=
1488
30
         get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
1489
30
                         cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1490
30
                         num_planes));
1491
1492
30
  (void)num_planes;
1493
30
  (void)tile_mb_cols;
1494
30
  (void)num_mb_rows_in_sb;
1495
30
}
1496
1497
/*!\brief Encode a superblock row
1498
 *
1499
 * \ingroup partition_search
1500
 */
1501
void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
1502
363k
                       int tile_col, int mi_row) {
1503
363k
  AV1_COMMON *const cm = &cpi->common;
1504
363k
  const int tile_cols = cm->tiles.cols;
1505
363k
  TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
1506
363k
  const TileInfo *const tile_info = &this_tile->tile_info;
1507
363k
  TokenExtra *tok = NULL;
1508
1509
363k
  get_token_start(cpi, tile_info, tile_row, tile_col, mi_row, &tok);
1510
1511
363k
  encode_sb_row(cpi, td, this_tile, mi_row, &tok);
1512
1513
363k
  populate_token_count(cpi, tile_info, tile_row, tile_col, mi_row, tok);
1514
363k
}
1515
1516
/*!\brief Encode a tile
1517
 *
1518
 * \ingroup partition_search
1519
 */
1520
void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
1521
64.3k
                     int tile_col) {
1522
64.3k
  AV1_COMMON *const cm = &cpi->common;
1523
64.3k
  TileDataEnc *const this_tile =
1524
64.3k
      &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1525
64.3k
  const TileInfo *const tile_info = &this_tile->tile_info;
1526
1527
64.3k
  if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile);
1528
1529
64.3k
  av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
1530
64.3k
                         tile_info->mi_col_end, tile_row);
1531
64.3k
  av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
1532
64.3k
                         &td->mb.e_mbd);
1533
1534
64.3k
#if !CONFIG_REALTIME_ONLY
1535
64.3k
  if (cpi->oxcf.intra_mode_cfg.enable_cfl_intra)
1536
64.3k
    cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);
1537
64.3k
#endif
1538
1539
64.3k
  if (td->mb.txfm_search_info.mb_rd_record != NULL) {
1540
22.6k
    av1_crc32c_calculator_init(
1541
22.6k
        &td->mb.txfm_search_info.mb_rd_record->crc_calculator);
1542
22.6k
  }
1543
1544
136k
  for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
1545
72.4k
       mi_row += cm->seq_params->mib_size) {
1546
72.4k
    av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
1547
72.4k
  }
1548
64.3k
  this_tile->abs_sum_level = td->abs_sum_level;
1549
64.3k
}
1550
1551
/*!\brief Break one frame into tiles and encode the tiles
1552
 *
1553
 * \ingroup partition_search
1554
 *
1555
 * \param[in]    cpi    Top-level encoder structure
1556
 */
1557
43.9k
static inline void encode_tiles(AV1_COMP *cpi) {
1558
43.9k
  AV1_COMMON *const cm = &cpi->common;
1559
43.9k
  const int tile_cols = cm->tiles.cols;
1560
43.9k
  const int tile_rows = cm->tiles.rows;
1561
43.9k
  int tile_col, tile_row;
1562
1563
43.9k
  MACROBLOCK *const mb = &cpi->td.mb;
1564
43.9k
  assert(IMPLIES(cpi->tile_data == NULL,
1565
43.9k
                 cpi->allocated_tiles < tile_cols * tile_rows));
1566
43.9k
  if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi);
1567
1568
43.9k
  av1_init_tile_data(cpi);
1569
43.9k
  av1_alloc_mb_data(cpi, mb);
1570
1571
94.9k
  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1572
115k
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1573
64.3k
      TileDataEnc *const this_tile =
1574
64.3k
          &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1575
64.3k
      cpi->td.intrabc_used = 0;
1576
64.3k
      cpi->td.deltaq_used = 0;
1577
64.3k
      cpi->td.abs_sum_level = 0;
1578
64.3k
      cpi->td.rd_counts.seg_tmp_pred_cost[0] = 0;
1579
64.3k
      cpi->td.rd_counts.seg_tmp_pred_cost[1] = 0;
1580
64.3k
      cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
1581
64.3k
      cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
1582
64.3k
      av1_init_rtc_counters(&cpi->td.mb);
1583
64.3k
      cpi->td.mb.palette_pixels = 0;
1584
64.3k
      av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
1585
64.3k
      if (!frame_is_intra_only(&cpi->common))
1586
13.1k
        av1_accumulate_rtc_counters(cpi, &cpi->td.mb);
1587
64.3k
      cpi->palette_pixel_num += cpi->td.mb.palette_pixels;
1588
64.3k
      cpi->intrabc_used |= cpi->td.intrabc_used;
1589
64.3k
      cpi->deltaq_used |= cpi->td.deltaq_used;
1590
64.3k
    }
1591
50.9k
  }
1592
1593
43.9k
  av1_dealloc_mb_data(mb, av1_num_planes(cm));
1594
43.9k
}
1595
1596
// Set the relative distance of a reference frame w.r.t. current frame
1597
static inline void set_rel_frame_dist(
1598
    const AV1_COMMON *const cm, RefFrameDistanceInfo *const ref_frame_dist_info,
1599
117k
    const int ref_frame_flags) {
1600
117k
  MV_REFERENCE_FRAME ref_frame;
1601
117k
  int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX;
1602
117k
  ref_frame_dist_info->nearest_past_ref = NONE_FRAME;
1603
117k
  ref_frame_dist_info->nearest_future_ref = NONE_FRAME;
1604
943k
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
1605
825k
    ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = 0;
1606
825k
    if (ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
1607
137k
      int dist = av1_encoder_get_relative_dist(
1608
137k
          cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME],
1609
137k
          cm->current_frame.display_order_hint);
1610
137k
      ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = dist;
1611
      // Get the nearest ref_frame in the past
1612
137k
      if (abs(dist) < min_past_dist && dist < 0) {
1613
32.3k
        ref_frame_dist_info->nearest_past_ref = ref_frame;
1614
32.3k
        min_past_dist = abs(dist);
1615
32.3k
      }
1616
      // Get the nearest ref_frame in the future
1617
137k
      if (dist < min_future_dist && dist > 0) {
1618
1.00k
        ref_frame_dist_info->nearest_future_ref = ref_frame;
1619
1.00k
        min_future_dist = dist;
1620
1.00k
      }
1621
137k
    }
1622
825k
  }
1623
117k
}
1624
1625
27.0k
static inline int refs_are_one_sided(const AV1_COMMON *cm) {
1626
27.0k
  assert(!frame_is_intra_only(cm));
1627
1628
27.0k
  int one_sided_refs = 1;
1629
27.0k
  const int cur_display_order_hint = cm->current_frame.display_order_hint;
1630
216k
  for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
1631
189k
    const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
1632
189k
    if (buf == NULL) continue;
1633
189k
    if (av1_encoder_get_relative_dist(buf->display_order_hint,
1634
189k
                                      cur_display_order_hint) > 0) {
1635
0
      one_sided_refs = 0;  // bwd reference
1636
0
      break;
1637
0
    }
1638
189k
  }
1639
27.0k
  return one_sided_refs;
1640
27.0k
}
1641
1642
static inline void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
1643
10.3k
                                             int ref_order_hint[2]) {
1644
10.3k
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
1645
10.3k
  ref_order_hint[0] = ref_order_hint[1] = 0;
1646
10.3k
  if (!skip_mode_info->skip_mode_allowed) return;
1647
1648
10.3k
  const RefCntBuffer *const buf_0 =
1649
10.3k
      get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
1650
10.3k
  const RefCntBuffer *const buf_1 =
1651
10.3k
      get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
1652
10.3k
  assert(buf_0 != NULL && buf_1 != NULL);
1653
1654
10.3k
  ref_order_hint[0] = buf_0->order_hint;
1655
10.3k
  ref_order_hint[1] = buf_1->order_hint;
1656
10.3k
}
1657
1658
117k
static int check_skip_mode_enabled(AV1_COMP *const cpi) {
1659
117k
  AV1_COMMON *const cm = &cpi->common;
1660
1661
117k
  av1_setup_skip_mode_allowed(cm);
1662
117k
  if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;
1663
1664
  // Turn off skip mode if the temporal distances of the reference pair to the
1665
  // current frame are different by more than 1 frame.
1666
10.3k
  const int cur_offset = (int)cm->current_frame.order_hint;
1667
10.3k
  int ref_offset[2];
1668
10.3k
  get_skip_mode_ref_offsets(cm, ref_offset);
1669
10.3k
  const int cur_to_ref0 = get_relative_dist(&cm->seq_params->order_hint_info,
1670
10.3k
                                            cur_offset, ref_offset[0]);
1671
10.3k
  const int cur_to_ref1 = abs(get_relative_dist(
1672
10.3k
      &cm->seq_params->order_hint_info, cur_offset, ref_offset[1]));
1673
10.3k
  if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;
1674
1675
  // High Latency: Turn off skip mode if all refs are fwd.
1676
9.34k
  if (cpi->all_one_sided_refs && cpi->oxcf.gf_cfg.lag_in_frames > 0) return 0;
1677
1678
6.23k
  const int ref_frame[2] = {
1679
6.23k
    cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME,
1680
6.23k
    cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME
1681
6.23k
  };
1682
6.23k
  if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[0]]) ||
1683
6.23k
      !(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[1]]))
1684
1.83k
    return 0;
1685
1686
4.40k
  return 1;
1687
6.23k
}
1688
1689
static inline void set_default_interp_skip_flags(
1690
117k
    const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
1691
117k
  const int num_planes = av1_num_planes(cm);
1692
117k
  interp_search_flags->default_interp_skip_flags =
1693
117k
      (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
1694
117k
                        : INTERP_SKIP_LUMA_SKIP_CHROMA;
1695
117k
}
1696
1697
117k
static inline void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
1698
117k
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
1699
117k
       cpi->sf.inter_sf.disable_onesided_comp) &&
1700
42.1k
      cpi->all_one_sided_refs) {
1701
    // Disable all compound references
1702
16.8k
    cpi->prune_ref_frame_mask = (1 << MODE_CTX_REF_FRAMES) - (1 << REF_FRAMES);
1703
101k
  } else if (!cpi->sf.rt_sf.use_nonrd_pick_mode &&
1704
62.3k
             cpi->sf.inter_sf.selective_ref_frame >= 2) {
1705
25.2k
    AV1_COMMON *const cm = &cpi->common;
1706
25.2k
    const int cur_frame_display_order_hint =
1707
25.2k
        cm->current_frame.display_order_hint;
1708
25.2k
    unsigned int *ref_display_order_hint =
1709
25.2k
        cm->cur_frame->ref_display_order_hint;
1710
25.2k
    const int arf2_dist = av1_encoder_get_relative_dist(
1711
25.2k
        ref_display_order_hint[ALTREF2_FRAME - LAST_FRAME],
1712
25.2k
        cur_frame_display_order_hint);
1713
25.2k
    const int bwd_dist = av1_encoder_get_relative_dist(
1714
25.2k
        ref_display_order_hint[BWDREF_FRAME - LAST_FRAME],
1715
25.2k
        cur_frame_display_order_hint);
1716
1717
556k
    for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
1718
530k
      MV_REFERENCE_FRAME rf[2];
1719
530k
      av1_set_ref_frame(rf, ref_idx);
1720
530k
      if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) ||
1721
530k
          !(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]])) {
1722
530k
        continue;
1723
530k
      }
1724
1725
0
      if (!cpi->all_one_sided_refs) {
1726
0
        int ref_dist[2];
1727
0
        for (int i = 0; i < 2; ++i) {
1728
0
          ref_dist[i] = av1_encoder_get_relative_dist(
1729
0
              ref_display_order_hint[rf[i] - LAST_FRAME],
1730
0
              cur_frame_display_order_hint);
1731
0
        }
1732
1733
        // One-sided compound is used only when all reference frames are
1734
        // one-sided.
1735
0
        if ((ref_dist[0] > 0) == (ref_dist[1] > 0)) {
1736
0
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
1737
0
        }
1738
0
      }
1739
1740
0
      if (cpi->sf.inter_sf.selective_ref_frame >= 4 &&
1741
0
          (rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME) &&
1742
0
          (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
1743
        // Check if both ALTREF2_FRAME and BWDREF_FRAME are future references.
1744
0
        if (arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist) {
1745
          // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
1746
          // reference to the current frame than ALTREF2_FRAME
1747
0
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
1748
0
        }
1749
0
      }
1750
0
    }
1751
25.2k
  }
1752
117k
}
1753
1754
100k
static int allow_deltaq_mode(AV1_COMP *cpi) {
1755
100k
#if !CONFIG_REALTIME_ONLY
1756
100k
  AV1_COMMON *const cm = &cpi->common;
1757
100k
  BLOCK_SIZE sb_size = cm->seq_params->sb_size;
1758
100k
  int sbs_wide = mi_size_wide[sb_size];
1759
100k
  int sbs_high = mi_size_high[sb_size];
1760
1761
100k
  int64_t delta_rdcost = 0;
1762
291k
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sbs_high) {
1763
579k
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += sbs_wide) {
1764
387k
      int64_t this_delta_rdcost = 0;
1765
387k
      av1_get_q_for_deltaq_objective(cpi, &cpi->td, &this_delta_rdcost, sb_size,
1766
387k
                                     mi_row, mi_col);
1767
387k
      delta_rdcost += this_delta_rdcost;
1768
387k
    }
1769
191k
  }
1770
100k
  return delta_rdcost < 0;
1771
#else
1772
  (void)cpi;
1773
  return 1;
1774
#endif  // !CONFIG_REALTIME_ONLY
1775
100k
}
1776
1777
0
#define FORCE_ZMV_SKIP_128X128_BLK_DIFF 10000
1778
#define FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF 4
1779
1780
// Populates block level thresholds for force zeromv-skip decision
1781
117k
static void populate_thresh_to_force_zeromv_skip(AV1_COMP *cpi) {
1782
117k
  if (cpi->sf.rt_sf.part_early_exit_zeromv == 0) return;
1783
1784
  // Threshold for forcing zeromv-skip decision is as below:
1785
  // For 128x128 blocks, threshold is 10000 and per pixel threshold is 0.6103.
1786
  // For 64x64 blocks, threshold is 5000 and per pixel threshold is 1.221
1787
  // allowing slightly higher error for smaller blocks.
1788
  // Per Pixel Threshold of 64x64 block        Area of 64x64 block         1  1
1789
  // ------------------------------------=sqrt(---------------------)=sqrt(-)=-
1790
  // Per Pixel Threshold of 128x128 block      Area of 128x128 block       4  2
1791
  // Thus, per pixel thresholds for blocks of size 32x32, 16x16,...  can be
1792
  // chosen as 2.442, 4.884,.... As the per pixel error tends to be higher for
1793
  // small blocks, the same is clipped to 4.
1794
0
  const unsigned int thresh_exit_128x128_part = FORCE_ZMV_SKIP_128X128_BLK_DIFF;
1795
0
  const int num_128x128_pix =
1796
0
      block_size_wide[BLOCK_128X128] * block_size_high[BLOCK_128X128];
1797
1798
0
  for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
1799
0
    const int num_block_pix = block_size_wide[bsize] * block_size_high[bsize];
1800
1801
    // Calculate the threshold for zeromv-skip decision based on area of the
1802
    // partition
1803
0
    unsigned int thresh_exit_part_blk =
1804
0
        (unsigned int)(thresh_exit_128x128_part *
1805
0
                           sqrt((double)num_block_pix / num_128x128_pix) +
1806
0
                       0.5);
1807
0
    thresh_exit_part_blk = AOMMIN(
1808
0
        thresh_exit_part_blk,
1809
0
        (unsigned int)(FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF * num_block_pix));
1810
0
    cpi->zeromv_skip_thresh_exit_part[bsize] = thresh_exit_part_blk;
1811
0
  }
1812
0
}
1813
1814
0
static void free_block_hash_buffers(uint32_t *block_hash_values[2]) {
1815
0
  for (int j = 0; j < 2; ++j) {
1816
0
    aom_free(block_hash_values[j]);
1817
0
  }
1818
0
}
1819
1820
/*!\brief Determines delta_q_res value for Variance Boost modulation.
1821
 */
1822
0
static int aom_get_variance_boost_delta_q_res(int qindex) {
1823
  // Signaling delta_q changes across superblocks comes with inherent syntax
1824
  // element overhead, which adds up to total payload size. This overhead
1825
  // becomes proportionally bigger the higher the base qindex (i.e. lower
1826
  // quality, smaller file size), so a balance needs to be struck.
1827
  // - Smaller delta_q_res: more granular delta_q control, more bits spent
1828
  // signaling deltas.
1829
  // - Larger delta_q_res: coarser delta_q control, less bits spent signaling
1830
  // deltas.
1831
  //
1832
  // At the same time, SB qindex fluctuations become larger the higher
1833
  // the base qindex (between lowest and highest-variance regions):
1834
  // - For QP 5: up to 8 qindexes
1835
  // - For QP 60: up to 52 qindexes
1836
  //
1837
  // With these factors in mind, it was found that the best strategy that
1838
  // maximizes quality per bitrate is by having very finely-grained delta_q
1839
  // values for the lowest picture qindexes (to preserve tiny qindex SB deltas),
1840
  // and progressively making them coarser as base qindex increases (to reduce
1841
  // total signaling overhead).
1842
0
  int delta_q_res = 1;
1843
1844
0
  if (qindex >= 160) {
1845
0
    delta_q_res = 8;
1846
0
  } else if (qindex >= 120) {
1847
0
    delta_q_res = 4;
1848
0
  } else if (qindex >= 80) {
1849
0
    delta_q_res = 2;
1850
0
  } else {
1851
0
    delta_q_res = 1;
1852
0
  }
1853
1854
0
  return delta_q_res;
1855
0
}
1856
1857
#if !CONFIG_REALTIME_ONLY
1858
0
static float get_thresh_based_on_q(int qindex, int speed) {
1859
0
  const float min_threshold_arr[2] = { 0.06f, 0.09f };
1860
0
  const float max_threshold_arr[2] = { 0.10f, 0.13f };
1861
1862
0
  const float min_thresh = min_threshold_arr[speed >= 3];
1863
0
  const float max_thresh = max_threshold_arr[speed >= 3];
1864
0
  const float thresh = min_thresh + (max_thresh - min_thresh) *
1865
0
                                        ((float)MAXQ - (float)qindex) /
1866
0
                                        (float)(MAXQ - MINQ);
1867
0
  return thresh;
1868
0
}
1869
1870
0
static int get_mv_err(MV cur_mv, MV ref_mv) {
1871
0
  const MV diff = { cur_mv.row - ref_mv.row, cur_mv.col - ref_mv.col };
1872
0
  const MV abs_diff = { abs(diff.row), abs(diff.col) };
1873
0
  const int mv_err = (abs_diff.row + abs_diff.col);
1874
0
  return mv_err;
1875
0
}
1876
1877
0
static void check_mv_err_and_update(MV cur_mv, MV ref_mv, int *best_mv_err) {
1878
0
  const int mv_err = get_mv_err(cur_mv, ref_mv);
1879
0
  *best_mv_err = AOMMIN(mv_err, *best_mv_err);
1880
0
}
1881
1882
static int is_inside_frame_border(int mi_row, int mi_col, int row_offset,
1883
                                  int col_offset, int num_mi_rows,
1884
0
                                  int num_mi_cols) {
1885
0
  if (mi_row + row_offset < 0 || mi_row + row_offset >= num_mi_rows ||
1886
0
      mi_col + col_offset < 0 || mi_col + col_offset >= num_mi_cols)
1887
0
    return 0;
1888
1889
0
  return 1;
1890
0
}
1891
1892
// Compute the minimum MV error between current MV and spatial MV predictors.
1893
static int get_spatial_mvpred_err(AV1_COMMON *cm, TplParams *const tpl_data,
1894
                                  int tpl_idx, int mi_row, int mi_col,
1895
                                  int ref_idx, int_mv cur_mv, int allow_hp,
1896
0
                                  int is_integer) {
1897
0
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
1898
0
  TplDepStats *tpl_ptr = tpl_frame->tpl_stats_ptr;
1899
0
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
1900
1901
0
  int mv_err = INT32_MAX;
1902
0
  const int step = 1 << block_mis_log2;
1903
0
  const int mv_pred_pos_in_mis[6][2] = {
1904
0
    { -step, 0 },     { 0, -step },     { -step, step },
1905
0
    { -step, -step }, { -2 * step, 0 }, { 0, -2 * step },
1906
0
  };
1907
1908
0
  for (int i = 0; i < 6; i++) {
1909
0
    int row_offset = mv_pred_pos_in_mis[i][0];
1910
0
    int col_offset = mv_pred_pos_in_mis[i][1];
1911
0
    if (!is_inside_frame_border(mi_row, mi_col, row_offset, col_offset,
1912
0
                                tpl_frame->mi_rows, tpl_frame->mi_cols)) {
1913
0
      continue;
1914
0
    }
1915
1916
0
    const TplDepStats *tpl_stats =
1917
0
        &tpl_ptr[av1_tpl_ptr_pos(mi_row + row_offset, mi_col + col_offset,
1918
0
                                 tpl_frame->stride, block_mis_log2)];
1919
0
    int_mv this_refmv = tpl_stats->mv[ref_idx];
1920
0
    lower_mv_precision(&this_refmv.as_mv, allow_hp, is_integer);
1921
0
    check_mv_err_and_update(cur_mv.as_mv, this_refmv.as_mv, &mv_err);
1922
0
  }
1923
1924
  // Check MV error w.r.t. Global MV / Zero MV
1925
0
  int_mv gm_mv = { 0 };
1926
0
  if (cm->global_motion[ref_idx + LAST_FRAME].wmtype > TRANSLATION) {
1927
0
    const BLOCK_SIZE bsize = convert_length_to_bsize(tpl_data->tpl_bsize_1d);
1928
0
    gm_mv = gm_get_motion_vector(&cm->global_motion[ref_idx + LAST_FRAME],
1929
0
                                 allow_hp, bsize, mi_col, mi_row, is_integer);
1930
0
  }
1931
0
  check_mv_err_and_update(cur_mv.as_mv, gm_mv.as_mv, &mv_err);
1932
1933
0
  return mv_err;
1934
0
}
1935
1936
// Compute the minimum MV error between current MV and temporal MV predictors.
1937
static int get_temporal_mvpred_err(AV1_COMMON *cm, int mi_row, int mi_col,
1938
                                   int num_mi_rows, int num_mi_cols,
1939
                                   int ref_idx, int_mv cur_mv, int allow_hp,
1940
0
                                   int is_integer) {
1941
0
  const RefCntBuffer *ref_buf = get_ref_frame_buf(cm, ref_idx + LAST_FRAME);
1942
0
  if (ref_buf == NULL) return INT32_MAX;
1943
0
  int cur_to_ref_dist =
1944
0
      get_relative_dist(&cm->seq_params->order_hint_info,
1945
0
                        cm->cur_frame->order_hint, ref_buf->order_hint);
1946
1947
0
  int mv_err = INT32_MAX;
1948
0
  const int mv_pred_pos_in_mis[7][2] = {
1949
0
    { 0, 0 }, { 0, 2 }, { 2, 0 }, { 2, 2 }, { 4, -2 }, { 4, 4 }, { 2, 4 },
1950
0
  };
1951
1952
0
  for (int i = 0; i < 7; i++) {
1953
0
    int row_offset = mv_pred_pos_in_mis[i][0];
1954
0
    int col_offset = mv_pred_pos_in_mis[i][1];
1955
0
    if (!is_inside_frame_border(mi_row, mi_col, row_offset, col_offset,
1956
0
                                num_mi_rows, num_mi_cols)) {
1957
0
      continue;
1958
0
    }
1959
0
    const TPL_MV_REF *ref_mvs =
1960
0
        cm->tpl_mvs +
1961
0
        ((mi_row + row_offset) >> 1) * (cm->mi_params.mi_stride >> 1) +
1962
0
        ((mi_col + col_offset) >> 1);
1963
0
    if (ref_mvs->mfmv0.as_int == INVALID_MV) continue;
1964
1965
0
    int_mv this_refmv;
1966
0
    av1_get_mv_projection(&this_refmv.as_mv, ref_mvs->mfmv0.as_mv,
1967
0
                          cur_to_ref_dist, ref_mvs->ref_frame_offset);
1968
0
    lower_mv_precision(&this_refmv.as_mv, allow_hp, is_integer);
1969
0
    check_mv_err_and_update(cur_mv.as_mv, this_refmv.as_mv, &mv_err);
1970
0
  }
1971
1972
0
  return mv_err;
1973
0
}
1974
1975
// Determine whether to disable temporal MV prediction for the current frame
1976
// based on TPL and motion field data. Temporal MV prediction is disabled if the
1977
// reduction in MV error by including temporal MVs as MV predictors is small.
1978
117k
static void check_to_disable_ref_frame_mvs(AV1_COMP *cpi) {
1979
117k
  AV1_COMMON *cm = &cpi->common;
1980
117k
  if (!cm->features.allow_ref_frame_mvs || cpi->sf.hl_sf.ref_frame_mvs_lvl != 1)
1981
117k
    return;
1982
1983
0
  const int tpl_idx = cpi->gf_frame_index;
1984
0
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
1985
0
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
1986
1987
0
  const SUBPEL_FORCE_STOP tpl_subpel_precision =
1988
0
      cpi->sf.tpl_sf.subpel_force_stop;
1989
0
  const int allow_high_precision_mv = tpl_subpel_precision == EIGHTH_PEL &&
1990
0
                                      cm->features.allow_high_precision_mv;
1991
0
  const int force_integer_mv = tpl_subpel_precision == FULL_PEL ||
1992
0
                               cm->features.cur_frame_force_integer_mv;
1993
1994
0
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
1995
0
  TplDepStats *tpl_ptr = tpl_frame->tpl_stats_ptr;
1996
0
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
1997
0
  const int step = 1 << block_mis_log2;
1998
1999
0
  uint64_t accum_spatial_mvpred_err = 0;
2000
0
  uint64_t accum_best_err = 0;
2001
2002
0
  for (int mi_row = 0; mi_row < tpl_frame->mi_rows; mi_row += step) {
2003
0
    for (int mi_col = 0; mi_col < tpl_frame->mi_cols; mi_col += step) {
2004
0
      TplDepStats *tpl_stats_ptr = &tpl_ptr[av1_tpl_ptr_pos(
2005
0
          mi_row, mi_col, tpl_frame->stride, block_mis_log2)];
2006
0
      const int cur_best_ref_idx = tpl_stats_ptr->ref_frame_index[0];
2007
0
      if (cur_best_ref_idx == NONE_FRAME) continue;
2008
2009
0
      int_mv cur_mv = tpl_stats_ptr->mv[cur_best_ref_idx];
2010
0
      lower_mv_precision(&cur_mv.as_mv, allow_high_precision_mv,
2011
0
                         force_integer_mv);
2012
2013
0
      const int cur_spatial_mvpred_err = get_spatial_mvpred_err(
2014
0
          cm, tpl_data, tpl_idx, mi_row, mi_col, cur_best_ref_idx, cur_mv,
2015
0
          allow_high_precision_mv, force_integer_mv);
2016
2017
0
      const int cur_temporal_mvpred_err = get_temporal_mvpred_err(
2018
0
          cm, mi_row, mi_col, tpl_frame->mi_rows, tpl_frame->mi_cols,
2019
0
          cur_best_ref_idx, cur_mv, allow_high_precision_mv, force_integer_mv);
2020
2021
0
      const int cur_best_err =
2022
0
          AOMMIN(cur_spatial_mvpred_err, cur_temporal_mvpred_err);
2023
0
      accum_spatial_mvpred_err += cur_spatial_mvpred_err;
2024
0
      accum_best_err += cur_best_err;
2025
0
    }
2026
0
  }
2027
2028
0
  const float threshold =
2029
0
      get_thresh_based_on_q(cm->quant_params.base_qindex, cpi->oxcf.speed);
2030
0
  const float mv_err_reduction =
2031
0
      (float)(accum_spatial_mvpred_err - accum_best_err);
2032
2033
0
  if (mv_err_reduction <= threshold * accum_spatial_mvpred_err)
2034
0
    cm->features.allow_ref_frame_mvs = 0;
2035
0
}
2036
#endif  // !CONFIG_REALTIME_ONLY
2037
2038
/*!\brief Encoder setup (only for the current frame), encoding, and reconstruction
2039
 * for a single frame
2040
 *
2041
 * \ingroup high_level_algo
2042
 */
2043
117k
static inline void encode_frame_internal(AV1_COMP *cpi) {
2044
117k
  ThreadData *const td = &cpi->td;
2045
117k
  MACROBLOCK *const x = &td->mb;
2046
117k
  AV1_COMMON *const cm = &cpi->common;
2047
117k
  CommonModeInfoParams *const mi_params = &cm->mi_params;
2048
117k
  FeatureFlags *const features = &cm->features;
2049
117k
  MACROBLOCKD *const xd = &x->e_mbd;
2050
117k
  RD_COUNTS *const rdc = &cpi->td.rd_counts;
2051
#if CONFIG_FPMT_TEST
2052
  FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
2053
  FrameProbInfo *const temp_frame_probs_simulation =
2054
      &cpi->ppi->temp_frame_probs_simulation;
2055
#endif
2056
117k
  FrameProbInfo *const frame_probs = &cpi->ppi->frame_probs;
2057
117k
  IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
2058
117k
  MultiThreadInfo *const mt_info = &cpi->mt_info;
2059
117k
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
2060
117k
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
2061
117k
  const DELTAQ_MODE deltaq_mode = oxcf->q_cfg.deltaq_mode;
2062
117k
  int i;
2063
2064
117k
  if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
2065
79.2k
    mi_params->setup_mi(mi_params);
2066
79.2k
  }
2067
2068
117k
  set_mi_offsets(mi_params, xd, 0, 0);
2069
2070
117k
  av1_zero(*td->counts);
2071
117k
  av1_zero(rdc->tx_type_used);
2072
117k
  av1_zero(rdc->obmc_used);
2073
117k
  av1_zero(rdc->warped_used);
2074
117k
  av1_zero(rdc->seg_tmp_pred_cost);
2075
2076
  // Reset the flag.
2077
117k
  cpi->intrabc_used = 0;
2078
  // Need to disable intrabc when superres is selected
2079
117k
  if (av1_superres_scaled(cm)) {
2080
0
    features->allow_intrabc = 0;
2081
0
  }
2082
2083
117k
  features->allow_intrabc &= (oxcf->kf_cfg.enable_intrabc);
2084
2085
117k
  if (features->allow_warped_motion &&
2086
27.0k
      cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
2087
10.1k
    const FRAME_UPDATE_TYPE update_type =
2088
10.1k
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2089
10.1k
    int warped_probability =
2090
#if CONFIG_FPMT_TEST
2091
        cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE
2092
            ? temp_frame_probs->warped_probs[update_type]
2093
            :
2094
#endif  // CONFIG_FPMT_TEST
2095
10.1k
            frame_probs->warped_probs[update_type];
2096
10.1k
    if (warped_probability < cpi->sf.inter_sf.prune_warped_prob_thresh)
2097
0
      features->allow_warped_motion = 0;
2098
10.1k
  }
2099
2100
117k
  int hash_table_created = 0;
2101
117k
  if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
2102
0
      !cpi->sf.rt_sf.use_nonrd_pick_mode) {
2103
    // TODO(any): move this outside of the recoding loop to avoid recalculating
2104
    // the hash table.
2105
    // add to hash table
2106
0
    const int pic_width = cpi->source->y_crop_width;
2107
0
    const int pic_height = cpi->source->y_crop_height;
2108
0
    uint32_t *block_hash_values[2] = { NULL };  // two buffers used ping-pong
2109
0
    bool error = false;
2110
2111
0
    for (int j = 0; j < 2; ++j) {
2112
0
      block_hash_values[j] = (uint32_t *)aom_malloc(
2113
0
          sizeof(*block_hash_values[j]) * pic_width * pic_height);
2114
0
      if (!block_hash_values[j]) {
2115
0
        error = true;
2116
0
        break;
2117
0
      }
2118
0
    }
2119
2120
0
    av1_hash_table_init(intrabc_hash_info);
2121
0
    if (error ||
2122
0
        !av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table)) {
2123
0
      free_block_hash_buffers(block_hash_values);
2124
0
      aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
2125
0
                         "Error allocating intrabc_hash_table and buffers");
2126
0
    }
2127
0
    hash_table_created = 1;
2128
0
    av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0]);
2129
    // Hash data generated for screen contents is used for intraBC ME
2130
0
    const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
2131
0
    int max_sb_size = (1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2));
2132
2133
0
    if (cpi->sf.mv_sf.hash_max_8x8_intrabc_blocks) {
2134
0
      max_sb_size = AOMMIN(8, max_sb_size);
2135
0
    }
2136
2137
0
    int src_idx = 0;
2138
0
    for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
2139
0
      const int dst_idx = !src_idx;
2140
0
      av1_generate_block_hash_value(intrabc_hash_info, cpi->source, size,
2141
0
                                    block_hash_values[src_idx],
2142
0
                                    block_hash_values[dst_idx]);
2143
0
      if (size >= min_alloc_size &&
2144
0
          !av1_add_to_hash_map_by_row_with_precal_data(
2145
0
              &intrabc_hash_info->intrabc_hash_table,
2146
0
              block_hash_values[dst_idx], pic_width, pic_height, size)) {
2147
0
        error = true;
2148
0
        break;
2149
0
      }
2150
0
    }
2151
2152
0
    free_block_hash_buffers(block_hash_values);
2153
2154
0
    if (error) {
2155
0
      aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
2156
0
                         "Error adding data to intrabc_hash_table");
2157
0
    }
2158
0
  }
2159
2160
117k
  const CommonQuantParams *quant_params = &cm->quant_params;
2161
1.06M
  for (i = 0; i < MAX_SEGMENTS; ++i) {
2162
943k
    const int qindex =
2163
943k
        cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
2164
943k
                        : quant_params->base_qindex;
2165
943k
    xd->lossless[i] =
2166
943k
        qindex == 0 && quant_params->y_dc_delta_q == 0 &&
2167
35.6k
        quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
2168
35.6k
        quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
2169
943k
    if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
2170
943k
    xd->qindex[i] = qindex;
2171
943k
    if (xd->lossless[i]) {
2172
35.6k
      cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
2173
907k
    } else {
2174
907k
      cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
2175
907k
    }
2176
943k
  }
2177
117k
  features->coded_lossless = is_coded_lossless(cm, xd);
2178
117k
  features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm);
2179
2180
  // Fix delta q resolution for the moment
2181
2182
117k
  cm->delta_q_info.delta_q_res = 0;
2183
117k
  if (cpi->use_ducky_encode) {
2184
0
    cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_DUCKY_ENCODE;
2185
117k
  } else if (cpi->oxcf.q_cfg.aq_mode != CYCLIC_REFRESH_AQ &&
2186
117k
             !cpi->roi.enabled) {
2187
117k
    if (deltaq_mode == DELTA_Q_OBJECTIVE)
2188
117k
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE;
2189
0
    else if (deltaq_mode == DELTA_Q_PERCEPTUAL)
2190
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
2191
0
    else if (deltaq_mode == DELTA_Q_PERCEPTUAL_AI)
2192
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
2193
0
    else if (deltaq_mode == DELTA_Q_USER_RATING_BASED)
2194
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
2195
0
    else if (deltaq_mode == DELTA_Q_HDR)
2196
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
2197
0
    else if (deltaq_mode == DELTA_Q_VARIANCE_BOOST)
2198
0
      cm->delta_q_info.delta_q_res =
2199
0
          aom_get_variance_boost_delta_q_res(quant_params->base_qindex);
2200
    // Set delta_q_present_flag before it is used for the first time
2201
117k
    cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
2202
117k
    cm->delta_q_info.delta_q_present_flag = deltaq_mode != NO_DELTA_Q;
2203
2204
    // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q
2205
    // is used for ineligible frames. That effectively will turn off row_mt
2206
    // usage. Note objective delta_q and tpl eligible frames are only altref
2207
    // frames currently.
2208
117k
    const GF_GROUP *gf_group = &cpi->ppi->gf_group;
2209
117k
    if (cm->delta_q_info.delta_q_present_flag) {
2210
117k
      if (deltaq_mode == DELTA_Q_OBJECTIVE &&
2211
117k
          gf_group->update_type[cpi->gf_frame_index] == LF_UPDATE)
2212
17.5k
        cm->delta_q_info.delta_q_present_flag = 0;
2213
2214
117k
      if (deltaq_mode == DELTA_Q_OBJECTIVE &&
2215
117k
          cm->delta_q_info.delta_q_present_flag) {
2216
100k
        cm->delta_q_info.delta_q_present_flag &= allow_deltaq_mode(cpi);
2217
100k
      }
2218
117k
    }
2219
2220
    // Reset delta_q_used flag
2221
117k
    cpi->deltaq_used = 0;
2222
2223
117k
    cm->delta_q_info.delta_lf_present_flag =
2224
117k
        cm->delta_q_info.delta_q_present_flag &&
2225
571
        oxcf->tool_cfg.enable_deltalf_mode;
2226
117k
    cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
2227
2228
    // update delta_q_present_flag and delta_lf_present_flag based on
2229
    // base_qindex
2230
117k
    cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
2231
117k
    cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
2232
117k
  } else if (cpi->cyclic_refresh->apply_cyclic_refresh ||
2233
0
             cpi->svc.number_temporal_layers == 1) {
2234
0
    cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
2235
0
    cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
2236
0
  }
2237
117k
  cpi->rc.cnt_zeromv = 0;
2238
2239
117k
  av1_frame_init_quantizer(cpi);
2240
117k
  init_encode_frame_mb_context(cpi);
2241
117k
  set_default_interp_skip_flags(cm, &cpi->interp_search_flags);
2242
2243
117k
  if (cm->prev_frame && cm->prev_frame->seg.enabled &&
2244
0
      cpi->svc.number_spatial_layers == 1)
2245
0
    cm->last_frame_seg_map = cm->prev_frame->seg_map;
2246
117k
  else
2247
117k
    cm->last_frame_seg_map = NULL;
2248
117k
  if (features->allow_intrabc || features->coded_lossless) {
2249
4.46k
    av1_set_default_ref_deltas(cm->lf.ref_deltas);
2250
4.46k
    av1_set_default_mode_deltas(cm->lf.mode_deltas);
2251
113k
  } else if (cm->prev_frame) {
2252
20.0k
    memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
2253
20.0k
    memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
2254
20.0k
  }
2255
117k
  memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
2256
117k
  memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
2257
2258
117k
  cpi->all_one_sided_refs =
2259
117k
      frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm);
2260
2261
117k
  cpi->prune_ref_frame_mask = 0;
2262
  // Figure out which ref frames can be skipped at frame level.
2263
117k
  setup_prune_ref_frame_mask(cpi);
2264
2265
117k
  x->txfm_search_info.txb_split_count = 0;
2266
#if CONFIG_SPEED_STATS
2267
  x->txfm_search_info.tx_search_count = 0;
2268
#endif  // CONFIG_SPEED_STATS
2269
2270
117k
#if !CONFIG_REALTIME_ONLY
2271
#if CONFIG_COLLECT_COMPONENT_TIMING
2272
  start_timing(cpi, av1_compute_global_motion_time);
2273
#endif
2274
117k
  av1_compute_global_motion_facade(cpi);
2275
#if CONFIG_COLLECT_COMPONENT_TIMING
2276
  end_timing(cpi, av1_compute_global_motion_time);
2277
#endif
2278
117k
#endif  // !CONFIG_REALTIME_ONLY
2279
2280
#if CONFIG_COLLECT_COMPONENT_TIMING
2281
  start_timing(cpi, av1_setup_motion_field_time);
2282
#endif
2283
117k
  av1_calculate_ref_frame_side(cm);
2284
2285
117k
  features->allow_ref_frame_mvs &= !(cpi->sf.hl_sf.ref_frame_mvs_lvl == 2);
2286
117k
  if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
2287
117k
#if !CONFIG_REALTIME_ONLY
2288
117k
  check_to_disable_ref_frame_mvs(cpi);
2289
117k
#endif  // !CONFIG_REALTIME_ONLY
2290
2291
#if CONFIG_COLLECT_COMPONENT_TIMING
2292
  end_timing(cpi, av1_setup_motion_field_time);
2293
#endif
2294
2295
117k
  cm->current_frame.skip_mode_info.skip_mode_flag =
2296
117k
      check_skip_mode_enabled(cpi);
2297
2298
  // Initialization of skip mode cost depends on the value of
2299
  // 'skip_mode_flag'. This initialization happens in the function
2300
  // av1_fill_mode_rates(), which is in turn called in
2301
  // av1_initialize_rd_consts(). Thus, av1_initialize_rd_consts()
2302
  // has to be called after 'skip_mode_flag' is initialized.
2303
117k
  av1_initialize_rd_consts(cpi);
2304
117k
  av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
2305
117k
  populate_thresh_to_force_zeromv_skip(cpi);
2306
2307
117k
  enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
2308
117k
  enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
2309
117k
  mt_info->row_mt_enabled = 0;
2310
117k
  mt_info->pack_bs_mt_enabled = AOMMIN(mt_info->num_mod_workers[MOD_PACK_BS],
2311
117k
                                       cm->tiles.cols * cm->tiles.rows) > 1;
2312
2313
117k
  if (oxcf->row_mt && (mt_info->num_workers > 1)) {
2314
74.0k
    mt_info->row_mt_enabled = 1;
2315
74.0k
    enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
2316
74.0k
    enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
2317
74.0k
    av1_encode_tiles_row_mt(cpi);
2318
74.0k
  } else {
2319
43.9k
    if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1) {
2320
0
      av1_encode_tiles_mt(cpi);
2321
43.9k
    } else {
2322
      // Preallocate the pc_tree for realtime coding to reduce the cost of
2323
      // memory allocation.
2324
43.9k
      const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
2325
43.9k
      if (use_nonrd_mode) {
2326
14.0k
        td->pc_root = av1_alloc_pc_tree_node(cm->seq_params->sb_size);
2327
14.0k
        if (!td->pc_root)
2328
0
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2329
0
                             "Failed to allocate PC_TREE");
2330
29.9k
      } else {
2331
29.9k
        td->pc_root = NULL;
2332
29.9k
      }
2333
2334
43.9k
      encode_tiles(cpi);
2335
43.9k
      av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
2336
43.9k
                                 cpi->sf.part_sf.partition_search_type);
2337
43.9k
      td->pc_root = NULL;
2338
43.9k
    }
2339
43.9k
  }
2340
2341
  // If intrabc is allowed but never selected, reset the allow_intrabc flag.
2342
117k
  if (features->allow_intrabc && !cpi->intrabc_used) {
2343
0
    features->allow_intrabc = 0;
2344
0
  }
2345
117k
  if (features->allow_intrabc) {
2346
0
    cm->delta_q_info.delta_lf_present_flag = 0;
2347
0
  }
2348
2349
117k
  if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
2350
260
    cm->delta_q_info.delta_q_present_flag = 0;
2351
260
  }
2352
2353
  // Set the transform size appropriately before bitstream creation
2354
117k
  const MODE_EVAL_TYPE eval_type =
2355
117k
      cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch
2356
117k
          ? WINNER_MODE_EVAL
2357
117k
          : DEFAULT_EVAL;
2358
117k
  const TX_SIZE_SEARCH_METHOD tx_search_type =
2359
117k
      cpi->winner_mode_params.tx_size_search_methods[eval_type];
2360
117k
  assert(oxcf->txfm_cfg.enable_tx64 || tx_search_type != USE_LARGESTALL);
2361
117k
  features->tx_mode = select_tx_mode(cm, tx_search_type);
2362
2363
  // Retain the frame level probability update conditions for parallel frames.
2364
  // These conditions will be consumed during postencode stage to update the
2365
  // probability.
2366
117k
  if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2367
0
    cpi->do_update_frame_probs_txtype[cpi->num_frame_recode] =
2368
0
        cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats;
2369
0
    cpi->do_update_frame_probs_obmc[cpi->num_frame_recode] =
2370
0
        (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2371
0
         cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX);
2372
0
    cpi->do_update_frame_probs_warp[cpi->num_frame_recode] =
2373
0
        (features->allow_warped_motion &&
2374
0
         cpi->sf.inter_sf.prune_warped_prob_thresh > 0);
2375
0
    cpi->do_update_frame_probs_interpfilter[cpi->num_frame_recode] =
2376
0
        (cm->current_frame.frame_type != KEY_FRAME &&
2377
0
         cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2378
0
         features->interp_filter == SWITCHABLE);
2379
0
  }
2380
2381
117k
  if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats ||
2382
117k
      ((cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh !=
2383
117k
        INT_MAX) &&
2384
63.6k
       (cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh != 0))) {
2385
42.6k
    const FRAME_UPDATE_TYPE update_type =
2386
42.6k
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2387
852k
    for (i = 0; i < TX_SIZES_ALL; i++) {
2388
809k
      int sum = 0;
2389
809k
      int j;
2390
809k
      int left = MAX_TX_TYPE_PROB;
2391
2392
13.7M
      for (j = 0; j < TX_TYPES; j++)
2393
12.9M
        sum += cpi->td.rd_counts.tx_type_used[i][j];
2394
2395
13.7M
      for (j = TX_TYPES - 1; j >= 0; j--) {
2396
12.9M
        int update_txtype_frameprobs = 1;
2397
12.9M
        const int new_prob =
2398
12.9M
            sum ? (int)((int64_t)MAX_TX_TYPE_PROB *
2399
1.49M
                        cpi->td.rd_counts.tx_type_used[i][j] / sum)
2400
12.9M
                : (j ? 0 : MAX_TX_TYPE_PROB);
2401
#if CONFIG_FPMT_TEST
2402
        if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2403
          if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2404
              0) {
2405
            int prob =
2406
                (temp_frame_probs_simulation->tx_type_probs[update_type][i][j] +
2407
                 new_prob) >>
2408
                1;
2409
            left -= prob;
2410
            if (j == 0) prob += left;
2411
            temp_frame_probs_simulation->tx_type_probs[update_type][i][j] =
2412
                prob;
2413
            // Copy temp_frame_probs_simulation to temp_frame_probs
2414
            for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2415
                 update_type_idx++) {
2416
              temp_frame_probs->tx_type_probs[update_type_idx][i][j] =
2417
                  temp_frame_probs_simulation
2418
                      ->tx_type_probs[update_type_idx][i][j];
2419
            }
2420
          }
2421
          update_txtype_frameprobs = 0;
2422
        }
2423
#endif  // CONFIG_FPMT_TEST
2424
        // Track the frame probabilities of parallel encode frames to update
2425
        // during postencode stage.
2426
12.9M
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2427
0
          update_txtype_frameprobs = 0;
2428
0
          cpi->frame_new_probs[cpi->num_frame_recode]
2429
0
              .tx_type_probs[update_type][i][j] = new_prob;
2430
0
        }
2431
12.9M
        if (update_txtype_frameprobs) {
2432
12.9M
          int prob =
2433
12.9M
              (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
2434
12.9M
          left -= prob;
2435
12.9M
          if (j == 0) prob += left;
2436
12.9M
          frame_probs->tx_type_probs[update_type][i][j] = prob;
2437
12.9M
        }
2438
12.9M
      }
2439
809k
    }
2440
42.6k
  }
2441
2442
117k
  if (cm->seg.enabled) {
2443
0
    cm->seg.temporal_update = 1;
2444
0
    if (rdc->seg_tmp_pred_cost[0] < rdc->seg_tmp_pred_cost[1])
2445
0
      cm->seg.temporal_update = 0;
2446
0
  }
2447
2448
117k
  if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2449
42.1k
      cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
2450
0
    const FRAME_UPDATE_TYPE update_type =
2451
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2452
2453
0
    for (i = 0; i < BLOCK_SIZES_ALL; i++) {
2454
0
      int sum = 0;
2455
0
      int update_obmc_frameprobs = 1;
2456
0
      for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];
2457
2458
0
      const int new_prob =
2459
0
          sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
2460
#if CONFIG_FPMT_TEST
2461
      if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2462
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2463
          temp_frame_probs_simulation->obmc_probs[update_type][i] =
2464
              (temp_frame_probs_simulation->obmc_probs[update_type][i] +
2465
               new_prob) >>
2466
              1;
2467
          // Copy temp_frame_probs_simulation to temp_frame_probs
2468
          for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2469
               update_type_idx++) {
2470
            temp_frame_probs->obmc_probs[update_type_idx][i] =
2471
                temp_frame_probs_simulation->obmc_probs[update_type_idx][i];
2472
          }
2473
        }
2474
        update_obmc_frameprobs = 0;
2475
      }
2476
#endif  // CONFIG_FPMT_TEST
2477
      // Track the frame probabilities of parallel encode frames to update
2478
      // during postencode stage.
2479
0
      if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2480
0
        update_obmc_frameprobs = 0;
2481
0
        cpi->frame_new_probs[cpi->num_frame_recode].obmc_probs[update_type][i] =
2482
0
            new_prob;
2483
0
      }
2484
0
      if (update_obmc_frameprobs) {
2485
0
        frame_probs->obmc_probs[update_type][i] =
2486
0
            (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
2487
0
      }
2488
0
    }
2489
0
  }
2490
2491
117k
  if (features->allow_warped_motion &&
2492
27.0k
      cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
2493
10.1k
    const FRAME_UPDATE_TYPE update_type =
2494
10.1k
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2495
10.1k
    int update_warp_frameprobs = 1;
2496
10.1k
    int sum = 0;
2497
30.5k
    for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
2498
10.1k
    const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
2499
#if CONFIG_FPMT_TEST
2500
    if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2501
      if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2502
        temp_frame_probs_simulation->warped_probs[update_type] =
2503
            (temp_frame_probs_simulation->warped_probs[update_type] +
2504
             new_prob) >>
2505
            1;
2506
        // Copy temp_frame_probs_simulation to temp_frame_probs
2507
        for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2508
             update_type_idx++) {
2509
          temp_frame_probs->warped_probs[update_type_idx] =
2510
              temp_frame_probs_simulation->warped_probs[update_type_idx];
2511
        }
2512
      }
2513
      update_warp_frameprobs = 0;
2514
    }
2515
#endif  // CONFIG_FPMT_TEST
2516
    // Track the frame probabilities of parallel encode frames to update
2517
    // during postencode stage.
2518
10.1k
    if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2519
0
      update_warp_frameprobs = 0;
2520
0
      cpi->frame_new_probs[cpi->num_frame_recode].warped_probs[update_type] =
2521
0
          new_prob;
2522
0
    }
2523
10.1k
    if (update_warp_frameprobs) {
2524
10.1k
      frame_probs->warped_probs[update_type] =
2525
10.1k
          (frame_probs->warped_probs[update_type] + new_prob) >> 1;
2526
10.1k
    }
2527
10.1k
  }
2528
2529
117k
  if (cm->current_frame.frame_type != KEY_FRAME &&
2530
27.0k
      cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2531
16.8k
      features->interp_filter == SWITCHABLE) {
2532
16.8k
    const FRAME_UPDATE_TYPE update_type =
2533
16.8k
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2534
2535
286k
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
2536
269k
      int sum = 0;
2537
269k
      int j;
2538
269k
      int left = 1536;
2539
2540
1.07M
      for (j = 0; j < SWITCHABLE_FILTERS; j++) {
2541
809k
        sum += cpi->td.counts->switchable_interp[i][j];
2542
809k
      }
2543
2544
1.07M
      for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
2545
809k
        int update_interpfilter_frameprobs = 1;
2546
809k
        const int new_prob =
2547
809k
            sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
2548
809k
                : (j ? 0 : 1536);
2549
#if CONFIG_FPMT_TEST
2550
        if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2551
          if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2552
              0) {
2553
            int prob = (temp_frame_probs_simulation
2554
                            ->switchable_interp_probs[update_type][i][j] +
2555
                        new_prob) >>
2556
                       1;
2557
            left -= prob;
2558
            if (j == 0) prob += left;
2559
            temp_frame_probs_simulation
2560
                ->switchable_interp_probs[update_type][i][j] = prob;
2561
            // Copy temp_frame_probs_simulation to temp_frame_probs
2562
            for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2563
                 update_type_idx++) {
2564
              temp_frame_probs->switchable_interp_probs[update_type_idx][i][j] =
2565
                  temp_frame_probs_simulation
2566
                      ->switchable_interp_probs[update_type_idx][i][j];
2567
            }
2568
          }
2569
          update_interpfilter_frameprobs = 0;
2570
        }
2571
#endif  // CONFIG_FPMT_TEST
2572
        // Track the frame probabilities of parallel encode frames to update
2573
        // during postencode stage.
2574
809k
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2575
0
          update_interpfilter_frameprobs = 0;
2576
0
          cpi->frame_new_probs[cpi->num_frame_recode]
2577
0
              .switchable_interp_probs[update_type][i][j] = new_prob;
2578
0
        }
2579
809k
        if (update_interpfilter_frameprobs) {
2580
809k
          int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
2581
809k
                      new_prob) >>
2582
809k
                     1;
2583
809k
          left -= prob;
2584
809k
          if (j == 0) prob += left;
2585
809k
          frame_probs->switchable_interp_probs[update_type][i][j] = prob;
2586
809k
        }
2587
809k
      }
2588
269k
    }
2589
16.8k
  }
2590
117k
  if (hash_table_created) {
2591
0
    av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
2592
0
  }
2593
117k
}
2594
2595
/*!\brief Setup reference frame buffers and encode a frame
2596
 *
2597
 * \ingroup high_level_algo
2598
 * \callgraph
2599
 * \callergraph
2600
 *
2601
 * \param[in]    cpi    Top-level encoder structure
2602
 */
2603
117k
void av1_encode_frame(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  CurrentFrame *const current_frame = &cm->current_frame;
  FeatureFlags *const features = &cm->features;
  RD_COUNTS *const rdc = &cpi->td.rd_counts;
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
  // Indicates whether or not to use a default reduced set for ext-tx
  // rather than the potential full set of 16 transforms
  features->reduced_tx_set_used = oxcf->txfm_cfg.reduced_tx_type_set;

  // Make sure segment_id is no larger than last_active_segid.
  if (cm->seg.enabled && cm->seg.update_map) {
    const int mi_rows = cm->mi_params.mi_rows;
    const int mi_cols = cm->mi_params.mi_cols;
    const int last_active_segid = cm->seg.last_active_segid;
    uint8_t *map = cpi->enc_seg.map;
    // Clamp every mode-info unit's segment id in the encoder's segment map.
    for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
      for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
        map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
      }
      map += mi_cols;
    }
  }

  // Frame-level reference setup: buffer refs, max-reference enforcement,
  // relative frame distances, and sign bias.
  av1_setup_frame_buf_refs(cm);
  enforce_max_ref_frames(cpi, &cpi->ref_frame_flags,
                         cm->cur_frame->ref_display_order_hint,
                         cm->current_frame.display_order_hint);
  set_rel_frame_dist(&cpi->common, &cpi->ref_frame_dist_info,
                     cpi->ref_frame_flags);
  av1_setup_frame_sign_bias(cm);

  // If global motion is enabled, then every buffer which is used as either
  // a source or a ref frame should have an image pyramid allocated.
  // Check here so that issues can be caught early in debug mode
#if !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
  if (cpi->alloc_pyramid) {
    assert(cpi->source->y_pyramid);
    for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
      const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
      if (buf != NULL) {
        assert(buf->buf.y_pyramid);
      }
    }
  }
#endif  // !defined(NDEBUG) && !CONFIG_REALTIME_ONLY

#if CONFIG_MISMATCH_DEBUG
  mismatch_reset_frame(av1_num_planes(cm));
#endif

  rdc->newmv_or_intra_blocks = 0;
  cpi->palette_pixel_num = 0;

  if (cpi->sf.hl_sf.frame_parameter_update ||
      cpi->sf.rt_sf.use_comp_ref_nonrd) {
    // Full path: start with REFERENCE_MODE_SELECT for inter frames and
    // tighten the frame-level decisions after encoding, based on what the
    // encoder actually used.
    if (frame_is_intra_only(cm))
      current_frame->reference_mode = SINGLE_REFERENCE;
    else
      current_frame->reference_mode = REFERENCE_MODE_SELECT;

    features->interp_filter = SWITCHABLE;
    if (cm->tiles.large_scale) features->interp_filter = EIGHTTAP_REGULAR;

    features->switchable_motion_mode = is_switchable_motion_mode_allowed(
        features->allow_warped_motion, oxcf->motion_mode_cfg.enable_obmc);

    // These flags are filled in during encode_frame_internal() and consumed
    // below to refine reference_mode and skip_mode signaling.
    rdc->compound_ref_used_flag = 0;
    rdc->skip_mode_used_flag = 0;

    encode_frame_internal(cpi);

    if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
      // Use a flag that includes 4x4 blocks
      if (rdc->compound_ref_used_flag == 0) {
        // No block used compound prediction: signal single reference only.
        current_frame->reference_mode = SINGLE_REFERENCE;
#if CONFIG_ENTROPY_STATS
        av1_zero(cpi->td.counts->comp_inter);
#endif  // CONFIG_ENTROPY_STATS
      }
    }
    // Re-check on the skip mode status as reference mode may have been
    // changed.
    SkipModeInfo *const skip_mode_info = &current_frame->skip_mode_info;
    if (frame_is_intra_only(cm) ||
        current_frame->reference_mode == SINGLE_REFERENCE) {
      skip_mode_info->skip_mode_allowed = 0;
      skip_mode_info->skip_mode_flag = 0;
    }
    // Drop skip-mode signaling if no block actually used skip mode.
    if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
      skip_mode_info->skip_mode_flag = 0;

    if (!cm->tiles.large_scale) {
      // If no transform block was ever split, TX_MODE_SELECT carries no
      // information — signal TX_MODE_LARGEST instead.
      if (features->tx_mode == TX_MODE_SELECT &&
          cpi->td.mb.txfm_search_info.txb_split_count == 0)
        features->tx_mode = TX_MODE_LARGEST;
    }
  } else {
    // This is needed if real-time speed setting is changed on the fly
    // from one using compound prediction to one using single reference.
    if (current_frame->reference_mode == REFERENCE_MODE_SELECT)
      current_frame->reference_mode = SINGLE_REFERENCE;
    encode_frame_internal(cpi);
  }
}