Coverage Report

Created: 2025-06-22 08:04

/src/aom/av1/encoder/encodeframe.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <limits.h>
13
#include <float.h>
14
#include <math.h>
15
#include <stdbool.h>
16
#include <stdio.h>
17
18
#include "config/aom_config.h"
19
#include "config/aom_dsp_rtcd.h"
20
#include "config/av1_rtcd.h"
21
22
#include "aom_dsp/aom_dsp_common.h"
23
#include "aom_dsp/binary_codes_writer.h"
24
#include "aom_ports/mem.h"
25
#include "aom_ports/aom_timer.h"
26
#include "aom_util/aom_pthread.h"
27
#if CONFIG_MISMATCH_DEBUG
28
#include "aom_util/debug_util.h"
29
#endif  // CONFIG_MISMATCH_DEBUG
30
31
#include "av1/common/cfl.h"
32
#include "av1/common/common.h"
33
#include "av1/common/common_data.h"
34
#include "av1/common/entropy.h"
35
#include "av1/common/entropymode.h"
36
#include "av1/common/idct.h"
37
#include "av1/common/mv.h"
38
#include "av1/common/mvref_common.h"
39
#include "av1/common/pred_common.h"
40
#include "av1/common/quant_common.h"
41
#include "av1/common/reconintra.h"
42
#include "av1/common/reconinter.h"
43
#include "av1/common/seg_common.h"
44
#include "av1/common/tile_common.h"
45
#include "av1/common/warped_motion.h"
46
47
#include "av1/encoder/allintra_vis.h"
48
#include "av1/encoder/aq_complexity.h"
49
#include "av1/encoder/aq_cyclicrefresh.h"
50
#include "av1/encoder/aq_variance.h"
51
#include "av1/encoder/global_motion_facade.h"
52
#include "av1/encoder/encodeframe.h"
53
#include "av1/encoder/encodeframe_utils.h"
54
#include "av1/encoder/encodemb.h"
55
#include "av1/encoder/encodemv.h"
56
#include "av1/encoder/encodetxb.h"
57
#include "av1/encoder/ethread.h"
58
#include "av1/encoder/extend.h"
59
#include "av1/encoder/intra_mode_search_utils.h"
60
#include "av1/encoder/ml.h"
61
#include "av1/encoder/motion_search_facade.h"
62
#include "av1/encoder/partition_strategy.h"
63
#if !CONFIG_REALTIME_ONLY
64
#include "av1/encoder/partition_model_weights.h"
65
#endif
66
#include "av1/encoder/partition_search.h"
67
#include "av1/encoder/rd.h"
68
#include "av1/encoder/rdopt.h"
69
#include "av1/encoder/reconinter_enc.h"
70
#include "av1/encoder/segmentation.h"
71
#include "av1/encoder/tokenize.h"
72
#include "av1/encoder/tpl_model.h"
73
#include "av1/encoder/var_based_part.h"
74
75
#if CONFIG_TUNE_VMAF
76
#include "av1/encoder/tune_vmaf.h"
77
#endif
78
79
/*!\cond */
80
// This is used as a reference when computing the source variance for the
81
//  purposes of activity masking.
82
// Eventually this should be replaced by custom no-reference routines,
83
//  which will be faster.
84
static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
85
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
86
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
87
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
88
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
89
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
90
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
91
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
92
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
93
  128, 128, 128, 128, 128, 128, 128, 128
94
};
95
96
#if CONFIG_AV1_HIGHBITDEPTH
97
static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
98
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
99
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
100
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
101
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
102
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
103
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
104
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
105
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
106
  128, 128, 128, 128, 128, 128, 128, 128
107
};
108
109
static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
110
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
111
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
112
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
113
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
114
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
115
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
116
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
117
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
118
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
119
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
120
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
121
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
122
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
123
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
124
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
125
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
126
};
127
128
static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
129
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
130
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
131
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
132
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
133
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
134
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
135
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
136
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
137
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
138
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
139
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
140
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
141
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
142
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
143
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
144
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
145
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
146
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
147
  128 * 16, 128 * 16
148
};
149
#endif  // CONFIG_AV1_HIGHBITDEPTH
150
/*!\endcond */
151
152
// For the given bit depth, returns a constant array used to assist the
153
// calculation of source block variance, which will then be used to decide
154
// adaptive quantizers.
155
0
static const uint8_t *get_var_offs(int use_hbd, int bd) {
156
0
#if CONFIG_AV1_HIGHBITDEPTH
157
0
  if (use_hbd) {
158
0
    assert(bd == 8 || bd == 10 || bd == 12);
159
0
    const int off_index = (bd - 8) >> 1;
160
0
    static const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
161
0
                                                AV1_HIGH_VAR_OFFS_10,
162
0
                                                AV1_HIGH_VAR_OFFS_12 };
163
0
    return CONVERT_TO_BYTEPTR(high_var_offs[off_index]);
164
0
  }
165
#else
166
  (void)use_hbd;
167
  (void)bd;
168
  assert(!use_hbd);
169
#endif
170
0
  assert(bd == 8);
171
0
  return AV1_VAR_OFFS;
172
0
}
173
174
0
void av1_init_rtc_counters(MACROBLOCK *const x) {
175
0
  av1_init_cyclic_refresh_counters(x);
176
0
  x->cnt_zeromv = 0;
177
0
}
178
179
0
void av1_accumulate_rtc_counters(AV1_COMP *cpi, const MACROBLOCK *const x) {
180
0
  if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ)
181
0
    av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh, x);
182
0
  cpi->rc.cnt_zeromv += x->cnt_zeromv;
183
0
}
184
185
unsigned int av1_get_perpixel_variance(const AV1_COMP *cpi,
186
                                       const MACROBLOCKD *xd,
187
                                       const struct buf_2d *ref,
188
                                       BLOCK_SIZE bsize, int plane,
189
0
                                       int use_hbd) {
190
0
  const int subsampling_x = xd->plane[plane].subsampling_x;
191
0
  const int subsampling_y = xd->plane[plane].subsampling_y;
192
0
  const BLOCK_SIZE plane_bsize =
193
0
      get_plane_block_size(bsize, subsampling_x, subsampling_y);
194
0
  unsigned int sse;
195
0
  const unsigned int var = cpi->ppi->fn_ptr[plane_bsize].vf(
196
0
      ref->buf, ref->stride, get_var_offs(use_hbd, xd->bd), 0, &sse);
197
0
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[plane_bsize]);
198
0
}
199
200
unsigned int av1_get_perpixel_variance_facade(const AV1_COMP *cpi,
201
                                              const MACROBLOCKD *xd,
202
                                              const struct buf_2d *ref,
203
0
                                              BLOCK_SIZE bsize, int plane) {
204
0
  const int use_hbd = is_cur_buf_hbd(xd);
205
0
  return av1_get_perpixel_variance(cpi, xd, ref, bsize, plane, use_hbd);
206
0
}
207
208
void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
209
                          int mi_row, int mi_col, const int num_planes,
210
0
                          BLOCK_SIZE bsize) {
211
  // Set current frame pointer.
212
0
  x->e_mbd.cur_buf = src;
213
214
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
215
  // the static analysis warnings.
216
0
  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
217
0
    const int is_uv = i > 0;
218
0
    setup_pred_plane(
219
0
        &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv],
220
0
        src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL,
221
0
        x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y);
222
0
  }
223
0
}
224
225
#if !CONFIG_REALTIME_ONLY
226
/*!\brief Assigns different quantization parameters to each super
227
 * block based on its TPL weight.
228
 *
229
 * \ingroup tpl_modelling
230
 *
231
 * \param[in]     cpi         Top level encoder instance structure
232
 * \param[in,out] td          Thread data structure
233
 * \param[in,out] x           Macro block level data for this block.
234
 * \param[in]     tile_info   Tile infromation / identification
235
 * \param[in]     mi_row      Block row (in "MI_SIZE" units) index
236
 * \param[in]     mi_col      Block column (in "MI_SIZE" units) index
237
 * \param[out]    num_planes  Number of image planes (e.g. Y,U,V)
238
 *
239
 * \remark No return value but updates macroblock and thread data
240
 * related to the q / q delta to be used.
241
 */
242
static inline void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
243
                                 MACROBLOCK *const x,
244
                                 const TileInfo *const tile_info, int mi_row,
245
0
                                 int mi_col, int num_planes) {
246
0
  AV1_COMMON *const cm = &cpi->common;
247
0
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
248
0
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
249
0
  assert(delta_q_info->delta_q_present_flag);
250
251
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
252
  // Delta-q modulation based on variance
253
0
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
254
255
0
  const int delta_q_res = delta_q_info->delta_q_res;
256
0
  int current_qindex = cm->quant_params.base_qindex;
257
0
  if (cpi->use_ducky_encode && cpi->ducky_encode_info.frame_info.qp_mode ==
258
0
                                   DUCKY_ENCODE_FRAME_MODE_QINDEX) {
259
0
    const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
260
0
    const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
261
0
    const int sb_cols =
262
0
        CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, cm->seq_params->mib_size_log2);
263
0
    const int sb_index = sb_row * sb_cols + sb_col;
264
0
    current_qindex =
265
0
        cpi->ducky_encode_info.frame_info.superblock_encode_qindex[sb_index];
266
0
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL) {
267
0
    if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
268
0
      const int block_wavelet_energy_level =
269
0
          av1_block_wavelet_energy_level(cpi, x, sb_size);
270
0
      x->sb_energy_level = block_wavelet_energy_level;
271
0
      current_qindex = av1_compute_q_from_energy_level_deltaq_mode(
272
0
          cpi, block_wavelet_energy_level);
273
0
    } else {
274
0
      const int block_var_level = av1_log_block_var(cpi, x, sb_size);
275
0
      x->sb_energy_level = block_var_level;
276
0
      current_qindex =
277
0
          av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level);
278
0
    }
279
0
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE &&
280
0
             cpi->oxcf.algo_cfg.enable_tpl_model) {
281
    // Setup deltaq based on tpl stats
282
0
    current_qindex =
283
0
        av1_get_q_for_deltaq_objective(cpi, td, NULL, sb_size, mi_row, mi_col);
284
0
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL_AI) {
285
0
    current_qindex = av1_get_sbq_perceptual_ai(cpi, sb_size, mi_row, mi_col);
286
0
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) {
287
0
    current_qindex = av1_get_sbq_user_rating_based(cpi, mi_row, mi_col);
288
0
  } else if (cpi->oxcf.q_cfg.enable_hdr_deltaq) {
289
0
    current_qindex = av1_get_q_for_hdr(cpi, x, sb_size, mi_row, mi_col);
290
0
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_VARIANCE_BOOST) {
291
0
    current_qindex = av1_get_sbq_variance_boost(cpi, x);
292
0
  }
293
294
0
  x->rdmult_cur_qindex = current_qindex;
295
0
  MACROBLOCKD *const xd = &x->e_mbd;
296
0
  const int adjusted_qindex = av1_adjust_q_from_delta_q_res(
297
0
      delta_q_res, xd->current_base_qindex, current_qindex);
298
0
  if (cpi->use_ducky_encode) {
299
0
    assert(adjusted_qindex == current_qindex);
300
0
  }
301
0
  current_qindex = adjusted_qindex;
302
303
0
  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
304
0
  x->rdmult_delta_qindex = x->delta_qindex;
305
306
0
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
307
0
  xd->mi[0]->current_qindex = current_qindex;
308
0
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);
309
310
  // keep track of any non-zero delta-q used
311
0
  td->deltaq_used |= (x->delta_qindex != 0);
312
313
0
  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
314
0
    const int delta_lf_res = delta_q_info->delta_lf_res;
315
0
    const int lfmask = ~(delta_lf_res - 1);
316
0
    const int delta_lf_from_base =
317
0
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
318
0
    const int8_t delta_lf =
319
0
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
320
0
    const int frame_lf_count =
321
0
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
322
0
    const int mib_size = cm->seq_params->mib_size;
323
324
    // pre-set the delta lf for loop filter. Note that this value is set
325
    // before mi is assigned for each block in current superblock
326
0
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
327
0
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
328
0
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
329
0
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
330
0
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
331
0
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
332
0
        }
333
0
      }
334
0
    }
335
0
  }
336
0
}
337
338
static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
339
0
                                 int mi_col) {
340
0
  const AV1_COMMON *cm = &cpi->common;
341
0
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
342
0
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
343
0
  MACROBLOCK *x = &td->mb;
344
0
  const int frame_idx = cpi->gf_frame_index;
345
0
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
346
0
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
347
348
0
  av1_zero(x->tpl_keep_ref_frame);
349
350
0
  if (!av1_tpl_stats_ready(tpl_data, frame_idx)) return;
351
0
  if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return;
352
0
  if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;
353
354
0
  const int is_overlay =
355
0
      cpi->ppi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
356
0
  if (is_overlay) {
357
0
    memset(x->tpl_keep_ref_frame, 1, sizeof(x->tpl_keep_ref_frame));
358
0
    return;
359
0
  }
360
361
0
  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
362
0
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
363
0
  const int tpl_stride = tpl_frame->stride;
364
0
  int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
365
0
  const int step = 1 << block_mis_log2;
366
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
367
368
0
  const int mi_row_end =
369
0
      AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
370
0
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
371
0
  const int mi_col_sr =
372
0
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
373
0
  const int mi_col_end_sr =
374
0
      AOMMIN(coded_to_superres_mi(mi_col + mi_size_wide[sb_size],
375
0
                                  cm->superres_scale_denominator),
376
0
             mi_cols_sr);
377
0
  const int row_step = step;
378
0
  const int col_step_sr =
379
0
      coded_to_superres_mi(step, cm->superres_scale_denominator);
380
0
  for (int row = mi_row; row < mi_row_end; row += row_step) {
381
0
    for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
382
0
      const TplDepStats *this_stats =
383
0
          &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
384
0
      int64_t tpl_pred_error[INTER_REFS_PER_FRAME] = { 0 };
385
      // Find the winner ref frame idx for the current block
386
0
      int64_t best_inter_cost = this_stats->pred_error[0];
387
0
      int best_rf_idx = 0;
388
0
      for (int idx = 1; idx < INTER_REFS_PER_FRAME; ++idx) {
389
0
        if ((this_stats->pred_error[idx] < best_inter_cost) &&
390
0
            (this_stats->pred_error[idx] != 0)) {
391
0
          best_inter_cost = this_stats->pred_error[idx];
392
0
          best_rf_idx = idx;
393
0
        }
394
0
      }
395
      // tpl_pred_error is the pred_error reduction of best_ref w.r.t.
396
      // LAST_FRAME.
397
0
      tpl_pred_error[best_rf_idx] = this_stats->pred_error[best_rf_idx] -
398
0
                                    this_stats->pred_error[LAST_FRAME - 1];
399
400
0
      for (int rf_idx = 1; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx)
401
0
        inter_cost[rf_idx] += tpl_pred_error[rf_idx];
402
0
    }
403
0
  }
404
405
0
  int rank_index[INTER_REFS_PER_FRAME - 1];
406
0
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
407
0
    rank_index[idx] = idx + 1;
408
0
    for (int i = idx; i > 0; --i) {
409
0
      if (inter_cost[rank_index[i - 1]] > inter_cost[rank_index[i]]) {
410
0
        const int tmp = rank_index[i - 1];
411
0
        rank_index[i - 1] = rank_index[i];
412
0
        rank_index[i] = tmp;
413
0
      }
414
0
    }
415
0
  }
416
417
0
  x->tpl_keep_ref_frame[INTRA_FRAME] = 1;
418
0
  x->tpl_keep_ref_frame[LAST_FRAME] = 1;
419
420
0
  int cutoff_ref = 0;
421
0
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
422
0
    x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 1;
423
0
    if (idx > 2) {
424
0
      if (!cutoff_ref) {
425
        // If the predictive coding gains are smaller than the previous more
426
        // relevant frame over certain amount, discard this frame and all the
427
        // frames afterwards.
428
0
        if (llabs(inter_cost[rank_index[idx]]) <
429
0
                llabs(inter_cost[rank_index[idx - 1]]) / 8 ||
430
0
            inter_cost[rank_index[idx]] == 0)
431
0
          cutoff_ref = 1;
432
0
      }
433
434
0
      if (cutoff_ref) x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 0;
435
0
    }
436
0
  }
437
0
}
438
439
static inline void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
440
0
                                           int mi_row, int mi_col) {
441
0
  const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
442
0
  const int orig_rdmult = cpi->rd.RDMULT;
443
444
0
  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
445
0
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
446
0
  const int gf_group_index = cpi->gf_frame_index;
447
0
  if (cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ &&
448
0
      cpi->oxcf.q_cfg.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 &&
449
0
      cpi->ppi->gf_group.update_type[gf_group_index] == ARF_UPDATE) {
450
0
    const int dr =
451
0
        av1_get_rdmult_delta(cpi, sb_size, mi_row, mi_col, orig_rdmult);
452
0
    x->rdmult = dr;
453
0
  }
454
0
}
455
#endif  // !CONFIG_REALTIME_ONLY
456
457
#if CONFIG_RT_ML_PARTITIONING
458
// Get a prediction(stored in x->est_pred) for the whole superblock.
459
static void get_estimated_pred(AV1_COMP *cpi, const TileInfo *const tile,
460
                               MACROBLOCK *x, int mi_row, int mi_col) {
461
  AV1_COMMON *const cm = &cpi->common;
462
  const int is_key_frame = frame_is_intra_only(cm);
463
  MACROBLOCKD *xd = &x->e_mbd;
464
465
  // TODO(kyslov) Extend to 128x128
466
  assert(cm->seq_params->sb_size == BLOCK_64X64);
467
468
  av1_set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
469
470
  if (!is_key_frame) {
471
    MB_MODE_INFO *mi = xd->mi[0];
472
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
473
474
    assert(yv12 != NULL);
475
476
    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
477
                         get_ref_scale_factors(cm, LAST_FRAME), 1);
478
    mi->ref_frame[0] = LAST_FRAME;
479
    mi->ref_frame[1] = NONE;
480
    mi->bsize = BLOCK_64X64;
481
    mi->mv[0].as_int = 0;
482
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
483
484
    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
485
486
    xd->plane[0].dst.buf = x->est_pred;
487
    xd->plane[0].dst.stride = 64;
488
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
489
  } else {
490
#if CONFIG_AV1_HIGHBITDEPTH
491
    switch (xd->bd) {
492
      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
493
      case 10:
494
        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
495
        break;
496
      case 12:
497
        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
498
        break;
499
    }
500
#else
501
    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
502
#endif  // CONFIG_VP9_HIGHBITDEPTH
503
  }
504
}
505
#endif  // CONFIG_RT_ML_PARTITIONING
506
507
0
#define AVG_CDF_WEIGHT_LEFT 3
508
0
#define AVG_CDF_WEIGHT_TOP_RIGHT 1
509
510
/*!\brief Encode a superblock (minimal RD search involved)
511
 *
512
 * \ingroup partition_search
513
 * Encodes the superblock by a pre-determined partition pattern, only minor
514
 * rd-based searches are allowed to adjust the initial pattern. It is only used
515
 * by realtime encoding.
516
 */
517
static inline void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
518
                                   TileDataEnc *tile_data, TokenExtra **tp,
519
                                   const int mi_row, const int mi_col,
520
0
                                   const int seg_skip) {
521
0
  AV1_COMMON *const cm = &cpi->common;
522
0
  MACROBLOCK *const x = &td->mb;
523
0
  const SPEED_FEATURES *const sf = &cpi->sf;
524
0
  const TileInfo *const tile_info = &tile_data->tile_info;
525
0
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
526
0
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
527
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
528
0
  PC_TREE *const pc_root = td->pc_root;
529
530
#if CONFIG_RT_ML_PARTITIONING
531
  if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) {
532
    RD_STATS dummy_rdc;
533
    get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
534
    av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
535
                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, pc_root);
536
    return;
537
  }
538
#endif
539
  // Set the partition
540
0
  if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
541
0
      (sf->rt_sf.use_fast_fixed_part && x->sb_force_fixed_part == 1 &&
542
0
       (!frame_is_intra_only(cm) &&
543
0
        (!cpi->ppi->use_svc ||
544
0
         !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)))) {
545
    // set a fixed-size partition
546
0
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
547
0
    BLOCK_SIZE bsize_select = sf->part_sf.fixed_partition_size;
548
0
    if (sf->rt_sf.use_fast_fixed_part &&
549
0
        x->content_state_sb.source_sad_nonrd < kLowSad) {
550
0
      bsize_select = cm->seq_params->sb_size;
551
0
    }
552
0
    if (cpi->sf.rt_sf.skip_encoding_non_reference_slide_change &&
553
0
        cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) {
554
0
      bsize_select = cm->seq_params->sb_size;
555
0
      x->force_zeromv_skip_for_sb = 1;
556
0
    }
557
0
    const BLOCK_SIZE bsize = seg_skip ? sb_size : bsize_select;
558
0
    if (x->content_state_sb.source_sad_nonrd > kZeroSad)
559
0
      x->force_color_check_block_level = 1;
560
0
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
561
0
  } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
562
    // set a variance-based partition
563
0
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
564
0
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
565
0
  }
566
0
  assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
567
0
         sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
568
0
  set_cb_offsets(td->mb.cb_offset, 0, 0);
569
570
  // Initialize the flag to skip cdef to 1.
571
0
  if (sf->rt_sf.skip_cdef_sb) {
572
0
    const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
573
    // If 128x128 block is used, we need to set the flag for all 4 64x64 sub
574
    // "blocks".
575
0
    for (int r = 0; r < block64_in_sb; ++r) {
576
0
      for (int c = 0; c < block64_in_sb; ++c) {
577
0
        const int idx_in_sb =
578
0
            r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
579
0
        if (mi[idx_in_sb]) mi[idx_in_sb]->cdef_strength = 1;
580
0
      }
581
0
    }
582
0
  }
583
584
#if CONFIG_COLLECT_COMPONENT_TIMING
585
  start_timing(cpi, nonrd_use_partition_time);
586
#endif
587
0
  av1_nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
588
0
                          pc_root);
589
#if CONFIG_COLLECT_COMPONENT_TIMING
590
  end_timing(cpi, nonrd_use_partition_time);
591
#endif
592
0
}
593
594
// This function initializes the stats for encode_rd_sb.
595
static inline void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
596
                                     const TileDataEnc *tile_data,
597
                                     SIMPLE_MOTION_DATA_TREE *sms_root,
598
                                     RD_STATS *rd_cost, int mi_row, int mi_col,
599
0
                                     int gather_tpl_data) {
600
0
  const AV1_COMMON *cm = &cpi->common;
601
0
  const TileInfo *tile_info = &tile_data->tile_info;
602
0
  MACROBLOCK *x = &td->mb;
603
604
0
  const SPEED_FEATURES *sf = &cpi->sf;
605
0
  const int use_simple_motion_search =
606
0
      (sf->part_sf.simple_motion_search_split ||
607
0
       sf->part_sf.simple_motion_search_prune_rect ||
608
0
       sf->part_sf.simple_motion_search_early_term_none ||
609
0
       sf->part_sf.ml_early_term_after_part_split_level) &&
610
0
      !frame_is_intra_only(cm);
611
0
  if (use_simple_motion_search) {
612
0
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_root,
613
0
                                             mi_row, mi_col);
614
0
  }
615
616
0
#if !CONFIG_REALTIME_ONLY
617
0
  if (!(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
618
0
        cpi->oxcf.gf_cfg.lag_in_frames == 0)) {
619
0
    init_ref_frame_space(cpi, td, mi_row, mi_col);
620
0
    x->sb_energy_level = 0;
621
0
    x->part_search_info.cnn_output_valid = 0;
622
0
    if (gather_tpl_data) {
623
0
      if (cm->delta_q_info.delta_q_present_flag) {
624
0
        const int num_planes = av1_num_planes(cm);
625
0
        const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
626
0
        setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
627
0
        av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
628
0
      }
629
630
      // TODO(jingning): revisit this function.
631
0
      if (cpi->oxcf.algo_cfg.enable_tpl_model && (0)) {
632
0
        adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
633
0
      }
634
0
    }
635
0
  }
636
#else
637
  (void)tile_info;
638
  (void)mi_row;
639
  (void)mi_col;
640
  (void)gather_tpl_data;
641
#endif
642
643
0
  x->reuse_inter_pred = false;
644
0
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
645
0
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
646
0
  av1_zero(x->picked_ref_frames_mask);
647
0
  av1_invalid_rd_stats(rd_cost);
648
0
}
649
650
#if !CONFIG_REALTIME_ONLY
651
// Re-initializes the superblock's quantizers (and, when enabled, the per-mi
// delta loop-filter values) for one candidate delta-QP offset of the QP-sweep
// search, then resets per-SB search state and invalidates rd_cost so the
// subsequent partition search starts fresh.
static void sb_qp_sweep_init_quantizers(AV1_COMP *cpi, ThreadData *td,
                                        const TileDataEnc *tile_data,
                                        SIMPLE_MOTION_DATA_TREE *sms_tree,
                                        RD_STATS *rd_cost, int mi_row,
                                        int mi_col, int delta_qp_ofs) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const TileInfo *tile_info = &tile_data->tile_info;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  // Callers only reach here when delta-q signaling is on (see encode_rd_sb).
  assert(delta_q_info->delta_q_present_flag);
  const int delta_q_res = delta_q_info->delta_q_res;

  // Simple-motion-search MVs are needed only when one of these speed features
  // consumes them, and never on intra-only frames.
  const SPEED_FEATURES *sf = &cpi->sf;
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_tree,
                                             mi_row, mi_col);
  }

  // Candidate qindex = baseline rdmult qindex plus this sweep step's offset,
  // snapped to the frame's delta-q resolution.
  int current_qindex = x->rdmult_cur_qindex + delta_qp_ofs;

  MACROBLOCKD *const xd = &x->e_mbd;
  current_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;

  // av1_set_offsets must precede the mi/quantizer updates: it points xd->mi
  // at this superblock's mode info.
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);

  // keep track of any non-zero delta-q used
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    // Derive the loop-filter delta from the quantizer delta (scaled by 1/4)
    // and round it to the delta-lf resolution via the mask.
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    // Chroma deltas exist only when the frame has more than one plane.
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // pre-set the delta lf for loop filter. Note that this value is set
    // before mi is assigned for each block in current superblock
    // (AOMMIN clips the loops at the frame boundary).
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }

  // Reset per-SB search state so each sweep iteration is independent.
  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}
722
723
// Sweeps a range of delta-QP offsets for one superblock, running a dry-pass
// partition search at each offset, and returns the absolute qindex delta that
// produced the lowest RD cost. Ties are broken in favor of the offset closest
// to zero. sb_org_stats holds the pre-sweep SB state restored before each try.
static int sb_qp_sweep(AV1_COMP *const cpi, ThreadData *td,
                       TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
                       int mi_col, BLOCK_SIZE bsize,
                       SIMPLE_MOTION_DATA_TREE *sms_tree,
                       SB_FIRST_PASS_STATS *sb_org_stats) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  RD_STATS rdc_winner, cur_rdc;
  av1_invalid_rd_stats(&rdc_winner);

  int best_qindex = td->mb.rdmult_delta_qindex;
  // Key frames search a wider QP range ([-20, 20]) than inter frames
  // ([-12, 12]); the step honors the frame's delta-q resolution.
  const int start = cm->current_frame.frame_type == KEY_FRAME ? -20 : -12;
  const int end = cm->current_frame.frame_type == KEY_FRAME ? 20 : 12;
  const int step = cm->delta_q_info.delta_q_res;

  for (int sweep_qp_delta = start; sweep_qp_delta <= end;
       sweep_qp_delta += step) {
    // Program the quantizers for this candidate offset.
    sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_tree, &cur_rdc, mi_row,
                                mi_col, sweep_qp_delta);

    // Preserve the SB's current_qindex across the state restore below, since
    // av1_restore_sb_state would otherwise clobber the value just programmed.
    const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
    const int backup_current_qindex =
        cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

    av1_reset_mbmi(&cm->mi_params, bsize, mi_row, mi_col);
    av1_restore_sb_state(sb_org_stats, cpi, td, tile_data, mi_row, mi_col);
    cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex = backup_current_qindex;

    td->pc_root = av1_alloc_pc_tree_node(bsize);
    if (!td->pc_root)
      aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    // SB_DRY_PASS: evaluate RD cost only; the real encode happens later.
    av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize,
                          &cur_rdc, cur_rdc, td->pc_root, sms_tree, NULL,
                          SB_DRY_PASS, NULL);

    // Accept a strictly better RD cost, or an equal cost at a smaller
    // |delta| (prefer the least aggressive QP adjustment on ties).
    if ((rdc_winner.rdcost > cur_rdc.rdcost) ||
        (abs(sweep_qp_delta) < abs(best_qindex - x->rdmult_delta_qindex) &&
         rdc_winner.rdcost == cur_rdc.rdcost)) {
      rdc_winner = cur_rdc;
      best_qindex = x->rdmult_delta_qindex + sweep_qp_delta;
    }
  }

  return best_qindex;
}
769
#endif  //! CONFIG_REALTIME_ONLY
770
771
/*!\brief Encode a superblock (RD-search-based)
772
 *
773
 * \ingroup partition_search
774
 * Conducts partition search for a superblock, based on rate-distortion costs,
775
 * from scratch or adjusting from a pre-calculated partition pattern.
776
 */
777
static inline void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, TokenExtra **tp,
                                const int mi_row, const int mi_col,
                                const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  // Mode-info grid entry for the top-left mi unit of this superblock.
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int num_planes = av1_num_planes(cm);
  // Rate/dist/RD outputs required by the callees but unused here.
  int dummy_rate;
  int64_t dummy_dist;
  RD_STATS dummy_rdc;
  SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root;

#if CONFIG_REALTIME_ONLY
  (void)seg_skip;
#endif  // CONFIG_REALTIME_ONLY

  init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
                    1);

  // Encode the superblock: one of three partition-search strategies is used
  // depending on the configured partition_search_type.
  if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // partition search starting from a variance-based partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_use_partition_time);
#endif
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_use_partition_time);
#endif
  }
#if !CONFIG_REALTIME_ONLY
  else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
    // partition search by adjusting a fixed-size partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    // seg_skip forces the whole SB as one block; otherwise use the
    // configured fixed partition size.
    const BLOCK_SIZE bsize =
        seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
  } else {
    // The most exhaustive recursive partition search
    SuperBlockEnc *sb_enc = &x->sb_enc;
    // No stats for overlay frames. Exclude key frame.
    av1_get_tpl_stats_sb(cpi, sb_size, mi_row, mi_col, sb_enc);

    // Reset the tree for simple motion search data
    av1_reset_simple_motion_tree_partition(sms_root, sb_size);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_pick_partition_time);
#endif

    // Estimate the maximum square partition block size, which will be used
    // as the starting block size for partitioning the sb
    set_max_min_partition_size(sb_enc, cpi, x, sf, sb_size, mi_row, mi_col);

    // The superblock can be searched only once, or twice consecutively for
    // better quality. Note that the meaning of passes here is different from
    // the general concept of 1-pass/2-pass encoders.
    const int num_passes =
        cpi->oxcf.unit_test_cfg.sb_multipass_unit_test ? 2 : 1;

    // Optional per-SB QP sweep: try multiple delta-QP offsets via dry-pass
    // searches, then lock in the best one before the real search below.
    // Requires stats (not one-pass realtime) and delta-q signaling.
    if (cpi->oxcf.sb_qp_sweep &&
        !(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
          cpi->oxcf.gf_cfg.lag_in_frames == 0) &&
        cm->delta_q_info.delta_q_present_flag) {
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_stats_cache,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_stats_cache)));
      av1_backup_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                          mi_col);
      assert(x->rdmult_delta_qindex == x->delta_qindex);

      const int best_qp_diff =
          sb_qp_sweep(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, sms_root,
                      td->mb.sb_stats_cache) -
          x->rdmult_delta_qindex;

      sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_root, &dummy_rdc,
                                  mi_row, mi_col, best_qp_diff);

      // Keep the winning current_qindex across the state restore below.
      const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
      const int backup_current_qindex =
          cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_restore_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                           mi_col);

      cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex =
          backup_current_qindex;
      aom_free(td->mb.sb_stats_cache);
      td->mb.sb_stats_cache = NULL;
    }
    if (num_passes == 1) {
#if CONFIG_PARTITION_SEARCH_ORDER
      // An external partition model, when ready, drives the search order on
      // inter frames; otherwise fall through to the built-in RD search.
      if (cpi->ext_part_controller.ready && !frame_is_intra_only(cm)) {
        av1_reset_part_sf(&cpi->sf.part_sf);
        av1_reset_sf_for_ext_part(cpi);
        RD_STATS this_rdc;
        av1_rd_partition_search(cpi, td, tile_data, tp, sms_root, mi_row,
                                mi_col, sb_size, &this_rdc);
      } else {
        td->pc_root = av1_alloc_pc_tree_node(sb_size);
        if (!td->pc_root)
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                             "Failed to allocate PC_TREE");
        av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                              &dummy_rdc, dummy_rdc, td->pc_root, sms_root,
                              NULL, SB_SINGLE_PASS, NULL);
      }
#else
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_SINGLE_PASS, NULL);
#endif  // CONFIG_PARTITION_SEARCH_ORDER
    } else {
      // First pass (SB_DRY_PASS): search once with state backed up so the
      // second pass can restart from identical conditions.
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_fp_stats,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_fp_stats)));
      av1_backup_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                          mi_col);
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_DRY_PASS, NULL);

      // Second pass (SB_WET_PASS): reset mi/sms state, restore the backup,
      // and search again for the final encode.
      init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
                        mi_col, 0);
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_reset_simple_motion_tree_partition(sms_root, sb_size);

      av1_restore_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                           mi_col);

      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_WET_PASS, NULL);
      aom_free(td->mb.sb_fp_stats);
      td->mb.sb_fp_stats = NULL;
    }

    // Reset to 0 so that it wouldn't be used elsewhere mistakenly.
    sb_enc->tpl_data_count = 0;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_pick_partition_time);
#endif
  }
#endif  // !CONFIG_REALTIME_ONLY

  // Update the inter rd model
  // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
  if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
      cm->tiles.cols == 1 && cm->tiles.rows == 1) {
    av1_inter_mode_data_fit(tile_data, x->rdmult);
  }
}
971
972
// Check if the cost update of symbols mode, coeff and dv are tile or off.
973
static inline int is_mode_coeff_dv_upd_freq_tile_or_off(
974
0
    const AV1_COMP *const cpi) {
975
0
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;
976
977
0
  return (inter_sf->coeff_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
978
0
          inter_sf->mode_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
979
0
          cpi->sf.intra_sf.dv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
980
0
}
981
982
// When row-mt is enabled and cost update frequencies are set to off/tile,
983
// processing of current SB can start even before processing of top-right SB
984
// is finished. This function checks if it is sufficient to wait for top SB
985
// to finish processing before current SB starts processing.
986
0
static inline int delay_wait_for_top_right_sb(const AV1_COMP *const cpi) {
987
0
  const MODE mode = cpi->oxcf.mode;
988
0
  if (mode == GOOD) return 0;
989
990
0
  if (mode == ALLINTRA)
991
0
    return is_mode_coeff_dv_upd_freq_tile_or_off(cpi);
992
0
  else if (mode == REALTIME)
993
0
    return (is_mode_coeff_dv_upd_freq_tile_or_off(cpi) &&
994
0
            cpi->sf.inter_sf.mv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
995
0
  else
996
0
    return 0;
997
0
}
998
999
/*!\brief Calculate source SAD at superblock level using 64x64 block source SAD
1000
 *
1001
 * \ingroup partition_search
1002
 * \callgraph
1003
 * \callergraph
1004
 */
1005
static inline uint64_t get_sb_source_sad(const AV1_COMP *cpi, int mi_row,
                                         int mi_col) {
  // UINT64_MAX signals "no SAD available" to the caller.
  if (cpi->src_sad_blk_64x64 == NULL) return UINT64_MAX;

  const AV1_COMMON *const cm = &cpi->common;
  // Width of one 64x64 block in mi units: for 128x128 superblocks a 64x64
  // block is half an SB; for 64x64 superblocks it is one full SB.
  const int blk_64x64_in_mis = (cm->seq_params->sb_size == BLOCK_128X128)
                                   ? (cm->seq_params->mib_size >> 1)
                                   : cm->seq_params->mib_size;
  // Dimensions of the frame's 64x64 SAD grid (rounded up).
  const int num_blk_64x64_cols =
      (cm->mi_params.mi_cols + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
  const int num_blk_64x64_rows =
      (cm->mi_params.mi_rows + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
  const int blk_64x64_col_index = mi_col / blk_64x64_in_mis;
  const int blk_64x64_row_index = mi_row / blk_64x64_in_mis;
  uint64_t curr_sb_sad = UINT64_MAX;
  // Avoid the border as sad_blk_64x64 may not be set for the border
  // in the scene detection.
  if ((blk_64x64_row_index >= num_blk_64x64_rows - 1) ||
      (blk_64x64_col_index >= num_blk_64x64_cols - 1)) {
    return curr_sb_sad;
  }
  // Pointer to this SB's top-left 64x64 entry in the row-major SAD grid.
  const uint64_t *const src_sad_blk_64x64_data =
      &cpi->src_sad_blk_64x64[blk_64x64_col_index +
                              blk_64x64_row_index * num_blk_64x64_cols];
  if (cm->seq_params->sb_size == BLOCK_128X128) {
    // Calculate SB source SAD by accumulating source SAD of 64x64 blocks in the
    // superblock (2x2 neighborhood: current, right, below, below-right).
    curr_sb_sad = src_sad_blk_64x64_data[0] + src_sad_blk_64x64_data[1] +
                  src_sad_blk_64x64_data[num_blk_64x64_cols] +
                  src_sad_blk_64x64_data[num_blk_64x64_cols + 1];
  } else if (cm->seq_params->sb_size == BLOCK_64X64) {
    curr_sb_sad = src_sad_blk_64x64_data[0];
  }
  // For any other SB size curr_sb_sad stays UINT64_MAX ("unavailable").
  return curr_sb_sad;
}
1040
1041
/*!\brief Determine whether grading content can be skipped based on sad stat
1042
 *
1043
 * \ingroup partition_search
1044
 * \callgraph
1045
 * \callergraph
1046
 */
1047
static inline bool is_calc_src_content_needed(AV1_COMP *cpi,
                                              MACROBLOCK *const x, int mi_row,
                                              int mi_col) {
  // Always compute source content on non-top spatial layers of SVC.
  if (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
    return true;
  const uint64_t curr_sb_sad = get_sb_source_sad(cpi, mi_row, mi_col);
  // UINT64_MAX means no precomputed SAD is available; fall back to the full
  // computation.
  if (curr_sb_sad == UINT64_MAX) return true;
  // Zero SAD: the SB is static; record that and skip the computation.
  if (curr_sb_sad == 0) {
    x->content_state_sb.source_sad_nonrd = kZeroSad;
    return false;
  }
  AV1_COMMON *const cm = &cpi->common;
  bool do_calc_src_content = true;

  // The SAD-threshold shortcut below is applied only at speed >= 9.
  if (cpi->oxcf.speed < 9) return do_calc_src_content;

  // TODO(yunqing): Tune/validate the thresholds for 128x128 SB size.
  // Shortcut applies only to small resolutions (shorter side < 360).
  if (AOMMIN(cm->width, cm->height) < 360) {
    // Derive Average 64x64 block source SAD from SB source SAD
    // (a 128x128 SB contains four 64x64 blocks; +2 rounds the division).
    const uint64_t avg_64x64_blk_sad =
        (cm->seq_params->sb_size == BLOCK_128X128) ? ((curr_sb_sad + 2) >> 2)
                                                   : curr_sb_sad;

    // The threshold is determined based on kLowSad and kHighSad threshold and
    // test results.
    uint64_t thresh_low = 15000;
    uint64_t thresh_high = 40000;

    if (cpi->sf.rt_sf.increase_source_sad_thresh) {
      thresh_low = thresh_low << 1;
      thresh_high = thresh_high << 1;
    }

    // SAD clearly in the "medium" band: classify directly and skip the
    // full source-content computation.
    if (avg_64x64_blk_sad > thresh_low && avg_64x64_blk_sad < thresh_high) {
      do_calc_src_content = false;
      // Note: set x->content_state_sb.source_sad_rd as well if this is extended
      // to RTC rd path.
      x->content_state_sb.source_sad_nonrd = kMedSad;
    }
  }

  return do_calc_src_content;
}
1090
1091
/*!\brief Determine whether grading content is needed based on sf and frame stat
1092
 *
1093
 * \ingroup partition_search
1094
 * \callgraph
1095
 * \callergraph
1096
 */
1097
// TODO(any): consolidate sfs to make interface cleaner
1098
// TODO(any): consolidate sfs to make interface cleaner
static inline void grade_source_content_sb(AV1_COMP *cpi, MACROBLOCK *const x,
                                           TileDataEnc *tile_data, int mi_row,
                                           int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  // Key frames (including SVC temporal-layer key frames) keep the default
  // kMedSad grade set by the caller; nothing to compute.
  if (cm->current_frame.frame_type == KEY_FRAME ||
      (cpi->ppi->use_svc &&
       cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
    assert(x->content_state_sb.source_sad_nonrd == kMedSad);
    assert(x->content_state_sb.source_sad_rd == kMedSad);
    return;
  }
  bool calc_src_content = false;

  if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
    // With scene detection on, a zero frame-level SAD implies a static frame;
    // mark the SB as zero-SAD without any per-SB work.
    if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0) {
      calc_src_content = is_calc_src_content_needed(cpi, x, mi_row, mi_col);
    } else {
      x->content_state_sb.source_sad_nonrd = kZeroSad;
    }
  } else if ((cpi->sf.rt_sf.var_part_based_on_qidx >= 1) &&
             (cm->width * cm->height <= 352 * 288)) {
    // Small-resolution rd path: compute only when the frame shows motion.
    if (cpi->rc.frame_source_sad > 0)
      calc_src_content = true;
    else
      x->content_state_sb.source_sad_rd = kZeroSad;
  }
  if (calc_src_content)
    av1_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
}
1127
1128
/*!\brief Encode a superblock row by breaking it into superblocks
1129
 *
1130
 * \ingroup partition_search
1131
 * \callgraph
1132
 * \callergraph
1133
 * Do partition and mode search for an sb row: one row of superblocks filling up
1134
 * the width of the current tile.
1135
 */
1136
static inline void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
                                 TileDataEnc *tile_data, int mi_row,
                                 TokenExtra **tp) {
  AV1_COMMON *const cm = &cpi->common;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MultiThreadInfo *const mt_info = &cpi->mt_info;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
  AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
  bool row_mt_enabled = mt_info->row_mt_enabled;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int mib_size = cm->seq_params->mib_size;
  const int mib_size_log2 = cm->seq_params->mib_size_log2;
  // SB-row index within the tile, used for row-mt synchronization.
  const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
  const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, encode_sb_row_time);
#endif

  // Initialize the left context for the new SB row
  av1_zero_left_context(xd);

  // Reset delta for quantizer and loop filters at the beginning of every tile
  // (with row-mt, every row resets since rows may be coded out of order).
  if (mi_row == tile_info->mi_row_start || row_mt_enabled) {
    if (cm->delta_q_info.delta_q_present_flag)
      xd->current_base_qindex = cm->quant_params.base_qindex;
    if (cm->delta_q_info.delta_lf_present_flag) {
      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
    }
  }

  reset_thresh_freq_fact(x);

  // Code each SB in the row
  for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
       mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
    // In realtime/allintra mode and when frequency of cost updates is off/tile,
    // wait for the top superblock to finish encoding. Otherwise, wait for the
    // top-right superblock to finish encoding.
    enc_row_mt->sync_read_ptr(
        row_mt_sync, sb_row, sb_col_in_tile - delay_wait_for_top_right_sb(cpi));

#if CONFIG_MULTITHREAD
    if (row_mt_enabled) {
      pthread_mutex_lock(enc_row_mt->mutex_);
      const bool row_mt_exit = enc_row_mt->row_mt_exit;
      pthread_mutex_unlock(enc_row_mt->mutex_);
      // Exit in case any worker has encountered an error.
      if (row_mt_exit) return;
    }
#endif

    // Row-mt CDF propagation: seed this SB's entropy context from the row
    // above so probability adaptation survives out-of-order row coding.
    const int update_cdf = tile_data->allow_update_cdf && row_mt_enabled;
    if (update_cdf && (tile_info->mi_row_start != mi_row)) {
      if ((tile_info->mi_col_start == mi_col)) {
        // restore frame context at the 1st column sb
        memcpy(xd->tile_ctx, x->row_ctx, sizeof(*xd->tile_ctx));
      } else {
        // update context: blend left context with the top-right (or top, at
        // the tile's right edge) context from the previous row.
        int wt_left = AVG_CDF_WEIGHT_LEFT;
        int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
        if (tile_info->mi_col_end > (mi_col + mib_size))
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile,
                              wt_left, wt_tr);
        else
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
                              wt_left, wt_tr);
      }
    }

    // Update the rate cost tables for some symbols
    av1_set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);

    // Reset color coding related parameters
    av1_zero(x->color_sensitivity_sb);
    av1_zero(x->color_sensitivity_sb_g);
    av1_zero(x->color_sensitivity_sb_alt);
    av1_zero(x->color_sensitivity);
    // Reset per-SB content/search state to its defaults.
    x->content_state_sb.source_sad_nonrd = kMedSad;
    x->content_state_sb.source_sad_rd = kMedSad;
    x->content_state_sb.lighting_change = 0;
    x->content_state_sb.low_sumdiff = 0;
    x->force_zeromv_skip_for_sb = 0;
    x->sb_me_block = 0;
    x->sb_me_partition = 0;
    x->sb_me_mv.as_int = 0;
    x->sb_force_fixed_part = 1;
    x->color_palette_thresh = 64;
    x->force_color_check_block_level = 0;
    x->nonrd_prune_ref_frame_search =
        cpi->sf.rt_sf.nonrd_prune_ref_frame_search;

    if (cpi->oxcf.mode == ALLINTRA) {
      // 128 acts as the neutral (1.0x) rdmult modifier here — presumably a
      // fixed-point scale; confirm against its consumers.
      x->intra_sb_rdmult_modifier = 128;
    }

    xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
    x->source_variance = UINT_MAX;
    td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);

    // Get segment id and skip flag
    const struct segmentation *const seg = &cm->seg;
    int seg_skip = 0;
    if (seg->enabled) {
      const uint8_t *const map =
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
      const uint8_t segment_id =
          map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
              : 0;
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
    }

    produce_gradients_for_sb(cpi, x, sb_size, mi_row, mi_col);

    init_src_var_info_of_4x4_sub_blocks(cpi, x->src_var_info_of_4x4_sub_blocks,
                                        sb_size);

    // Grade the temporal variation of the sb, the grade will be used to decide
    // fast mode search strategy for coding blocks
    if (!seg_skip) grade_source_content_sb(cpi, x, tile_data, mi_row, mi_col);

    // encode the superblock
    if (use_nonrd_mode) {
      encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
    } else {
      encode_rd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
    }

    // Update the top-right context in row_mt coding
    if (update_cdf && (tile_info->mi_row_end > (mi_row + mib_size))) {
      if (sb_cols_in_tile == 1)
        memcpy(x->row_ctx, xd->tile_ctx, sizeof(*xd->tile_ctx));
      else if (sb_col_in_tile >= 1)
        memcpy(x->row_ctx + sb_col_in_tile - 1, xd->tile_ctx,
               sizeof(*xd->tile_ctx));
    }
    // Signal completion of this SB so dependent rows can proceed.
    enc_row_mt->sync_write_ptr(row_mt_sync, sb_row, sb_col_in_tile,
                               sb_cols_in_tile);
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, encode_sb_row_time);
#endif
}
1283
1284
0
// Prepare the frame-level macroblock context: attach the source frame's
// planes to the encoder macroblock and configure the per-plane block
// descriptors for the frame's subsampling.
static inline void init_encode_frame_mb_context(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCK *const mb = &cpi->td.mb;
  MACROBLOCKD *const mbd = &mb->e_mbd;

  // Copy data over into macro block data structures.
  av1_setup_src_planes(mb, cpi->source, 0, 0, num_planes,
                       cm->seq_params->sb_size);

  av1_setup_block_planes(mbd, cm->seq_params->subsampling_x,
                         cm->seq_params->subsampling_y, num_planes);
}
1297
1298
0
// (Re)allocate the per-tile encoder data array for the current tile layout,
// releasing any previous allocation first, and zero-initialize the row-mt
// sync state of every tile. Aborts via CHECK_MEM_ERROR on allocation failure.
void av1_alloc_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt;
  const int tile_cols = cm->tiles.cols;
  const int tile_rows = cm->tiles.rows;
  const int num_tiles = tile_cols * tile_rows;

  // Release row-mt resources tied to the old tile layout before freeing it.
  av1_row_mt_mem_dealloc(cpi);

  aom_free(cpi->tile_data);
  cpi->allocated_tiles = 0;
  enc_row_mt->allocated_tile_cols = 0;
  enc_row_mt->allocated_tile_rows = 0;

  CHECK_MEM_ERROR(cm, cpi->tile_data,
                  aom_memalign(32, num_tiles * sizeof(*cpi->tile_data)));

  cpi->allocated_tiles = num_tiles;
  enc_row_mt->allocated_tile_cols = tile_cols;
  enc_row_mt->allocated_tile_rows = tile_rows;

  // Initialize every tile's row-mt sync state (row-major order).
  for (int tile_index = 0; tile_index < num_tiles; ++tile_index) {
    TileDataEnc *const this_tile = &cpi->tile_data[tile_index];
    av1_zero(this_tile->row_mt_sync);
    this_tile->row_ctx = NULL;
  }
}
1327
1328
0
// Initializes per-tile encoder state for the current frame: tile geometry,
// CDF-update policy, per-tile entropy context, and — when token buffers are
// in use — the token and token-list pointers for each tile.
void av1_init_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const int tile_cols = cm->tiles.cols;
  const int tile_rows = cm->tiles.rows;
  int tile_col, tile_row;
  TokenInfo *const token_info = &cpi->token_info;
  TokenExtra *pre_tok = token_info->tile_tok[0][0];
  TokenList *tplist = token_info->tplist[0][0];
  unsigned int tile_tok = 0;
  int tplist_count = 0;

  if (!is_stat_generation_stage(cpi) &&
      cm->features.allow_screen_content_tools) {
    // Number of tokens for which token info needs to be allocated.
    unsigned int tokens_required =
        get_token_alloc(cm->mi_params.mb_rows, cm->mi_params.mb_cols,
                        MAX_SB_SIZE_LOG2, num_planes);
    // Allocate/reallocate memory for token related info if the number of tokens
    // required is more than the number of tokens already allocated. This could
    // occur in case of the following:
    // 1) If the memory is not yet allocated
    // 2) If the frame dimensions have changed
    const bool realloc_tokens = tokens_required > token_info->tokens_allocated;
    if (realloc_tokens) {
      free_token_info(token_info);
      alloc_token_info(cm, token_info, tokens_required);
      // Re-read the base pointers: the arrays may have moved.
      pre_tok = token_info->tile_tok[0][0];
      tplist = token_info->tplist[0][0];
    }
  }

  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
      TileDataEnc *const tile_data =
          &cpi->tile_data[tile_row * tile_cols + tile_col];
      TileInfo *const tile_info = &tile_data->tile_info;
      av1_tile_init(tile_info, cm, tile_row, tile_col);
      tile_data->firstpass_top_mv = kZeroMv;
      tile_data->abs_sum_level = 0;

      if (is_token_info_allocated(token_info)) {
        // Carve out each tile's token span from the shared buffer: the
        // current tile starts where the previous tile's allocation ended.
        token_info->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
        pre_tok = token_info->tile_tok[tile_row][tile_col];
        tile_tok = allocated_tokens(
            tile_info, cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
            num_planes);
        // Same running-offset scheme for the per-SB-row token lists.
        token_info->tplist[tile_row][tile_col] = tplist + tplist_count;
        tplist = token_info->tplist[tile_row][tile_col];
        tplist_count = av1_get_sb_rows_in_tile(cm, tile_info);
      }
      tile_data->allow_update_cdf = !cm->tiles.large_scale;
      tile_data->allow_update_cdf = tile_data->allow_update_cdf &&
                                    !cm->features.disable_cdf_update &&
                                    !delay_wait_for_top_right_sb(cpi);
      // Each tile starts from a copy of the frame-level entropy context.
      tile_data->tctx = *cm->fc;
    }
  }
}
1387
1388
// Populate the start palette token info prior to encoding an SB row.
1389
static inline void get_token_start(AV1_COMP *cpi, const TileInfo *tile_info,
1390
                                   int tile_row, int tile_col, int mi_row,
1391
0
                                   TokenExtra **tp) {
1392
0
  const TokenInfo *token_info = &cpi->token_info;
1393
0
  if (!is_token_info_allocated(token_info)) return;
1394
1395
0
  const AV1_COMMON *cm = &cpi->common;
1396
0
  const int num_planes = av1_num_planes(cm);
1397
0
  TokenList *const tplist = cpi->token_info.tplist[tile_row][tile_col];
1398
0
  const int sb_row_in_tile =
1399
0
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1400
1401
0
  get_start_tok(cpi, tile_row, tile_col, mi_row, tp,
1402
0
                cm->seq_params->mib_size_log2 + MI_SIZE_LOG2, num_planes);
1403
0
  assert(tplist != NULL);
1404
0
  tplist[sb_row_in_tile].start = *tp;
1405
0
}
1406
1407
// Populate the token count after encoding an SB row.
// 'tok' is the write pointer after the row was encoded; the count is the
// distance from the row's recorded start pointer.
static inline void populate_token_count(AV1_COMP *cpi,
                                        const TileInfo *tile_info, int tile_row,
                                        int tile_col, int mi_row,
                                        TokenExtra *tok) {
  const TokenInfo *token_info = &cpi->token_info;
  // Nothing to record when token buffers were never allocated.
  if (!is_token_info_allocated(token_info)) return;

  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  TokenList *const tplist = token_info->tplist[tile_row][tile_col];
  const int sb_row_in_tile =
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
  const int tile_mb_cols =
      (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
  const int num_mb_rows_in_sb =
      ((1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
  tplist[sb_row_in_tile].count =
      (unsigned int)(tok - tplist[sb_row_in_tile].start);

  // Sanity check: the tokens written must fit in the row's allocation.
  assert((unsigned int)(tok - tplist[sb_row_in_tile].start) <=
         get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
                         cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
                         num_planes));

  // These locals exist only for the assert above; silence unused-variable
  // warnings in NDEBUG builds where the assert compiles away.
  (void)num_planes;
  (void)tile_mb_cols;
  (void)num_mb_rows_in_sb;
}
1436
1437
/*!\brief Encode a superblock row
1438
 *
1439
 * \ingroup partition_search
1440
 */
1441
void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
1442
0
                       int tile_col, int mi_row) {
1443
0
  AV1_COMMON *const cm = &cpi->common;
1444
0
  const int tile_cols = cm->tiles.cols;
1445
0
  TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
1446
0
  const TileInfo *const tile_info = &this_tile->tile_info;
1447
0
  TokenExtra *tok = NULL;
1448
1449
0
  get_token_start(cpi, tile_info, tile_row, tile_col, mi_row, &tok);
1450
1451
0
  encode_sb_row(cpi, td, this_tile, mi_row, &tok);
1452
1453
0
  populate_token_count(cpi, tile_info, tile_row, tile_col, mi_row, tok);
1454
0
}
1455
1456
/*!\brief Encode a tile
 *
 * Sets up per-tile contexts (above context, CfL, RD-record CRC) and then
 * encodes the tile one superblock row at a time.
 *
 * \ingroup partition_search
 */
void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
                     int tile_col) {
  AV1_COMMON *const cm = &cpi->common;
  TileDataEnc *const this_tile =
      &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
  const TileInfo *const tile_info = &this_tile->tile_info;

  // Inter-mode RD stats are collected only on the RD (non-realtime) path.
  if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile);

  // Reset the above-row contexts for this tile's column span.
  av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
                         tile_info->mi_col_end, tile_row);
  av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
                         &td->mb.e_mbd);

#if !CONFIG_REALTIME_ONLY
  if (cpi->oxcf.intra_mode_cfg.enable_cfl_intra)
    cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);
#endif

  // The mb_rd_record cache keys on a CRC of block pixels; (re)seed the
  // CRC calculator before the tile is encoded.
  if (td->mb.txfm_search_info.mb_rd_record != NULL) {
    av1_crc32c_calculator_init(
        &td->mb.txfm_search_info.mb_rd_record->crc_calculator);
  }

  // Encode the tile one superblock row at a time.
  for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
       mi_row += cm->seq_params->mib_size) {
    av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
  }
  this_tile->abs_sum_level = td->abs_sum_level;
}
1490
1491
/*!\brief Break one frame into tiles and encode the tiles
 *
 * Single-threaded tile encode path: (re)allocates tile data if the tile
 * configuration grew, initializes per-tile state, then encodes every tile
 * in raster order, accumulating per-frame counters afterwards.
 *
 * \ingroup partition_search
 *
 * \param[in]    cpi    Top-level encoder structure
 */
static inline void encode_tiles(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int tile_cols = cm->tiles.cols;
  const int tile_rows = cm->tiles.rows;
  int tile_col, tile_row;

  MACROBLOCK *const mb = &cpi->td.mb;
  assert(IMPLIES(cpi->tile_data == NULL,
                 cpi->allocated_tiles < tile_cols * tile_rows));
  // Grow the tile data array if the current tiling needs more entries.
  if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi);

  av1_init_tile_data(cpi);
  av1_alloc_mb_data(cpi, mb);

  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
      TileDataEnc *const this_tile =
          &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
      // Reset per-tile thread-data accumulators before encoding the tile.
      cpi->td.intrabc_used = 0;
      cpi->td.deltaq_used = 0;
      cpi->td.abs_sum_level = 0;
      cpi->td.rd_counts.seg_tmp_pred_cost[0] = 0;
      cpi->td.rd_counts.seg_tmp_pred_cost[1] = 0;
      // Both entropy-context pointers reference this tile's context copy.
      cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
      cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
      av1_init_rtc_counters(&cpi->td.mb);
      cpi->td.mb.palette_pixels = 0;
      av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
      // RTC counters are only meaningful for inter frames.
      if (!frame_is_intra_only(&cpi->common))
        av1_accumulate_rtc_counters(cpi, &cpi->td.mb);
      // Fold per-tile results into the frame-level totals/flags.
      cpi->palette_pixel_num += cpi->td.mb.palette_pixels;
      cpi->intrabc_used |= cpi->td.intrabc_used;
      cpi->deltaq_used |= cpi->td.deltaq_used;
    }
  }

  av1_dealloc_mb_data(mb, av1_num_planes(cm));
}
1535
1536
// Set the relative distance of a reference frame w.r.t. current frame
1537
static inline void set_rel_frame_dist(
1538
    const AV1_COMMON *const cm, RefFrameDistanceInfo *const ref_frame_dist_info,
1539
0
    const int ref_frame_flags) {
1540
0
  MV_REFERENCE_FRAME ref_frame;
1541
0
  int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX;
1542
0
  ref_frame_dist_info->nearest_past_ref = NONE_FRAME;
1543
0
  ref_frame_dist_info->nearest_future_ref = NONE_FRAME;
1544
0
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
1545
0
    ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = 0;
1546
0
    if (ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
1547
0
      int dist = av1_encoder_get_relative_dist(
1548
0
          cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME],
1549
0
          cm->current_frame.display_order_hint);
1550
0
      ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = dist;
1551
      // Get the nearest ref_frame in the past
1552
0
      if (abs(dist) < min_past_dist && dist < 0) {
1553
0
        ref_frame_dist_info->nearest_past_ref = ref_frame;
1554
0
        min_past_dist = abs(dist);
1555
0
      }
1556
      // Get the nearest ref_frame in the future
1557
0
      if (dist < min_future_dist && dist > 0) {
1558
0
        ref_frame_dist_info->nearest_future_ref = ref_frame;
1559
0
        min_future_dist = dist;
1560
0
      }
1561
0
    }
1562
0
  }
1563
0
}
1564
1565
0
static inline int refs_are_one_sided(const AV1_COMMON *cm) {
1566
0
  assert(!frame_is_intra_only(cm));
1567
1568
0
  int one_sided_refs = 1;
1569
0
  const int cur_display_order_hint = cm->current_frame.display_order_hint;
1570
0
  for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
1571
0
    const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
1572
0
    if (buf == NULL) continue;
1573
0
    if (av1_encoder_get_relative_dist(buf->display_order_hint,
1574
0
                                      cur_display_order_hint) > 0) {
1575
0
      one_sided_refs = 0;  // bwd reference
1576
0
      break;
1577
0
    }
1578
0
  }
1579
0
  return one_sided_refs;
1580
0
}
1581
1582
static inline void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
1583
0
                                             int ref_order_hint[2]) {
1584
0
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
1585
0
  ref_order_hint[0] = ref_order_hint[1] = 0;
1586
0
  if (!skip_mode_info->skip_mode_allowed) return;
1587
1588
0
  const RefCntBuffer *const buf_0 =
1589
0
      get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
1590
0
  const RefCntBuffer *const buf_1 =
1591
0
      get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
1592
0
  assert(buf_0 != NULL && buf_1 != NULL);
1593
1594
0
  ref_order_hint[0] = buf_0->order_hint;
1595
0
  ref_order_hint[1] = buf_1->order_hint;
1596
0
}
1597
1598
0
// Decides whether skip mode should be enabled for the current frame.
// Returns 1 if skip mode may be used, 0 otherwise.
static int check_skip_mode_enabled(AV1_COMP *const cpi) {
  AV1_COMMON *const cm = &cpi->common;

  av1_setup_skip_mode_allowed(cm);
  if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;

  // Turn off skip mode if the temporal distances of the reference pair to the
  // current frame are different by more than 1 frame.
  const int cur_offset = (int)cm->current_frame.order_hint;
  int ref_offset[2];
  get_skip_mode_ref_offsets(cm, ref_offset);
  const int cur_to_ref0 = get_relative_dist(&cm->seq_params->order_hint_info,
                                            cur_offset, ref_offset[0]);
  const int cur_to_ref1 = abs(get_relative_dist(
      &cm->seq_params->order_hint_info, cur_offset, ref_offset[1]));
  if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;

  // High Latency: Turn off skip mode if all refs are fwd.
  if (cpi->all_one_sided_refs && cpi->oxcf.gf_cfg.lag_in_frames > 0) return 0;

  // Both references selected for skip mode must be usable this frame.
  const int ref_frame[2] = {
    cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME,
    cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME
  };
  if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[0]]) ||
      !(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[1]]))
    return 0;

  return 1;
}
1628
1629
static inline void set_default_interp_skip_flags(
1630
0
    const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
1631
0
  const int num_planes = av1_num_planes(cm);
1632
0
  interp_search_flags->default_interp_skip_flags =
1633
0
      (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
1634
0
                        : INTERP_SKIP_LUMA_SKIP_CHROMA;
1635
0
}
1636
1637
0
// Builds cpi->prune_ref_frame_mask: a bitmask of compound reference pairs
// (indices REF_FRAMES..MODE_CTX_REF_FRAMES-1) to skip at frame level.
static inline void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
       cpi->sf.inter_sf.disable_onesided_comp) &&
      cpi->all_one_sided_refs) {
    // Disable all compound references
    cpi->prune_ref_frame_mask = (1 << MODE_CTX_REF_FRAMES) - (1 << REF_FRAMES);
  } else if (!cpi->sf.rt_sf.use_nonrd_pick_mode &&
             cpi->sf.inter_sf.selective_ref_frame >= 2) {
    AV1_COMMON *const cm = &cpi->common;
    const int cur_frame_display_order_hint =
        cm->current_frame.display_order_hint;
    unsigned int *ref_display_order_hint =
        cm->cur_frame->ref_display_order_hint;
    const int arf2_dist = av1_encoder_get_relative_dist(
        ref_display_order_hint[ALTREF2_FRAME - LAST_FRAME],
        cur_frame_display_order_hint);
    const int bwd_dist = av1_encoder_get_relative_dist(
        ref_display_order_hint[BWDREF_FRAME - LAST_FRAME],
        cur_frame_display_order_hint);

    // Examine every compound (two-reference) combination.
    for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
      MV_REFERENCE_FRAME rf[2];
      av1_set_ref_frame(rf, ref_idx);
      // Skip pairs whose references are not both available this frame.
      if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) ||
          !(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]])) {
        continue;
      }

      if (!cpi->all_one_sided_refs) {
        int ref_dist[2];
        for (int i = 0; i < 2; ++i) {
          ref_dist[i] = av1_encoder_get_relative_dist(
              ref_display_order_hint[rf[i] - LAST_FRAME],
              cur_frame_display_order_hint);
        }

        // One-sided compound is used only when all reference frames are
        // one-sided.
        if ((ref_dist[0] > 0) == (ref_dist[1] > 0)) {
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
        }
      }

      if (cpi->sf.inter_sf.selective_ref_frame >= 4 &&
          (rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME) &&
          (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
        // Check if both ALTREF2_FRAME and BWDREF_FRAME are future references.
        if (arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist) {
          // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
          // reference to the current frame than ALTREF2_FRAME
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
        }
      }
    }
  }
}
1693
1694
0
// Returns nonzero when objective delta-q mode is worthwhile for this frame.
// In non-realtime builds, evaluates the per-superblock delta-q RD cost over
// the whole frame and allows delta-q only when the total is negative (a net
// RD improvement); realtime-only builds always allow it.
static int allow_deltaq_mode(AV1_COMP *cpi) {
#if !CONFIG_REALTIME_ONLY
  AV1_COMMON *const cm = &cpi->common;
  BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  int sbs_wide = mi_size_wide[sb_size];
  int sbs_high = mi_size_high[sb_size];

  // Accumulate the delta-q RD cost across all superblocks of the frame.
  int64_t delta_rdcost = 0;
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sbs_high) {
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += sbs_wide) {
      int64_t this_delta_rdcost = 0;
      av1_get_q_for_deltaq_objective(cpi, &cpi->td, &this_delta_rdcost, sb_size,
                                     mi_row, mi_col);
      delta_rdcost += this_delta_rdcost;
    }
  }
  return delta_rdcost < 0;
#else
  (void)cpi;
  return 1;
#endif  // !CONFIG_REALTIME_ONLY
}
1716
1717
0
// SAD threshold for forcing zeromv-skip on a 128x128 block.
#define FORCE_ZMV_SKIP_128X128_BLK_DIFF 10000
// Cap on the per-pixel threshold used for small blocks.
#define FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF 4

// Populates block level thresholds for force zeromv-skip decision
static void populate_thresh_to_force_zeromv_skip(AV1_COMP *cpi) {
  // Thresholds are only consumed when the early-exit speed feature is on.
  if (cpi->sf.rt_sf.part_early_exit_zeromv == 0) return;

  // Threshold for forcing zeromv-skip decision is as below:
  // For 128x128 blocks, threshold is 10000 and per pixel threshold is 0.6103.
  // For 64x64 blocks, threshold is 5000 and per pixel threshold is 1.221
  // allowing slightly higher error for smaller blocks.
  // Per Pixel Threshold of 64x64 block        Area of 64x64 block         1  1
  // ------------------------------------=sqrt(---------------------)=sqrt(-)=-
  // Per Pixel Threshold of 128x128 block      Area of 128x128 block       4  2
  // Thus, per pixel thresholds for blocks of size 32x32, 16x16,...  can be
  // chosen as 2.442, 4.884,.... As the per pixel error tends to be higher for
  // small blocks, the same is clipped to 4.
  const unsigned int thresh_exit_128x128_part = FORCE_ZMV_SKIP_128X128_BLK_DIFF;
  const int num_128x128_pix =
      block_size_wide[BLOCK_128X128] * block_size_high[BLOCK_128X128];

  for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
    const int num_block_pix = block_size_wide[bsize] * block_size_high[bsize];

    // Calculate the threshold for zeromv-skip decision based on area of the
    // partition
    unsigned int thresh_exit_part_blk =
        (unsigned int)(thresh_exit_128x128_part *
                           sqrt((double)num_block_pix / num_128x128_pix) +
                       0.5);
    // Clamp so the per-pixel error never exceeds the configured maximum.
    thresh_exit_part_blk = AOMMIN(
        thresh_exit_part_blk,
        (unsigned int)(FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF * num_block_pix));
    cpi->zeromv_skip_thresh_exit_part[bsize] = thresh_exit_part_blk;
  }
}
1753
1754
// Release the temporary buffers used while building the intrabc hash table.
// Entries may be NULL (partially-allocated on error paths); aom_free(NULL)
// is a no-op, so no guards are needed.
static void free_block_hash_buffers(uint32_t *block_hash_values[2][2],
                                    int8_t *is_block_same[2][3]) {
  for (int i = 0; i < 2; ++i) {
    aom_free(block_hash_values[i][0]);
    aom_free(block_hash_values[i][1]);

    aom_free(is_block_same[i][0]);
    aom_free(is_block_same[i][1]);
    aom_free(is_block_same[i][2]);
  }
}
1766
1767
/*!\brief Determines delta_q_res value for Variance Boost modulation.
 *
 * Signaling per-superblock delta_q costs bits, and that syntax overhead is
 * proportionally larger at higher base qindex (lower quality / smaller
 * payloads). At the same time the superblock qindex spread between low- and
 * high-variance regions grows with base qindex (e.g. up to 8 steps at QP 5,
 * up to 52 steps at QP 60). The best quality-per-bit tradeoff found was to
 * keep delta_q very fine-grained at low base qindex (preserving tiny SB
 * deltas) and make it progressively coarser as base qindex rises (cutting
 * signaling overhead).
 */
static int aom_get_variance_boost_delta_q_res(int qindex) {
  // Step thresholds, coarsest first: >=160 -> 8, >=120 -> 4, >=80 -> 2,
  // otherwise the finest resolution of 1.
  if (qindex >= 160) return 8;
  if (qindex >= 120) return 4;
  if (qindex >= 80) return 2;
  return 1;
}
1803
1804
/*!\brief Encoder setup(only for the current frame), encoding, and recontruction
1805
 * for a single frame
1806
 *
1807
 * \ingroup high_level_algo
1808
 */
1809
0
static inline void encode_frame_internal(AV1_COMP *cpi) {
1810
0
  ThreadData *const td = &cpi->td;
1811
0
  MACROBLOCK *const x = &td->mb;
1812
0
  AV1_COMMON *const cm = &cpi->common;
1813
0
  CommonModeInfoParams *const mi_params = &cm->mi_params;
1814
0
  FeatureFlags *const features = &cm->features;
1815
0
  MACROBLOCKD *const xd = &x->e_mbd;
1816
0
  RD_COUNTS *const rdc = &cpi->td.rd_counts;
1817
#if CONFIG_FPMT_TEST
1818
  FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
1819
  FrameProbInfo *const temp_frame_probs_simulation =
1820
      &cpi->ppi->temp_frame_probs_simulation;
1821
#endif
1822
0
  FrameProbInfo *const frame_probs = &cpi->ppi->frame_probs;
1823
0
  IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
1824
0
  MultiThreadInfo *const mt_info = &cpi->mt_info;
1825
0
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
1826
0
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
1827
0
  const DELTAQ_MODE deltaq_mode = oxcf->q_cfg.deltaq_mode;
1828
0
  int i;
1829
1830
0
  if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
1831
0
    mi_params->setup_mi(mi_params);
1832
0
  }
1833
1834
0
  set_mi_offsets(mi_params, xd, 0, 0);
1835
1836
0
  av1_zero(*td->counts);
1837
0
  av1_zero(rdc->tx_type_used);
1838
0
  av1_zero(rdc->obmc_used);
1839
0
  av1_zero(rdc->warped_used);
1840
0
  av1_zero(rdc->seg_tmp_pred_cost);
1841
1842
  // Reset the flag.
1843
0
  cpi->intrabc_used = 0;
1844
  // Need to disable intrabc when superres is selected
1845
0
  if (av1_superres_scaled(cm)) {
1846
0
    features->allow_intrabc = 0;
1847
0
  }
1848
1849
0
  features->allow_intrabc &= (oxcf->kf_cfg.enable_intrabc);
1850
1851
0
  if (features->allow_warped_motion &&
1852
0
      cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
1853
0
    const FRAME_UPDATE_TYPE update_type =
1854
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1855
0
    int warped_probability =
1856
#if CONFIG_FPMT_TEST
1857
        cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE
1858
            ? temp_frame_probs->warped_probs[update_type]
1859
            :
1860
#endif  // CONFIG_FPMT_TEST
1861
0
            frame_probs->warped_probs[update_type];
1862
0
    if (warped_probability < cpi->sf.inter_sf.prune_warped_prob_thresh)
1863
0
      features->allow_warped_motion = 0;
1864
0
  }
1865
1866
0
  int hash_table_created = 0;
1867
0
  if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
1868
0
      !cpi->sf.rt_sf.use_nonrd_pick_mode) {
1869
    // TODO(any): move this outside of the recoding loop to avoid recalculating
1870
    // the hash table.
1871
    // add to hash table
1872
0
    const int pic_width = cpi->source->y_crop_width;
1873
0
    const int pic_height = cpi->source->y_crop_height;
1874
0
    uint32_t *block_hash_values[2][2] = { { NULL } };
1875
0
    int8_t *is_block_same[2][3] = { { NULL } };
1876
0
    int k, j;
1877
0
    bool error = false;
1878
1879
0
    for (k = 0; k < 2 && !error; ++k) {
1880
0
      for (j = 0; j < 2; ++j) {
1881
0
        block_hash_values[k][j] = (uint32_t *)aom_malloc(
1882
0
            sizeof(*block_hash_values[0][0]) * pic_width * pic_height);
1883
0
        if (!block_hash_values[k][j]) {
1884
0
          error = true;
1885
0
          break;
1886
0
        }
1887
0
      }
1888
1889
0
      for (j = 0; j < 3 && !error; ++j) {
1890
0
        is_block_same[k][j] = (int8_t *)aom_malloc(
1891
0
            sizeof(*is_block_same[0][0]) * pic_width * pic_height);
1892
0
        if (!is_block_same[k][j]) error = true;
1893
0
      }
1894
0
    }
1895
1896
0
    av1_hash_table_init(intrabc_hash_info);
1897
0
    if (error ||
1898
0
        !av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table)) {
1899
0
      free_block_hash_buffers(block_hash_values, is_block_same);
1900
0
      aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
1901
0
                         "Error allocating intrabc_hash_table and buffers");
1902
0
    }
1903
0
    hash_table_created = 1;
1904
0
    av1_generate_block_2x2_hash_value(intrabc_hash_info, cpi->source,
1905
0
                                      block_hash_values[0], is_block_same[0]);
1906
    // Hash data generated for screen contents is used for intraBC ME
1907
0
    const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
1908
0
    const int max_sb_size =
1909
0
        (1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2));
1910
0
    int src_idx = 0;
1911
0
    for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
1912
0
      const int dst_idx = !src_idx;
1913
0
      av1_generate_block_hash_value(
1914
0
          intrabc_hash_info, cpi->source, size, block_hash_values[src_idx],
1915
0
          block_hash_values[dst_idx], is_block_same[src_idx],
1916
0
          is_block_same[dst_idx]);
1917
0
      if (size >= min_alloc_size) {
1918
0
        if (!av1_add_to_hash_map_by_row_with_precal_data(
1919
0
                &intrabc_hash_info->intrabc_hash_table,
1920
0
                block_hash_values[dst_idx], is_block_same[dst_idx][2],
1921
0
                pic_width, pic_height, size)) {
1922
0
          error = true;
1923
0
          break;
1924
0
        }
1925
0
      }
1926
0
    }
1927
1928
0
    free_block_hash_buffers(block_hash_values, is_block_same);
1929
1930
0
    if (error) {
1931
0
      aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
1932
0
                         "Error adding data to intrabc_hash_table");
1933
0
    }
1934
0
  }
1935
1936
0
  const CommonQuantParams *quant_params = &cm->quant_params;
1937
0
  for (i = 0; i < MAX_SEGMENTS; ++i) {
1938
0
    const int qindex =
1939
0
        cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
1940
0
                        : quant_params->base_qindex;
1941
0
    xd->lossless[i] =
1942
0
        qindex == 0 && quant_params->y_dc_delta_q == 0 &&
1943
0
        quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
1944
0
        quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
1945
0
    if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
1946
0
    xd->qindex[i] = qindex;
1947
0
    if (xd->lossless[i]) {
1948
0
      cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
1949
0
    } else {
1950
0
      cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
1951
0
    }
1952
0
  }
1953
0
  features->coded_lossless = is_coded_lossless(cm, xd);
1954
0
  features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm);
1955
1956
  // Fix delta q resolution for the moment
1957
1958
0
  cm->delta_q_info.delta_q_res = 0;
1959
0
  if (cpi->use_ducky_encode) {
1960
0
    cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_DUCKY_ENCODE;
1961
0
  } else if (cpi->oxcf.q_cfg.aq_mode != CYCLIC_REFRESH_AQ) {
1962
0
    if (deltaq_mode == DELTA_Q_OBJECTIVE)
1963
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE;
1964
0
    else if (deltaq_mode == DELTA_Q_PERCEPTUAL)
1965
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1966
0
    else if (deltaq_mode == DELTA_Q_PERCEPTUAL_AI)
1967
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1968
0
    else if (deltaq_mode == DELTA_Q_USER_RATING_BASED)
1969
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1970
0
    else if (deltaq_mode == DELTA_Q_HDR)
1971
0
      cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1972
0
    else if (deltaq_mode == DELTA_Q_VARIANCE_BOOST)
1973
0
      cm->delta_q_info.delta_q_res =
1974
0
          aom_get_variance_boost_delta_q_res(quant_params->base_qindex);
1975
    // Set delta_q_present_flag before it is used for the first time
1976
0
    cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
1977
0
    cm->delta_q_info.delta_q_present_flag = deltaq_mode != NO_DELTA_Q;
1978
1979
    // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q
1980
    // is used for ineligible frames. That effectively will turn off row_mt
1981
    // usage. Note objective delta_q and tpl eligible frames are only altref
1982
    // frames currently.
1983
0
    const GF_GROUP *gf_group = &cpi->ppi->gf_group;
1984
0
    if (cm->delta_q_info.delta_q_present_flag) {
1985
0
      if (deltaq_mode == DELTA_Q_OBJECTIVE &&
1986
0
          gf_group->update_type[cpi->gf_frame_index] == LF_UPDATE)
1987
0
        cm->delta_q_info.delta_q_present_flag = 0;
1988
1989
0
      if (deltaq_mode == DELTA_Q_OBJECTIVE &&
1990
0
          cm->delta_q_info.delta_q_present_flag) {
1991
0
        cm->delta_q_info.delta_q_present_flag &= allow_deltaq_mode(cpi);
1992
0
      }
1993
0
    }
1994
1995
    // Reset delta_q_used flag
1996
0
    cpi->deltaq_used = 0;
1997
1998
0
    cm->delta_q_info.delta_lf_present_flag =
1999
0
        cm->delta_q_info.delta_q_present_flag &&
2000
0
        oxcf->tool_cfg.enable_deltalf_mode;
2001
0
    cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
2002
2003
    // update delta_q_present_flag and delta_lf_present_flag based on
2004
    // base_qindex
2005
0
    cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
2006
0
    cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
2007
0
  } else if (cpi->cyclic_refresh->apply_cyclic_refresh ||
2008
0
             cpi->svc.number_temporal_layers == 1) {
2009
0
    cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
2010
0
    cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
2011
0
  }
2012
0
  cpi->rc.cnt_zeromv = 0;
2013
2014
0
  av1_frame_init_quantizer(cpi);
2015
0
  init_encode_frame_mb_context(cpi);
2016
0
  set_default_interp_skip_flags(cm, &cpi->interp_search_flags);
2017
2018
0
  if (cm->prev_frame && cm->prev_frame->seg.enabled)
2019
0
    cm->last_frame_seg_map = cm->prev_frame->seg_map;
2020
0
  else
2021
0
    cm->last_frame_seg_map = NULL;
2022
0
  if (features->allow_intrabc || features->coded_lossless) {
2023
0
    av1_set_default_ref_deltas(cm->lf.ref_deltas);
2024
0
    av1_set_default_mode_deltas(cm->lf.mode_deltas);
2025
0
  } else if (cm->prev_frame) {
2026
0
    memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
2027
0
    memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
2028
0
  }
2029
0
  memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
2030
0
  memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
2031
2032
0
  cpi->all_one_sided_refs =
2033
0
      frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm);
2034
2035
0
  cpi->prune_ref_frame_mask = 0;
2036
  // Figure out which ref frames can be skipped at frame level.
2037
0
  setup_prune_ref_frame_mask(cpi);
2038
2039
0
  x->txfm_search_info.txb_split_count = 0;
2040
#if CONFIG_SPEED_STATS
2041
  x->txfm_search_info.tx_search_count = 0;
2042
#endif  // CONFIG_SPEED_STATS
2043
2044
0
#if !CONFIG_REALTIME_ONLY
2045
#if CONFIG_COLLECT_COMPONENT_TIMING
2046
  start_timing(cpi, av1_compute_global_motion_time);
2047
#endif
2048
0
  av1_compute_global_motion_facade(cpi);
2049
#if CONFIG_COLLECT_COMPONENT_TIMING
2050
  end_timing(cpi, av1_compute_global_motion_time);
2051
#endif
2052
0
#endif  // !CONFIG_REALTIME_ONLY
2053
2054
#if CONFIG_COLLECT_COMPONENT_TIMING
2055
  start_timing(cpi, av1_setup_motion_field_time);
2056
#endif
2057
0
  av1_calculate_ref_frame_side(cm);
2058
0
  if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
2059
#if CONFIG_COLLECT_COMPONENT_TIMING
2060
  end_timing(cpi, av1_setup_motion_field_time);
2061
#endif
2062
2063
0
  cm->current_frame.skip_mode_info.skip_mode_flag =
2064
0
      check_skip_mode_enabled(cpi);
2065
2066
  // Initialization of skip mode cost depends on the value of
2067
  // 'skip_mode_flag'. This initialization happens in the function
2068
  // av1_fill_mode_rates(), which is in turn called in
2069
  // av1_initialize_rd_consts(). Thus, av1_initialize_rd_consts()
2070
  // has to be called after 'skip_mode_flag' is initialized.
2071
0
  av1_initialize_rd_consts(cpi);
2072
0
  av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
2073
0
  populate_thresh_to_force_zeromv_skip(cpi);
2074
2075
0
  enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
2076
0
  enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
2077
0
  mt_info->row_mt_enabled = 0;
2078
0
  mt_info->pack_bs_mt_enabled = AOMMIN(mt_info->num_mod_workers[MOD_PACK_BS],
2079
0
                                       cm->tiles.cols * cm->tiles.rows) > 1;
2080
2081
0
  if (oxcf->row_mt && (mt_info->num_workers > 1)) {
2082
0
    mt_info->row_mt_enabled = 1;
2083
0
    enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
2084
0
    enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
2085
0
    av1_encode_tiles_row_mt(cpi);
2086
0
  } else {
2087
0
    if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1) {
2088
0
      av1_encode_tiles_mt(cpi);
2089
0
    } else {
2090
      // Preallocate the pc_tree for realtime coding to reduce the cost of
2091
      // memory allocation.
2092
0
      const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
2093
0
      if (use_nonrd_mode) {
2094
0
        td->pc_root = av1_alloc_pc_tree_node(cm->seq_params->sb_size);
2095
0
        if (!td->pc_root)
2096
0
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2097
0
                             "Failed to allocate PC_TREE");
2098
0
      } else {
2099
0
        td->pc_root = NULL;
2100
0
      }
2101
2102
0
      encode_tiles(cpi);
2103
0
      av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
2104
0
                                 cpi->sf.part_sf.partition_search_type);
2105
0
      td->pc_root = NULL;
2106
0
    }
2107
0
  }
2108
2109
  // If intrabc is allowed but never selected, reset the allow_intrabc flag.
2110
0
  if (features->allow_intrabc && !cpi->intrabc_used) {
2111
0
    features->allow_intrabc = 0;
2112
0
  }
2113
0
  if (features->allow_intrabc) {
2114
0
    cm->delta_q_info.delta_lf_present_flag = 0;
2115
0
  }
2116
2117
0
  if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
2118
0
    cm->delta_q_info.delta_q_present_flag = 0;
2119
0
  }
2120
2121
  // Set the transform size appropriately before bitstream creation
2122
0
  const MODE_EVAL_TYPE eval_type =
2123
0
      cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch
2124
0
          ? WINNER_MODE_EVAL
2125
0
          : DEFAULT_EVAL;
2126
0
  const TX_SIZE_SEARCH_METHOD tx_search_type =
2127
0
      cpi->winner_mode_params.tx_size_search_methods[eval_type];
2128
0
  assert(oxcf->txfm_cfg.enable_tx64 || tx_search_type != USE_LARGESTALL);
2129
0
  features->tx_mode = select_tx_mode(cm, tx_search_type);
2130
2131
  // Retain the frame level probability update conditions for parallel frames.
2132
  // These conditions will be consumed during postencode stage to update the
2133
  // probability.
2134
0
  if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2135
0
    cpi->do_update_frame_probs_txtype[cpi->num_frame_recode] =
2136
0
        cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats;
2137
0
    cpi->do_update_frame_probs_obmc[cpi->num_frame_recode] =
2138
0
        (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2139
0
         cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX);
2140
0
    cpi->do_update_frame_probs_warp[cpi->num_frame_recode] =
2141
0
        (features->allow_warped_motion &&
2142
0
         cpi->sf.inter_sf.prune_warped_prob_thresh > 0);
2143
0
    cpi->do_update_frame_probs_interpfilter[cpi->num_frame_recode] =
2144
0
        (cm->current_frame.frame_type != KEY_FRAME &&
2145
0
         cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2146
0
         features->interp_filter == SWITCHABLE);
2147
0
  }
2148
2149
0
  if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats ||
2150
0
      ((cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh !=
2151
0
        INT_MAX) &&
2152
0
       (cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh != 0))) {
2153
0
    const FRAME_UPDATE_TYPE update_type =
2154
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2155
0
    for (i = 0; i < TX_SIZES_ALL; i++) {
2156
0
      int sum = 0;
2157
0
      int j;
2158
0
      int left = MAX_TX_TYPE_PROB;
2159
2160
0
      for (j = 0; j < TX_TYPES; j++)
2161
0
        sum += cpi->td.rd_counts.tx_type_used[i][j];
2162
2163
0
      for (j = TX_TYPES - 1; j >= 0; j--) {
2164
0
        int update_txtype_frameprobs = 1;
2165
0
        const int new_prob =
2166
0
            sum ? (int)((int64_t)MAX_TX_TYPE_PROB *
2167
0
                        cpi->td.rd_counts.tx_type_used[i][j] / sum)
2168
0
                : (j ? 0 : MAX_TX_TYPE_PROB);
2169
#if CONFIG_FPMT_TEST
2170
        if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2171
          if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2172
              0) {
2173
            int prob =
2174
                (temp_frame_probs_simulation->tx_type_probs[update_type][i][j] +
2175
                 new_prob) >>
2176
                1;
2177
            left -= prob;
2178
            if (j == 0) prob += left;
2179
            temp_frame_probs_simulation->tx_type_probs[update_type][i][j] =
2180
                prob;
2181
            // Copy temp_frame_probs_simulation to temp_frame_probs
2182
            for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2183
                 update_type_idx++) {
2184
              temp_frame_probs->tx_type_probs[update_type_idx][i][j] =
2185
                  temp_frame_probs_simulation
2186
                      ->tx_type_probs[update_type_idx][i][j];
2187
            }
2188
          }
2189
          update_txtype_frameprobs = 0;
2190
        }
2191
#endif  // CONFIG_FPMT_TEST
2192
        // Track the frame probabilities of parallel encode frames to update
2193
        // during postencode stage.
2194
0
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2195
0
          update_txtype_frameprobs = 0;
2196
0
          cpi->frame_new_probs[cpi->num_frame_recode]
2197
0
              .tx_type_probs[update_type][i][j] = new_prob;
2198
0
        }
2199
0
        if (update_txtype_frameprobs) {
2200
0
          int prob =
2201
0
              (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
2202
0
          left -= prob;
2203
0
          if (j == 0) prob += left;
2204
0
          frame_probs->tx_type_probs[update_type][i][j] = prob;
2205
0
        }
2206
0
      }
2207
0
    }
2208
0
  }
2209
2210
0
  if (cm->seg.enabled) {
2211
0
    cm->seg.temporal_update = 1;
2212
0
    if (rdc->seg_tmp_pred_cost[0] < rdc->seg_tmp_pred_cost[1])
2213
0
      cm->seg.temporal_update = 0;
2214
0
  }
2215
2216
0
  if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2217
0
      cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
2218
0
    const FRAME_UPDATE_TYPE update_type =
2219
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2220
2221
0
    for (i = 0; i < BLOCK_SIZES_ALL; i++) {
2222
0
      int sum = 0;
2223
0
      int update_obmc_frameprobs = 1;
2224
0
      for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];
2225
2226
0
      const int new_prob =
2227
0
          sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
2228
#if CONFIG_FPMT_TEST
2229
      if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2230
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2231
          temp_frame_probs_simulation->obmc_probs[update_type][i] =
2232
              (temp_frame_probs_simulation->obmc_probs[update_type][i] +
2233
               new_prob) >>
2234
              1;
2235
          // Copy temp_frame_probs_simulation to temp_frame_probs
2236
          for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2237
               update_type_idx++) {
2238
            temp_frame_probs->obmc_probs[update_type_idx][i] =
2239
                temp_frame_probs_simulation->obmc_probs[update_type_idx][i];
2240
          }
2241
        }
2242
        update_obmc_frameprobs = 0;
2243
      }
2244
#endif  // CONFIG_FPMT_TEST
2245
      // Track the frame probabilities of parallel encode frames to update
2246
      // during postencode stage.
2247
0
      if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2248
0
        update_obmc_frameprobs = 0;
2249
0
        cpi->frame_new_probs[cpi->num_frame_recode].obmc_probs[update_type][i] =
2250
0
            new_prob;
2251
0
      }
2252
0
      if (update_obmc_frameprobs) {
2253
0
        frame_probs->obmc_probs[update_type][i] =
2254
0
            (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
2255
0
      }
2256
0
    }
2257
0
  }
2258
2259
0
  if (features->allow_warped_motion &&
2260
0
      cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
2261
0
    const FRAME_UPDATE_TYPE update_type =
2262
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2263
0
    int update_warp_frameprobs = 1;
2264
0
    int sum = 0;
2265
0
    for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
2266
0
    const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
2267
#if CONFIG_FPMT_TEST
2268
    if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2269
      if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2270
        temp_frame_probs_simulation->warped_probs[update_type] =
2271
            (temp_frame_probs_simulation->warped_probs[update_type] +
2272
             new_prob) >>
2273
            1;
2274
        // Copy temp_frame_probs_simulation to temp_frame_probs
2275
        for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2276
             update_type_idx++) {
2277
          temp_frame_probs->warped_probs[update_type_idx] =
2278
              temp_frame_probs_simulation->warped_probs[update_type_idx];
2279
        }
2280
      }
2281
      update_warp_frameprobs = 0;
2282
    }
2283
#endif  // CONFIG_FPMT_TEST
2284
    // Track the frame probabilities of parallel encode frames to update
2285
    // during postencode stage.
2286
0
    if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2287
0
      update_warp_frameprobs = 0;
2288
0
      cpi->frame_new_probs[cpi->num_frame_recode].warped_probs[update_type] =
2289
0
          new_prob;
2290
0
    }
2291
0
    if (update_warp_frameprobs) {
2292
0
      frame_probs->warped_probs[update_type] =
2293
0
          (frame_probs->warped_probs[update_type] + new_prob) >> 1;
2294
0
    }
2295
0
  }
2296
2297
0
  if (cm->current_frame.frame_type != KEY_FRAME &&
2298
0
      cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2299
0
      features->interp_filter == SWITCHABLE) {
2300
0
    const FRAME_UPDATE_TYPE update_type =
2301
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2302
2303
0
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
2304
0
      int sum = 0;
2305
0
      int j;
2306
0
      int left = 1536;
2307
2308
0
      for (j = 0; j < SWITCHABLE_FILTERS; j++) {
2309
0
        sum += cpi->td.counts->switchable_interp[i][j];
2310
0
      }
2311
2312
0
      for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
2313
0
        int update_interpfilter_frameprobs = 1;
2314
0
        const int new_prob =
2315
0
            sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
2316
0
                : (j ? 0 : 1536);
2317
#if CONFIG_FPMT_TEST
2318
        if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2319
          if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2320
              0) {
2321
            int prob = (temp_frame_probs_simulation
2322
                            ->switchable_interp_probs[update_type][i][j] +
2323
                        new_prob) >>
2324
                       1;
2325
            left -= prob;
2326
            if (j == 0) prob += left;
2327
            temp_frame_probs_simulation
2328
                ->switchable_interp_probs[update_type][i][j] = prob;
2329
            // Copy temp_frame_probs_simulation to temp_frame_probs
2330
            for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2331
                 update_type_idx++) {
2332
              temp_frame_probs->switchable_interp_probs[update_type_idx][i][j] =
2333
                  temp_frame_probs_simulation
2334
                      ->switchable_interp_probs[update_type_idx][i][j];
2335
            }
2336
          }
2337
          update_interpfilter_frameprobs = 0;
2338
        }
2339
#endif  // CONFIG_FPMT_TEST
2340
        // Track the frame probabilities of parallel encode frames to update
2341
        // during postencode stage.
2342
0
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2343
0
          update_interpfilter_frameprobs = 0;
2344
0
          cpi->frame_new_probs[cpi->num_frame_recode]
2345
0
              .switchable_interp_probs[update_type][i][j] = new_prob;
2346
0
        }
2347
0
        if (update_interpfilter_frameprobs) {
2348
0
          int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
2349
0
                      new_prob) >>
2350
0
                     1;
2351
0
          left -= prob;
2352
0
          if (j == 0) prob += left;
2353
0
          frame_probs->switchable_interp_probs[update_type][i][j] = prob;
2354
0
        }
2355
0
      }
2356
0
    }
2357
0
  }
2358
0
  if (hash_table_created) {
2359
0
    av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
2360
0
  }
2361
0
}
2362
2363
/*!\brief Setup reference frame buffers and encode a frame
2364
 *
2365
 * \ingroup high_level_algo
2366
 * \callgraph
2367
 * \callergraph
2368
 *
2369
 * \param[in]    cpi    Top-level encoder structure
2370
 */
2371
0
void av1_encode_frame(AV1_COMP *cpi) {
2372
0
  AV1_COMMON *const cm = &cpi->common;
2373
0
  CurrentFrame *const current_frame = &cm->current_frame;
2374
0
  FeatureFlags *const features = &cm->features;
2375
0
  RD_COUNTS *const rdc = &cpi->td.rd_counts;
2376
0
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
2377
  // Indicates whether or not to use a default reduced set for ext-tx
2378
  // rather than the potential full set of 16 transforms
2379
0
  features->reduced_tx_set_used = oxcf->txfm_cfg.reduced_tx_type_set;
2380
2381
  // Make sure segment_id is no larger than last_active_segid.
2382
0
  if (cm->seg.enabled && cm->seg.update_map) {
2383
0
    const int mi_rows = cm->mi_params.mi_rows;
2384
0
    const int mi_cols = cm->mi_params.mi_cols;
2385
0
    const int last_active_segid = cm->seg.last_active_segid;
2386
0
    uint8_t *map = cpi->enc_seg.map;
2387
0
    for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
2388
0
      for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
2389
0
        map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
2390
0
      }
2391
0
      map += mi_cols;
2392
0
    }
2393
0
  }
2394
2395
0
  av1_setup_frame_buf_refs(cm);
2396
0
  enforce_max_ref_frames(cpi, &cpi->ref_frame_flags,
2397
0
                         cm->cur_frame->ref_display_order_hint,
2398
0
                         cm->current_frame.display_order_hint);
2399
0
  set_rel_frame_dist(&cpi->common, &cpi->ref_frame_dist_info,
2400
0
                     cpi->ref_frame_flags);
2401
0
  av1_setup_frame_sign_bias(cm);
2402
2403
  // If global motion is enabled, then every buffer which is used as either
2404
  // a source or a ref frame should have an image pyramid allocated.
2405
  // Check here so that issues can be caught early in debug mode
2406
#if !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
2407
  if (cpi->alloc_pyramid) {
2408
    assert(cpi->source->y_pyramid);
2409
    for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
2410
      const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
2411
      if (buf != NULL) {
2412
        assert(buf->buf.y_pyramid);
2413
      }
2414
    }
2415
  }
2416
#endif  // !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
2417
2418
#if CONFIG_MISMATCH_DEBUG
2419
  mismatch_reset_frame(av1_num_planes(cm));
2420
#endif
2421
2422
0
  rdc->newmv_or_intra_blocks = 0;
2423
0
  cpi->palette_pixel_num = 0;
2424
2425
0
  if (cpi->sf.hl_sf.frame_parameter_update ||
2426
0
      cpi->sf.rt_sf.use_comp_ref_nonrd) {
2427
0
    if (frame_is_intra_only(cm))
2428
0
      current_frame->reference_mode = SINGLE_REFERENCE;
2429
0
    else
2430
0
      current_frame->reference_mode = REFERENCE_MODE_SELECT;
2431
2432
0
    features->interp_filter = SWITCHABLE;
2433
0
    if (cm->tiles.large_scale) features->interp_filter = EIGHTTAP_REGULAR;
2434
2435
0
    features->switchable_motion_mode = is_switchable_motion_mode_allowed(
2436
0
        features->allow_warped_motion, oxcf->motion_mode_cfg.enable_obmc);
2437
2438
0
    rdc->compound_ref_used_flag = 0;
2439
0
    rdc->skip_mode_used_flag = 0;
2440
2441
0
    encode_frame_internal(cpi);
2442
2443
0
    if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
2444
      // Use a flag that includes 4x4 blocks
2445
0
      if (rdc->compound_ref_used_flag == 0) {
2446
0
        current_frame->reference_mode = SINGLE_REFERENCE;
2447
#if CONFIG_ENTROPY_STATS
2448
        av1_zero(cpi->td.counts->comp_inter);
2449
#endif  // CONFIG_ENTROPY_STATS
2450
0
      }
2451
0
    }
2452
    // Re-check on the skip mode status as reference mode may have been
2453
    // changed.
2454
0
    SkipModeInfo *const skip_mode_info = &current_frame->skip_mode_info;
2455
0
    if (frame_is_intra_only(cm) ||
2456
0
        current_frame->reference_mode == SINGLE_REFERENCE) {
2457
0
      skip_mode_info->skip_mode_allowed = 0;
2458
0
      skip_mode_info->skip_mode_flag = 0;
2459
0
    }
2460
0
    if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
2461
0
      skip_mode_info->skip_mode_flag = 0;
2462
2463
0
    if (!cm->tiles.large_scale) {
2464
0
      if (features->tx_mode == TX_MODE_SELECT &&
2465
0
          cpi->td.mb.txfm_search_info.txb_split_count == 0)
2466
0
        features->tx_mode = TX_MODE_LARGEST;
2467
0
    }
2468
0
  } else {
2469
    // This is needed if real-time speed setting is changed on the fly
2470
    // from one using compound prediction to one using single reference.
2471
0
    if (current_frame->reference_mode == REFERENCE_MODE_SELECT)
2472
0
      current_frame->reference_mode = SINGLE_REFERENCE;
2473
0
    encode_frame_internal(cpi);
2474
0
  }
2475
0
}