Coverage Report

Created: 2022-08-24 06:15

/src/aom/av1/encoder/partition_search.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2020, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include "av1/common/av1_common_int.h"
13
#include "av1/common/blockd.h"
14
#include "av1/common/enums.h"
15
#include "av1/common/reconintra.h"
16
17
#include "av1/encoder/aq_complexity.h"
18
#include "av1/encoder/aq_variance.h"
19
#include "av1/encoder/context_tree.h"
20
#include "av1/encoder/encoder.h"
21
#include "av1/encoder/encodeframe.h"
22
#include "av1/encoder/encodeframe_utils.h"
23
#include "av1/encoder/encodemv.h"
24
#include "av1/encoder/motion_search_facade.h"
25
#include "av1/encoder/partition_search.h"
26
#include "av1/encoder/partition_strategy.h"
27
#include "av1/encoder/reconinter_enc.h"
28
#include "av1/encoder/tokenize.h"
29
#include "av1/encoder/var_based_part.h"
30
#include "av1/encoder/av1_ml_partition_models.h"
31
32
#if CONFIG_TUNE_VMAF
33
#include "av1/encoder/tune_vmaf.h"
34
#endif
35
36
187k
#define COLLECT_MOTION_SEARCH_FEATURE_SB 0
37
#define ML_PARTITION_WHOLE_TREE_DECISION 0
38
39
0
void av1_reset_part_sf(PARTITION_SPEED_FEATURES *part_sf) {
40
0
  part_sf->partition_search_type = SEARCH_PARTITION;
41
0
  part_sf->less_rectangular_check_level = 0;
42
0
  part_sf->use_square_partition_only_threshold = BLOCK_128X128;
43
0
  part_sf->auto_max_partition_based_on_simple_motion = NOT_IN_USE;
44
0
  part_sf->default_max_partition_size = BLOCK_LARGEST;
45
0
  part_sf->default_min_partition_size = BLOCK_4X4;
46
0
  part_sf->adjust_var_based_rd_partitioning = 0;
47
0
  part_sf->max_intra_bsize = BLOCK_LARGEST;
48
  // This setting only takes effect when partition_search_type is set
49
  // to FIXED_PARTITION.
50
0
  part_sf->fixed_partition_size = BLOCK_16X16;
51
  // Recode loop tolerance %.
52
0
  part_sf->partition_search_breakout_dist_thr = 0;
53
0
  part_sf->partition_search_breakout_rate_thr = 0;
54
0
  part_sf->prune_ext_partition_types_search_level = 0;
55
0
  part_sf->prune_part4_search = 0;
56
0
  part_sf->ml_prune_partition = 0;
57
0
  part_sf->ml_early_term_after_part_split_level = 0;
58
0
  for (int i = 0; i < PARTITION_BLOCK_SIZES; ++i) {
59
0
    part_sf->ml_partition_search_breakout_thresh[i] =
60
0
        -1;  // -1 means not enabled.
61
0
  }
62
0
  part_sf->simple_motion_search_prune_agg = SIMPLE_AGG_LVL0;
63
0
  part_sf->simple_motion_search_split = 0;
64
0
  part_sf->simple_motion_search_prune_rect = 0;
65
0
  part_sf->simple_motion_search_early_term_none = 0;
66
0
  part_sf->simple_motion_search_reduce_search_steps = 0;
67
0
  part_sf->intra_cnn_based_part_prune_level = 0;
68
0
  part_sf->ext_partition_eval_thresh = BLOCK_8X8;
69
0
  part_sf->rect_partition_eval_thresh = BLOCK_128X128;
70
0
  part_sf->prune_ext_part_using_split_info = 0;
71
0
  part_sf->prune_rectangular_split_based_on_qidx = 0;
72
0
  part_sf->early_term_after_none_split = 0;
73
0
  part_sf->ml_predict_breakout_level = 0;
74
0
  part_sf->prune_sub_8x8_partition_level = 0;
75
0
  part_sf->simple_motion_search_rect_split = 0;
76
0
  part_sf->reuse_prev_rd_results_for_part_ab = 0;
77
0
  part_sf->reuse_best_prediction_for_part_ab = 0;
78
0
  part_sf->use_best_rd_for_pruning = 0;
79
0
  part_sf->skip_non_sq_part_based_on_none = 0;
80
0
}
81
82
// Reset speed features that work for the baseline encoding but
83
// block the external partition search.
84
0
void av1_reset_sf_for_ext_part(AV1_COMP *const cpi) {
85
0
  cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions = 0;
86
0
}
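A minimal sketch of how these two resets might be used together when an external partition model is plugged in. Only the two functions above come from this file; the call site and the ext_part_model_ready flag are illustrative assumptions:

  // Hypothetical call site: clear the baseline partition speed features so
  // that external partition decisions are not overridden by internal pruning.
  if (ext_part_model_ready) {  // assumed readiness flag
    av1_reset_part_sf(&cpi->sf.part_sf);
    av1_reset_sf_for_ext_part(cpi);
  }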
87
88
#if !CONFIG_REALTIME_ONLY
89
// If input |features| is NULL, write tpl stats to file for each super block.
90
// Otherwise, store tpl stats to |features|.
91
// The tpl stats are computed in units of tpl_bsize_1d (16x16).
92
// When writing to text file:
93
// The first row contains super block position, super block size,
94
// tpl unit length, number of units in the super block.
95
// The second row contains the intra prediction cost for each unit.
96
// The third row contains the inter prediction cost for each unit.
97
// The fourth row contains the motion compensated dependency cost for each unit.
98
static void collect_tpl_stats_sb(const AV1_COMP *const cpi,
99
                                 const BLOCK_SIZE bsize, const int mi_row,
100
                                 const int mi_col,
101
0
                                 aom_partition_features_t *features) {
102
0
  const AV1_COMMON *const cm = &cpi->common;
103
0
  GF_GROUP *gf_group = &cpi->ppi->gf_group;
104
0
  if (gf_group->update_type[cpi->gf_frame_index] == INTNL_OVERLAY_UPDATE ||
105
0
      gf_group->update_type[cpi->gf_frame_index] == OVERLAY_UPDATE) {
106
0
    return;
107
0
  }
108
109
0
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
110
0
  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[cpi->gf_frame_index];
111
0
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
112
  // If the tpl stats are not ready, return early
113
0
  if (!tpl_data->ready || gf_group->max_layer_depth_allowed == 0) {
114
0
    if (features != NULL) features->sb_features.tpl_features.available = 0;
115
0
    return;
116
0
  }
117
118
0
  const int tpl_stride = tpl_frame->stride;
119
0
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
120
0
  const int mi_width =
121
0
      AOMMIN(mi_size_wide[bsize], cm->mi_params.mi_cols - mi_col);
122
0
  const int mi_height =
123
0
      AOMMIN(mi_size_high[bsize], cm->mi_params.mi_rows - mi_row);
124
0
  const int col_steps = (mi_width / step) + ((mi_width % step) > 0);
125
0
  const int row_steps = (mi_height / step) + ((mi_height % step) > 0);
126
0
  const int num_blocks = col_steps * row_steps;
127
128
0
  if (features == NULL) {
129
0
    char filename[256];
130
0
    snprintf(filename, sizeof(filename), "%s/tpl_feature_sb%d",
131
0
             cpi->oxcf.partition_info_path, cpi->sb_counter);
132
0
    FILE *pfile = fopen(filename, "w");
133
0
    fprintf(pfile, "%d,%d,%d,%d,%d\n", mi_row, mi_col, bsize,
134
0
            tpl_data->tpl_bsize_1d, num_blocks);
135
0
    int count = 0;
136
0
    for (int row = 0; row < mi_height; row += step) {
137
0
      for (int col = 0; col < mi_width; col += step) {
138
0
        TplDepStats *this_stats =
139
0
            &tpl_stats[av1_tpl_ptr_pos(mi_row + row, mi_col + col, tpl_stride,
140
0
                                       tpl_data->tpl_stats_block_mis_log2)];
141
0
        fprintf(pfile, "%.0f", (double)this_stats->intra_cost);
142
0
        if (count < num_blocks - 1) fprintf(pfile, ",");
143
0
        ++count;
144
0
      }
145
0
    }
146
0
    fprintf(pfile, "\n");
147
0
    count = 0;
148
0
    for (int row = 0; row < mi_height; row += step) {
149
0
      for (int col = 0; col < mi_width; col += step) {
150
0
        TplDepStats *this_stats =
151
0
            &tpl_stats[av1_tpl_ptr_pos(mi_row + row, mi_col + col, tpl_stride,
152
0
                                       tpl_data->tpl_stats_block_mis_log2)];
153
0
        fprintf(pfile, "%.0f", (double)this_stats->inter_cost);
154
0
        if (count < num_blocks - 1) fprintf(pfile, ",");
155
0
        ++count;
156
0
      }
157
0
    }
158
0
    fprintf(pfile, "\n");
159
0
    count = 0;
160
0
    for (int row = 0; row < mi_height; row += step) {
161
0
      for (int col = 0; col < mi_width; col += step) {
162
0
        TplDepStats *this_stats =
163
0
            &tpl_stats[av1_tpl_ptr_pos(mi_row + row, mi_col + col, tpl_stride,
164
0
                                       tpl_data->tpl_stats_block_mis_log2)];
165
0
        const int64_t mc_dep_delta =
166
0
            RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate,
167
0
                   this_stats->mc_dep_dist);
168
0
        fprintf(pfile, "%.0f", (double)mc_dep_delta);
169
0
        if (count < num_blocks - 1) fprintf(pfile, ",");
170
0
        ++count;
171
0
      }
172
0
    }
173
0
    fclose(pfile);
174
0
  } else {
175
0
    features->sb_features.tpl_features.available = 1;
176
0
    features->sb_features.tpl_features.tpl_unit_length = tpl_data->tpl_bsize_1d;
177
0
    features->sb_features.tpl_features.num_units = num_blocks;
178
0
    int count = 0;
179
0
    for (int row = 0; row < mi_height; row += step) {
180
0
      for (int col = 0; col < mi_width; col += step) {
181
0
        TplDepStats *this_stats =
182
0
            &tpl_stats[av1_tpl_ptr_pos(mi_row + row, mi_col + col, tpl_stride,
183
0
                                       tpl_data->tpl_stats_block_mis_log2)];
184
0
        const int64_t mc_dep_delta =
185
0
            RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate,
186
0
                   this_stats->mc_dep_dist);
187
0
        features->sb_features.tpl_features.intra_cost[count] =
188
0
            this_stats->intra_cost;
189
0
        features->sb_features.tpl_features.inter_cost[count] =
190
0
            this_stats->inter_cost;
191
0
        features->sb_features.tpl_features.mc_dep_cost[count] = mc_dep_delta;
192
0
        ++count;
193
0
      }
194
0
    }
195
0
  }
196
0
}
197
#endif  // !CONFIG_REALTIME_ONLY
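A stand-alone sketch (not part of the encoder) showing how the per-superblock text file written by collect_tpl_stats_sb() could be read back. The file name and variable names are illustrative assumptions; only the row layout (position/size header, then intra, inter and mc-dep cost rows) comes from the comment above that function:

  #include <stdio.h>

  int main(void) {
    FILE *pfile = fopen("tpl_feature_sb0", "r");  // hypothetical path
    if (pfile == NULL) return 1;
    int mi_row, mi_col, bsize, tpl_unit, num_units;
    if (fscanf(pfile, "%d,%d,%d,%d,%d", &mi_row, &mi_col, &bsize, &tpl_unit,
               &num_units) == 5) {
      // The three following rows hold comma-separated intra, inter and
      // mc-dep costs, one value per tpl unit.
      printf("SB at mi (%d,%d): %d tpl units of %dx%d pixels\n", mi_row,
             mi_col, num_units, tpl_unit, tpl_unit);
    }
    fclose(pfile);
    return 0;
  }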
198
199
static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
200
                              FRAME_COUNTS *counts, TX_SIZE tx_size, int depth,
201
                              int blk_row, int blk_col,
202
0
                              uint8_t allow_update_cdf) {
203
0
  MB_MODE_INFO *mbmi = xd->mi[0];
204
0
  const BLOCK_SIZE bsize = mbmi->bsize;
205
0
  const int max_blocks_high = max_block_high(xd, bsize, 0);
206
0
  const int max_blocks_wide = max_block_wide(xd, bsize, 0);
207
0
  int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
208
0
                                   xd->left_txfm_context + blk_row, mbmi->bsize,
209
0
                                   tx_size);
210
0
  const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
211
0
  const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index];
212
213
0
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
214
0
  assert(tx_size > TX_4X4);
215
216
0
  if (depth == MAX_VARTX_DEPTH) {
217
    // Don't add to counts in this case
218
0
    mbmi->tx_size = tx_size;
219
0
    txfm_partition_update(xd->above_txfm_context + blk_col,
220
0
                          xd->left_txfm_context + blk_row, tx_size, tx_size);
221
0
    return;
222
0
  }
223
224
0
  if (tx_size == plane_tx_size) {
225
#if CONFIG_ENTROPY_STATS
226
    ++counts->txfm_partition[ctx][0];
227
#endif
228
0
    if (allow_update_cdf)
229
0
      update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 0, 2);
230
0
    mbmi->tx_size = tx_size;
231
0
    txfm_partition_update(xd->above_txfm_context + blk_col,
232
0
                          xd->left_txfm_context + blk_row, tx_size, tx_size);
233
0
  } else {
234
0
    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
235
0
    const int bsw = tx_size_wide_unit[sub_txs];
236
0
    const int bsh = tx_size_high_unit[sub_txs];
237
238
#if CONFIG_ENTROPY_STATS
239
    ++counts->txfm_partition[ctx][1];
240
#endif
241
0
    if (allow_update_cdf)
242
0
      update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 1, 2);
243
0
    ++x->txfm_search_info.txb_split_count;
244
245
0
    if (sub_txs == TX_4X4) {
246
0
      mbmi->inter_tx_size[txb_size_index] = TX_4X4;
247
0
      mbmi->tx_size = TX_4X4;
248
0
      txfm_partition_update(xd->above_txfm_context + blk_col,
249
0
                            xd->left_txfm_context + blk_row, TX_4X4, tx_size);
250
0
      return;
251
0
    }
252
253
0
    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
254
0
      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
255
0
        int offsetr = row;
256
0
        int offsetc = col;
257
258
0
        update_txfm_count(x, xd, counts, sub_txs, depth + 1, blk_row + offsetr,
259
0
                          blk_col + offsetc, allow_update_cdf);
260
0
      }
261
0
    }
262
0
  }
263
0
}
264
265
static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x,
266
                                      BLOCK_SIZE plane_bsize,
267
                                      FRAME_COUNTS *td_counts,
268
0
                                      uint8_t allow_update_cdf) {
269
0
  MACROBLOCKD *xd = &x->e_mbd;
270
0
  const int mi_width = mi_size_wide[plane_bsize];
271
0
  const int mi_height = mi_size_high[plane_bsize];
272
0
  const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
273
0
  const int bh = tx_size_high_unit[max_tx_size];
274
0
  const int bw = tx_size_wide_unit[max_tx_size];
275
276
0
  xd->above_txfm_context =
277
0
      cm->above_contexts.txfm[xd->tile.tile_row] + xd->mi_col;
278
0
  xd->left_txfm_context =
279
0
      xd->left_txfm_context_buffer + (xd->mi_row & MAX_MIB_MASK);
280
281
0
  for (int idy = 0; idy < mi_height; idy += bh) {
282
0
    for (int idx = 0; idx < mi_width; idx += bw) {
283
0
      update_txfm_count(x, xd, td_counts, max_tx_size, 0, idy, idx,
284
0
                        allow_update_cdf);
285
0
    }
286
0
  }
287
0
}
288
289
static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
290
0
                             int blk_col) {
291
0
  MB_MODE_INFO *mbmi = xd->mi[0];
292
0
  const BLOCK_SIZE bsize = mbmi->bsize;
293
0
  const int max_blocks_high = max_block_high(xd, bsize, 0);
294
0
  const int max_blocks_wide = max_block_wide(xd, bsize, 0);
295
0
  const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
296
0
  const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index];
297
298
0
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
299
300
0
  if (tx_size == plane_tx_size) {
301
0
    mbmi->tx_size = tx_size;
302
0
    txfm_partition_update(xd->above_txfm_context + blk_col,
303
0
                          xd->left_txfm_context + blk_row, tx_size, tx_size);
304
305
0
  } else {
306
0
    if (tx_size == TX_8X8) {
307
0
      mbmi->inter_tx_size[txb_size_index] = TX_4X4;
308
0
      mbmi->tx_size = TX_4X4;
309
0
      txfm_partition_update(xd->above_txfm_context + blk_col,
310
0
                            xd->left_txfm_context + blk_row, TX_4X4, tx_size);
311
0
      return;
312
0
    }
313
0
    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
314
0
    const int bsw = tx_size_wide_unit[sub_txs];
315
0
    const int bsh = tx_size_high_unit[sub_txs];
316
0
    const int row_end =
317
0
        AOMMIN(tx_size_high_unit[tx_size], max_blocks_high - blk_row);
318
0
    const int col_end =
319
0
        AOMMIN(tx_size_wide_unit[tx_size], max_blocks_wide - blk_col);
320
0
    for (int row = 0; row < row_end; row += bsh) {
321
0
      const int offsetr = blk_row + row;
322
0
      for (int col = 0; col < col_end; col += bsw) {
323
0
        const int offsetc = blk_col + col;
324
0
        set_txfm_context(xd, sub_txs, offsetr, offsetc);
325
0
      }
326
0
    }
327
0
  }
328
0
}
329
330
static void tx_partition_set_contexts(const AV1_COMMON *const cm,
331
0
                                      MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
332
0
  const int mi_width = mi_size_wide[plane_bsize];
333
0
  const int mi_height = mi_size_high[plane_bsize];
334
0
  const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
335
0
  const int bh = tx_size_high_unit[max_tx_size];
336
0
  const int bw = tx_size_wide_unit[max_tx_size];
337
338
0
  xd->above_txfm_context =
339
0
      cm->above_contexts.txfm[xd->tile.tile_row] + xd->mi_col;
340
0
  xd->left_txfm_context =
341
0
      xd->left_txfm_context_buffer + (xd->mi_row & MAX_MIB_MASK);
342
343
0
  for (int idy = 0; idy < mi_height; idy += bh) {
344
0
    for (int idx = 0; idx < mi_width; idx += bw) {
345
0
      set_txfm_context(xd, max_tx_size, idy, idx);
346
0
    }
347
0
  }
348
0
}
349
350
static void update_zeromv_cnt(const AV1_COMP *const cpi,
351
                              const MB_MODE_INFO *const mi, int mi_row,
352
0
                              int mi_col, BLOCK_SIZE bsize) {
353
0
  if (mi->ref_frame[0] != LAST_FRAME || !is_inter_block(mi) ||
354
0
      mi->segment_id > CR_SEGMENT_ID_BOOST2) {
355
0
    return;
356
0
  }
357
0
  const AV1_COMMON *const cm = &cpi->common;
358
0
  const MV mv = mi->mv[0].as_mv;
359
0
  const int bw = mi_size_wide[bsize] >> 1;
360
0
  const int bh = mi_size_high[bsize] >> 1;
361
0
  const int xmis = AOMMIN((cm->mi_params.mi_cols - mi_col) >> 1, bw);
362
0
  const int ymis = AOMMIN((cm->mi_params.mi_rows - mi_row) >> 1, bh);
363
0
  const int block_index =
364
0
      (mi_row >> 1) * (cm->mi_params.mi_cols >> 1) + (mi_col >> 1);
365
0
  for (int y = 0; y < ymis; y++) {
366
0
    for (int x = 0; x < xmis; x++) {
367
      // consec_zero_mv is tracked at the granularity of 8x8 blocks
368
0
      const int map_offset = block_index + y * (cm->mi_params.mi_cols >> 1) + x;
369
0
      if (abs(mv.row) < 10 && abs(mv.col) < 10) {
370
0
        if (cpi->consec_zero_mv[map_offset] < 255)
371
0
          cpi->consec_zero_mv[map_offset]++;
372
0
      } else {
373
0
        cpi->consec_zero_mv[map_offset] = 0;
374
0
      }
375
0
    }
376
0
  }
377
0
}
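A short worked example of the consec_zero_mv index mapping used above; the frame width and block position are illustrative assumptions. With cm->mi_params.mi_cols = 160 (a 640-pixel-wide frame at 4x4 MI granularity), a block at mi_row = 4, mi_col = 8 maps to block_index = (4 >> 1) * (160 >> 1) + (8 >> 1) = 2 * 80 + 4 = 164, i.e. one consec_zero_mv entry per 8x8-pixel block; xmis and ymis clamp the update loop at the right and bottom frame edges.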
378
379
static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
380
                              ThreadData *td, TokenExtra **t, RUN_TYPE dry_run,
381
61.6k
                              BLOCK_SIZE bsize, int *rate) {
382
61.6k
  const AV1_COMMON *const cm = &cpi->common;
383
61.6k
  const int num_planes = av1_num_planes(cm);
384
61.6k
  MACROBLOCK *const x = &td->mb;
385
61.6k
  MACROBLOCKD *const xd = &x->e_mbd;
386
61.6k
  MB_MODE_INFO **mi_4x4 = xd->mi;
387
61.6k
  MB_MODE_INFO *mbmi = mi_4x4[0];
388
61.6k
  const int seg_skip =
389
61.6k
      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
390
61.6k
  const int mis = cm->mi_params.mi_stride;
391
61.6k
  const int mi_width = mi_size_wide[bsize];
392
61.6k
  const int mi_height = mi_size_high[bsize];
393
61.6k
  const int is_inter = is_inter_block(mbmi);
394
395
  // Initialize tx_mode and tx_size_search_method
396
61.6k
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
397
61.6k
  set_tx_size_search_method(
398
61.6k
      cm, &cpi->winner_mode_params, txfm_params,
399
61.6k
      cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch, 1);
400
401
61.6k
  const int mi_row = xd->mi_row;
402
61.6k
  const int mi_col = xd->mi_col;
403
61.6k
  if (!is_inter) {
404
61.6k
    xd->cfl.store_y = store_cfl_required(cm, xd);
405
61.6k
    mbmi->skip_txfm = 1;
406
246k
    for (int plane = 0; plane < num_planes; ++plane) {
407
184k
      av1_encode_intra_block_plane(cpi, x, bsize, plane, dry_run,
408
184k
                                   cpi->optimize_seg_arr[mbmi->segment_id]);
409
184k
    }
410
411
    // If there is at least one lossless segment, force the skip for intra
412
    // block to be 0, in order to avoid the segment_id being changed in
413
    // write_segment_id().
414
61.6k
    if (!cpi->common.seg.segid_preskip && cpi->common.seg.update_map &&
415
61.6k
        cpi->enc_seg.has_lossless_segment)
416
0
      mbmi->skip_txfm = 0;
417
418
61.6k
    xd->cfl.store_y = 0;
419
61.6k
    if (av1_allow_palette(cm->features.allow_screen_content_tools, bsize)) {
420
0
      for (int plane = 0; plane < AOMMIN(2, num_planes); ++plane) {
421
0
        if (mbmi->palette_mode_info.palette_size[plane] > 0) {
422
0
          if (!dry_run) {
423
0
            av1_tokenize_color_map(x, plane, t, bsize, mbmi->tx_size,
424
0
                                   PALETTE_MAP, tile_data->allow_update_cdf,
425
0
                                   td->counts);
426
0
          } else if (dry_run == DRY_RUN_COSTCOEFFS) {
427
0
            *rate +=
428
0
                av1_cost_color_map(x, plane, bsize, mbmi->tx_size, PALETTE_MAP);
429
0
          }
430
0
        }
431
0
      }
432
0
    }
433
434
61.6k
    av1_update_intra_mb_txb_context(cpi, td, dry_run, bsize,
435
61.6k
                                    tile_data->allow_update_cdf);
436
18.4E
  } else {
437
18.4E
    int ref;
438
18.4E
    const int is_compound = has_second_ref(mbmi);
439
440
18.4E
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
441
18.4E
    for (ref = 0; ref < 1 + is_compound; ++ref) {
442
0
      const YV12_BUFFER_CONFIG *cfg =
443
0
          get_ref_frame_yv12_buf(cm, mbmi->ref_frame[ref]);
444
0
      assert(IMPLIES(!is_intrabc_block(mbmi), cfg));
445
0
      av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
446
0
                           xd->block_ref_scale_factors[ref], num_planes);
447
0
    }
448
    // Predicted sample of inter mode (for Luma plane) cannot be reused if
449
    // nonrd_check_partition_merge_mode or nonrd_check_partition_split speed
450
    // feature is enabled, since in such cases the buffer may not contain the
451
    // predicted sample of best mode.
452
18.4E
    const int start_plane =
453
18.4E
        (cpi->sf.rt_sf.reuse_inter_pred_nonrd &&
454
18.4E
         (!cpi->sf.rt_sf.nonrd_check_partition_merge_mode) &&
455
18.4E
         (!cpi->sf.rt_sf.nonrd_check_partition_split) &&
456
18.4E
         cm->seq_params->bit_depth == AOM_BITS_8)
457
18.4E
            ? 1
458
18.4E
            : 0;
459
18.4E
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
460
18.4E
                                  start_plane, av1_num_planes(cm) - 1);
461
18.4E
    if (mbmi->motion_mode == OBMC_CAUSAL) {
462
0
      assert(cpi->oxcf.motion_mode_cfg.enable_obmc);
463
0
      av1_build_obmc_inter_predictors_sb(cm, xd);
464
0
    }
465
466
#if CONFIG_MISMATCH_DEBUG
467
    if (dry_run == OUTPUT_ENABLED) {
468
      for (int plane = 0; plane < num_planes; ++plane) {
469
        const struct macroblockd_plane *pd = &xd->plane[plane];
470
        int pixel_c, pixel_r;
471
        mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0,
472
                        pd->subsampling_x, pd->subsampling_y);
473
        if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
474
                                 pd->subsampling_y))
475
          continue;
476
        mismatch_record_block_pre(pd->dst.buf, pd->dst.stride,
477
                                  cm->current_frame.order_hint, plane, pixel_c,
478
                                  pixel_r, pd->width, pd->height,
479
                                  xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
480
      }
481
    }
482
#else
483
18.4E
    (void)num_planes;
484
18.4E
#endif
485
486
18.4E
    av1_encode_sb(cpi, x, bsize, dry_run);
487
18.4E
    av1_tokenize_sb_vartx(cpi, td, dry_run, bsize, rate,
488
18.4E
                          tile_data->allow_update_cdf);
489
18.4E
  }
490
491
61.6k
  if (!dry_run) {
492
11.8k
    if (av1_allow_intrabc(cm) && is_intrabc_block(mbmi)) td->intrabc_used = 1;
493
11.8k
    if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
494
11.8k
        !xd->lossless[mbmi->segment_id] && mbmi->bsize > BLOCK_4X4 &&
495
11.8k
        !(is_inter && (mbmi->skip_txfm || seg_skip))) {
496
9.00k
      if (is_inter) {
497
0
        tx_partition_count_update(cm, x, bsize, td->counts,
498
0
                                  tile_data->allow_update_cdf);
499
9.00k
      } else {
500
9.00k
        if (mbmi->tx_size != max_txsize_rect_lookup[bsize])
501
4.91k
          ++x->txfm_search_info.txb_split_count;
502
9.00k
        if (block_signals_txsize(bsize)) {
503
9.00k
          const int tx_size_ctx = get_tx_size_context(xd);
504
9.00k
          const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize);
505
9.00k
          const int depth = tx_size_to_depth(mbmi->tx_size, bsize);
506
9.00k
          const int max_depths = bsize_to_max_depth(bsize);
507
508
9.00k
          if (tile_data->allow_update_cdf)
509
9.00k
            update_cdf(xd->tile_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx],
510
9.00k
                       depth, max_depths + 1);
511
#if CONFIG_ENTROPY_STATS
512
          ++td->counts->intra_tx_size[tx_size_cat][tx_size_ctx][depth];
513
#endif
514
9.00k
        }
515
9.00k
      }
516
9.00k
      assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(xd, mbmi)));
517
9.00k
    } else {
518
2.80k
      int i, j;
519
2.80k
      TX_SIZE intra_tx_size;
520
      // The new intra coding scheme requires no change of transform size
521
2.80k
      if (is_inter) {
522
0
        if (xd->lossless[mbmi->segment_id]) {
523
0
          intra_tx_size = TX_4X4;
524
0
        } else {
525
0
          intra_tx_size =
526
0
              tx_size_from_tx_mode(bsize, txfm_params->tx_mode_search_type);
527
0
        }
528
2.80k
      } else {
529
2.80k
        intra_tx_size = mbmi->tx_size;
530
2.80k
      }
531
532
42.8k
      for (j = 0; j < mi_height; j++)
533
621k
        for (i = 0; i < mi_width; i++)
534
581k
          if (mi_col + i < cm->mi_params.mi_cols &&
535
581k
              mi_row + j < cm->mi_params.mi_rows)
536
505k
            mi_4x4[mis * j + i]->tx_size = intra_tx_size;
537
538
2.80k
      if (intra_tx_size != max_txsize_rect_lookup[bsize])
539
2.80k
        ++x->txfm_search_info.txb_split_count;
540
2.80k
    }
541
11.8k
  }
542
543
61.6k
  if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
544
61.6k
      block_signals_txsize(mbmi->bsize) && is_inter &&
545
61.6k
      !(mbmi->skip_txfm || seg_skip) && !xd->lossless[mbmi->segment_id]) {
546
0
    if (dry_run) tx_partition_set_contexts(cm, xd, bsize);
547
61.6k
  } else {
548
61.6k
    TX_SIZE tx_size = mbmi->tx_size;
549
    // The new intra coding scheme requires no change of transform size
550
61.6k
    if (is_inter) {
551
0
      if (xd->lossless[mbmi->segment_id]) {
552
0
        tx_size = TX_4X4;
553
0
      } else {
554
0
        tx_size = tx_size_from_tx_mode(bsize, txfm_params->tx_mode_search_type);
555
0
      }
556
61.6k
    } else {
557
61.6k
      tx_size = (bsize > BLOCK_4X4) ? tx_size : TX_4X4;
558
61.6k
    }
559
61.6k
    mbmi->tx_size = tx_size;
560
61.6k
    set_txfm_ctxs(tx_size, xd->width, xd->height,
561
61.6k
                  (mbmi->skip_txfm || seg_skip) && is_inter_block(mbmi), xd);
562
61.6k
  }
563
564
61.6k
  if (is_inter_block(mbmi) && !xd->is_chroma_ref && is_cfl_allowed(xd)) {
565
0
    cfl_store_block(xd, mbmi->bsize, mbmi->tx_size);
566
0
  }
567
61.6k
  if (!dry_run) {
568
11.8k
    if (cpi->oxcf.pass == AOM_RC_ONE_PASS && cpi->svc.temporal_layer_id == 0 &&
569
11.8k
        cpi->sf.rt_sf.use_temporal_noise_estimate &&
570
11.8k
        (!cpi->ppi->use_svc ||
571
0
         (cpi->ppi->use_svc &&
572
0
          !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
573
0
          cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)))
574
0
      update_zeromv_cnt(cpi, mbmi, mi_row, mi_col, bsize);
575
11.8k
  }
576
61.6k
}
577
578
static void setup_block_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
579
                               int mi_row, int mi_col, BLOCK_SIZE bsize,
580
255k
                               AQ_MODE aq_mode, MB_MODE_INFO *mbmi) {
581
255k
  x->rdmult = cpi->rd.RDMULT;
582
583
255k
  if (aq_mode != NO_AQ) {
584
0
    assert(mbmi != NULL);
585
0
    if (aq_mode == VARIANCE_AQ) {
586
0
      if (cpi->vaq_refresh) {
587
0
        const int energy = bsize <= BLOCK_16X16
588
0
                               ? x->mb_energy
589
0
                               : av1_log_block_var(cpi, x, bsize);
590
0
        mbmi->segment_id = energy;
591
0
      }
592
0
      x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
593
0
    } else if (aq_mode == COMPLEXITY_AQ) {
594
0
      x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
595
0
    } else if (aq_mode == CYCLIC_REFRESH_AQ) {
596
      // If segment is boosted, use rdmult for that segment.
597
0
      if (cyclic_refresh_segment_id_boosted(mbmi->segment_id))
598
0
        x->rdmult = av1_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
599
0
    }
600
0
  }
601
602
255k
#if !CONFIG_REALTIME_ONLY
603
255k
  const AV1_COMMON *const cm = &cpi->common;
604
255k
  if (cm->delta_q_info.delta_q_present_flag &&
605
255k
      !cpi->sf.rt_sf.use_nonrd_pick_mode) {
606
0
    x->rdmult = av1_get_cb_rdmult(cpi, x, bsize, mi_row, mi_col);
607
0
  }
608
255k
#endif  // !CONFIG_REALTIME_ONLY
609
610
255k
  if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIM) {
611
255k
    av1_set_ssim_rdmult(cpi, &x->errorperbit, bsize, mi_row, mi_col,
612
255k
                        &x->rdmult);
613
255k
  }
614
#if CONFIG_TUNE_VMAF
615
  if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
616
      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN ||
617
      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) {
618
    av1_set_vmaf_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
619
  }
620
#endif
621
#if CONFIG_TUNE_BUTTERAUGLI
622
  if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_BUTTERAUGLI) {
623
    av1_set_butteraugli_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
624
  }
625
#endif
626
255k
}
627
628
void av1_set_offsets_without_segment_id(const AV1_COMP *const cpi,
629
                                        const TileInfo *const tile,
630
                                        MACROBLOCK *const x, int mi_row,
631
255k
                                        int mi_col, BLOCK_SIZE bsize) {
632
255k
  const AV1_COMMON *const cm = &cpi->common;
633
255k
  const int num_planes = av1_num_planes(cm);
634
255k
  MACROBLOCKD *const xd = &x->e_mbd;
635
255k
  assert(bsize < BLOCK_SIZES_ALL);
636
255k
  const int mi_width = mi_size_wide[bsize];
637
255k
  const int mi_height = mi_size_high[bsize];
638
639
255k
  set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd,
640
255k
                        mi_row, mi_col);
641
642
255k
  set_entropy_context(xd, mi_row, mi_col, num_planes);
643
255k
  xd->above_txfm_context = cm->above_contexts.txfm[tile->tile_row] + mi_col;
644
255k
  xd->left_txfm_context =
645
255k
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
646
647
  // Set up destination pointers.
648
255k
  av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col, 0,
649
255k
                       num_planes);
650
651
  // Set up limit values for MV components.
652
  // MVs beyond the range do not produce a new/different prediction block.
653
255k
  av1_set_mv_limits(&cm->mi_params, &x->mv_limits, mi_row, mi_col, mi_height,
654
255k
                    mi_width, cpi->oxcf.border_in_pixels);
655
656
255k
  set_plane_n4(xd, mi_width, mi_height, num_planes);
657
658
  // Set up distance of MB to edge of frame in 1/8th pel units.
659
255k
  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
660
255k
  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width,
661
255k
                 cm->mi_params.mi_rows, cm->mi_params.mi_cols);
662
663
  // Set up source buffers.
664
255k
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
665
666
  // required by av1_append_sub8x8_mvs_for_idx() and av1_find_best_ref_mvs()
667
255k
  xd->tile = *tile;
668
255k
}
669
670
void av1_set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile,
671
                     MACROBLOCK *const x, int mi_row, int mi_col,
672
200k
                     BLOCK_SIZE bsize) {
673
200k
  const AV1_COMMON *const cm = &cpi->common;
674
200k
  const struct segmentation *const seg = &cm->seg;
675
200k
  MACROBLOCKD *const xd = &x->e_mbd;
676
200k
  MB_MODE_INFO *mbmi;
677
678
200k
  av1_set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);
679
680
  // Setup segment ID.
681
200k
  mbmi = xd->mi[0];
682
200k
  mbmi->segment_id = 0;
683
200k
  if (seg->enabled) {
684
0
    if (seg->enabled && !cpi->vaq_refresh) {
685
0
      const uint8_t *const map =
686
0
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
687
0
      mbmi->segment_id =
688
0
          map ? get_segment_id(&cm->mi_params, map, bsize, mi_row, mi_col) : 0;
689
0
    }
690
0
    av1_init_plane_quantizers(cpi, x, mbmi->segment_id);
691
0
  }
692
200k
}
693
694
/*!\brief Hybrid intra mode search.
695
 *
696
 * \ingroup intra_mode_search
697
 * \callgraph
698
 * \callergraph
699
 * This is the top-level function for intra-frame mode search in the non-RD
700
 * optimized case. Depending on the speed features and block size, it calls
701
 * either non-RD or RD optimized intra mode search.
702
 *
703
 * \param[in]    cpi            Top-level encoder structure
704
 * \param[in]    x              Pointer to structure holding all the data for
705
                                the current macroblock
706
 * \param[in]    rd_cost        Struct to keep track of the RD information
707
 * \param[in]    bsize          Current block size
708
 * \param[in]    ctx            Structure to hold snapshot of coding context
709
                                during the mode picking process
710
 *
711
 * \return Nothing is returned. Instead, the MB_MODE_INFO struct inside x
712
 * is modified to store information about the best mode computed
713
 * in this function. The rd_cost struct is also updated with the RD stats
714
 * corresponding to the best mode found.
715
 */
716
717
static AOM_INLINE void hybrid_intra_mode_search(AV1_COMP *cpi,
718
                                                MACROBLOCK *const x,
719
                                                RD_STATS *rd_cost,
720
                                                BLOCK_SIZE bsize,
721
0
                                                PICK_MODE_CONTEXT *ctx) {
722
0
  if (cpi->sf.rt_sf.hybrid_intra_pickmode && bsize < BLOCK_16X16)
723
0
    av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
724
0
  else
725
0
    av1_nonrd_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
726
0
}
727
728
// For real time/allintra row-mt enabled multi-threaded encoding with cost
729
// update frequency set to COST_UPD_TILE/COST_UPD_OFF, tile ctxt is not updated
730
// at the superblock level. Thus, the encoding of the top-right superblock need
731
// not be complete before the tile ctxt is updated. However, when encoding a block
732
// whose right edge is also the superblock edge, intra and inter mode evaluation
733
// (ref mv list population) require the encoding of the top-right superblock to
734
// be complete. So, here, we delay the thread wait until the data from the
735
// top-right superblock region is actually needed.
736
static AOM_INLINE void wait_for_top_right_sb(
737
    AV1EncRowMultiThreadInfo *enc_row_mt, AV1EncRowMultiThreadSync *row_mt_sync,
738
    TileInfo *tile_info, BLOCK_SIZE sb_size, int sb_mi_size_log2,
739
112k
    BLOCK_SIZE bsize, int mi_row, int mi_col) {
740
112k
  const int sb_size_in_mi = mi_size_wide[sb_size];
741
112k
  const int bw_in_mi = mi_size_wide[bsize];
742
112k
  const int blk_row_in_sb = mi_row & (sb_size_in_mi - 1);
743
112k
  const int blk_col_in_sb = mi_col & (sb_size_in_mi - 1);
744
112k
  const int top_right_block_in_sb =
745
112k
      (blk_row_in_sb == 0) && (blk_col_in_sb + bw_in_mi >= sb_size_in_mi);
746
747
  // Don't wait if the block is not the top-right block in the superblock.
748
112k
  if (!top_right_block_in_sb) return;
749
750
  // Wait for the top-right superblock to finish encoding.
751
15.1k
  const int sb_row_in_tile =
752
15.1k
      (mi_row - tile_info->mi_row_start) >> sb_mi_size_log2;
753
15.1k
  const int sb_col_in_tile =
754
15.1k
      (mi_col - tile_info->mi_col_start) >> sb_mi_size_log2;
755
756
15.1k
  (*(enc_row_mt->sync_read_ptr))(row_mt_sync, sb_row_in_tile, sb_col_in_tile);
757
15.1k
}
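A quick worked example of the top-right test above, with assumed sizes: for a 128x128 superblock (sb_size_in_mi = 32) and a 32x32 block (bw_in_mi = 8) located at blk_row_in_sb = 0 and blk_col_in_sb = 24, the check 24 + 8 >= 32 holds, so the block is the top-right block of its superblock and the sync_read_ptr wait on the top-right superblock is triggered; for any block not on the superblock's top row the function returns immediately.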
758
759
/*!\brief Interface for AV1 mode search for an individual coding block
760
 *
761
 * \ingroup partition_search
762
 * \callgraph
763
 * \callergraph
764
 * Searches prediction modes, transform, and coefficient coding modes for an
765
 * individual coding block. This function is the top-level interface that
766
 * directs the encoder to the proper mode search function, among these
767
 * implemented for inter/intra + rd/non-rd + non-skip segment/skip segment.
768
 *
769
 * \param[in]    cpi            Top-level encoder structure
770
 * \param[in]    tile_data      Pointer to struct holding adaptive
771
 *                              data/contexts/models for the tile during
772
 *                              encoding
773
 * \param[in]    x              Pointer to structure holding all the data for
774
 *                              the current macroblock
775
 * \param[in]    mi_row         Row coordinate of the block in a step size of
776
 *                              MI_SIZE
777
 * \param[in]    mi_col         Column coordinate of the block in a step size of
778
 *                              MI_SIZE
779
 * \param[in]    rd_cost        Pointer to structure holding rate and distortion
780
 *                              stats for the current block
781
 * \param[in]    partition      Partition mode of the parent block
782
 * \param[in]    bsize          Current block size
783
 * \param[in]    ctx            Pointer to structure holding coding contexts and
784
 *                              chosen modes for the current block
785
 * \param[in]    best_rd        Upper bound of rd cost of a valid partition
786
 *
787
 * \return Nothing is returned. Instead, the chosen modes and contexts necessary
788
 * for reconstruction are stored in ctx, the rate-distortion stats are stored in
789
 * rd_cost. If no valid mode leads to rd_cost <= best_rd, this is
790
 * signalled by an INT64_MAX rd_cost->rdcost.
791
 */
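A minimal caller-side sketch of the INT64_MAX contract described above. The surrounding variables and the use of PARTITION_NONE are placeholders, not code from this file:

  RD_STATS this_rdc;
  pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_NONE,
                bsize, ctx, best_rd);
  if (this_rdc.rdcost == INT64_MAX) {
    // No mode achieved rd_cost <= best_rd; skip this partition candidate.
  }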
792
static void pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
793
                          MACROBLOCK *const x, int mi_row, int mi_col,
794
                          RD_STATS *rd_cost, PARTITION_TYPE partition,
795
                          BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
796
122k
                          RD_STATS best_rd) {
797
122k
  if (cpi->sf.part_sf.use_best_rd_for_pruning && best_rd.rdcost < 0) {
798
10.4k
    ctx->rd_stats.rdcost = INT64_MAX;
799
10.4k
    ctx->rd_stats.skip_txfm = 0;
800
10.4k
    av1_invalid_rd_stats(rd_cost);
801
10.4k
    return;
802
10.4k
  }
803
804
112k
  av1_set_offsets(cpi, &tile_data->tile_info, x, mi_row, mi_col, bsize);
805
806
112k
  if (cpi->sf.part_sf.reuse_prev_rd_results_for_part_ab &&
807
112k
      ctx->rd_mode_is_ready) {
808
0
    assert(ctx->mic.bsize == bsize);
809
0
    assert(ctx->mic.partition == partition);
810
0
    rd_cost->rate = ctx->rd_stats.rate;
811
0
    rd_cost->dist = ctx->rd_stats.dist;
812
0
    rd_cost->rdcost = ctx->rd_stats.rdcost;
813
0
    return;
814
0
  }
815
816
112k
  AV1_COMMON *const cm = &cpi->common;
817
112k
  const int num_planes = av1_num_planes(cm);
818
112k
  MACROBLOCKD *const xd = &x->e_mbd;
819
112k
  MB_MODE_INFO *mbmi;
820
112k
  struct macroblock_plane *const p = x->plane;
821
112k
  struct macroblockd_plane *const pd = xd->plane;
822
112k
  const AQ_MODE aq_mode = cpi->oxcf.q_cfg.aq_mode;
823
112k
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
824
825
112k
  int i;
826
827
  // This is only needed for real time/allintra row-mt enabled multi-threaded
828
  // encoding with cost update frequency set to COST_UPD_TILE/COST_UPD_OFF.
829
112k
  wait_for_top_right_sb(&cpi->mt_info.enc_row_mt, &tile_data->row_mt_sync,
830
112k
                        &tile_data->tile_info, cm->seq_params->sb_size,
831
112k
                        cm->seq_params->mib_size_log2, bsize, mi_row, mi_col);
832
833
#if CONFIG_COLLECT_COMPONENT_TIMING
834
  start_timing(cpi, rd_pick_sb_modes_time);
835
#endif
836
837
112k
  mbmi = xd->mi[0];
838
112k
  mbmi->bsize = bsize;
839
112k
  mbmi->partition = partition;
840
841
#if CONFIG_RD_DEBUG
842
  mbmi->mi_row = mi_row;
843
  mbmi->mi_col = mi_col;
844
#endif
845
846
  // Sets up the tx_type_map buffer in MACROBLOCKD.
847
112k
  xd->tx_type_map = txfm_info->tx_type_map_;
848
112k
  xd->tx_type_map_stride = mi_size_wide[bsize];
849
850
448k
  for (i = 0; i < num_planes; ++i) {
851
336k
    p[i].coeff = ctx->coeff[i];
852
336k
    p[i].qcoeff = ctx->qcoeff[i];
853
336k
    p[i].dqcoeff = ctx->dqcoeff[i];
854
336k
    p[i].eobs = ctx->eobs[i];
855
336k
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
856
336k
  }
857
858
336k
  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
859
860
112k
  ctx->skippable = 0;
861
  // Set to zero to make sure we do not use stats from the previously encoded frame
862
112k
  mbmi->skip_txfm = 0;
863
  // Reset skip mode flag.
864
112k
  mbmi->skip_mode = 0;
865
866
112k
  if (is_cur_buf_hbd(xd)) {
867
0
    x->source_variance = av1_high_get_sby_perpixel_variance(
868
0
        cpi, &x->plane[0].src, bsize, xd->bd);
869
112k
  } else {
870
112k
    x->source_variance =
871
112k
        av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
872
112k
  }
873
874
  // Initialize default mode evaluation params
875
112k
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);
876
877
  // Save rdmult before it might be changed, so it can be restored later.
878
112k
  const int orig_rdmult = x->rdmult;
879
112k
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode, mbmi);
880
  // Set error per bit for current rdmult
881
112k
  av1_set_error_per_bit(&x->errorperbit, x->rdmult);
882
112k
  av1_rd_cost_update(x->rdmult, &best_rd);
883
884
  // If best_rd.rdcost is set to INT64_MAX, the encoder will not use any previous
885
  // rdcost information for the following mode search.
886
  // Disabling the feature could yield some coding gain, at the cost of encoder slowdown.
887
112k
  if (!cpi->sf.part_sf.use_best_rd_for_pruning) {
888
0
    av1_invalid_rd_stats(&best_rd);
889
0
  }
890
891
  // Find best coding mode & reconstruct the MB so it is available
892
  // as a predictor for MBs that follow in the SB
893
112k
  if (frame_is_intra_only(cm)) {
894
#if CONFIG_COLLECT_COMPONENT_TIMING
895
    start_timing(cpi, av1_rd_pick_intra_mode_sb_time);
896
#endif
897
112k
    av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd.rdcost);
898
#if CONFIG_COLLECT_COMPONENT_TIMING
899
    end_timing(cpi, av1_rd_pick_intra_mode_sb_time);
900
#endif
901
112k
  } else {
902
#if CONFIG_COLLECT_COMPONENT_TIMING
903
    start_timing(cpi, av1_rd_pick_inter_mode_sb_time);
904
#endif
905
4
    if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
906
0
      av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
907
0
                                         rd_cost, bsize, ctx, best_rd.rdcost);
908
4
    } else {
909
4
      av1_rd_pick_inter_mode(cpi, tile_data, x, rd_cost, bsize, ctx,
910
4
                             best_rd.rdcost);
911
4
    }
912
#if CONFIG_COLLECT_COMPONENT_TIMING
913
    end_timing(cpi, av1_rd_pick_inter_mode_sb_time);
914
#endif
915
4
  }
916
917
  // Examine the resulting rate and, for AQ mode 2, make a segment choice.
918
112k
  if (rd_cost->rate != INT_MAX && aq_mode == COMPLEXITY_AQ &&
919
112k
      bsize >= BLOCK_16X16) {
920
0
    av1_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
921
0
  }
922
923
112k
  x->rdmult = orig_rdmult;
924
925
  // TODO(jingning) The rate-distortion optimization flow needs to be
926
  // refactored to provide proper exit/return handle.
927
112k
  if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;
928
929
112k
  ctx->rd_stats.rate = rd_cost->rate;
930
112k
  ctx->rd_stats.dist = rd_cost->dist;
931
112k
  ctx->rd_stats.rdcost = rd_cost->rdcost;
932
933
#if CONFIG_COLLECT_COMPONENT_TIMING
934
  end_timing(cpi, rd_pick_sb_modes_time);
935
#endif
936
112k
}
937
938
11.8k
static void update_stats(const AV1_COMMON *const cm, ThreadData *td) {
939
11.8k
  MACROBLOCK *x = &td->mb;
940
11.8k
  MACROBLOCKD *const xd = &x->e_mbd;
941
11.8k
  const MB_MODE_INFO *const mbmi = xd->mi[0];
942
11.8k
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
943
11.8k
  const CurrentFrame *const current_frame = &cm->current_frame;
944
11.8k
  const BLOCK_SIZE bsize = mbmi->bsize;
945
11.8k
  FRAME_CONTEXT *fc = xd->tile_ctx;
946
11.8k
  const int seg_ref_active =
947
11.8k
      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
948
949
11.8k
  if (current_frame->skip_mode_info.skip_mode_flag && !seg_ref_active &&
950
11.8k
      is_comp_ref_allowed(bsize)) {
951
0
    const int skip_mode_ctx = av1_get_skip_mode_context(xd);
952
#if CONFIG_ENTROPY_STATS
953
    td->counts->skip_mode[skip_mode_ctx][mbmi->skip_mode]++;
954
#endif
955
0
    update_cdf(fc->skip_mode_cdfs[skip_mode_ctx], mbmi->skip_mode, 2);
956
0
  }
957
958
11.8k
  if (!mbmi->skip_mode && !seg_ref_active) {
959
11.8k
    const int skip_ctx = av1_get_skip_txfm_context(xd);
960
#if CONFIG_ENTROPY_STATS
961
    td->counts->skip_txfm[skip_ctx][mbmi->skip_txfm]++;
962
#endif
963
11.8k
    update_cdf(fc->skip_txfm_cdfs[skip_ctx], mbmi->skip_txfm, 2);
964
11.8k
  }
965
966
#if CONFIG_ENTROPY_STATS
967
  // delta quant applies to both intra and inter
968
  const int super_block_upper_left =
969
      ((xd->mi_row & (cm->seq_params->mib_size - 1)) == 0) &&
970
      ((xd->mi_col & (cm->seq_params->mib_size - 1)) == 0);
971
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
972
  if (delta_q_info->delta_q_present_flag &&
973
      (bsize != cm->seq_params->sb_size || !mbmi->skip_txfm) &&
974
      super_block_upper_left) {
975
    const int dq = (mbmi->current_qindex - xd->current_base_qindex) /
976
                   delta_q_info->delta_q_res;
977
    const int absdq = abs(dq);
978
    for (int i = 0; i < AOMMIN(absdq, DELTA_Q_SMALL); ++i) {
979
      td->counts->delta_q[i][1]++;
980
    }
981
    if (absdq < DELTA_Q_SMALL) td->counts->delta_q[absdq][0]++;
982
    if (delta_q_info->delta_lf_present_flag) {
983
      if (delta_q_info->delta_lf_multi) {
984
        const int frame_lf_count =
985
            av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
986
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
987
          const int delta_lf = (mbmi->delta_lf[lf_id] - xd->delta_lf[lf_id]) /
988
                               delta_q_info->delta_lf_res;
989
          const int abs_delta_lf = abs(delta_lf);
990
          for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
991
            td->counts->delta_lf_multi[lf_id][i][1]++;
992
          }
993
          if (abs_delta_lf < DELTA_LF_SMALL)
994
            td->counts->delta_lf_multi[lf_id][abs_delta_lf][0]++;
995
        }
996
      } else {
997
        const int delta_lf =
998
            (mbmi->delta_lf_from_base - xd->delta_lf_from_base) /
999
            delta_q_info->delta_lf_res;
1000
        const int abs_delta_lf = abs(delta_lf);
1001
        for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
1002
          td->counts->delta_lf[i][1]++;
1003
        }
1004
        if (abs_delta_lf < DELTA_LF_SMALL)
1005
          td->counts->delta_lf[abs_delta_lf][0]++;
1006
      }
1007
    }
1008
  }
1009
#endif
1010
1011
11.8k
  if (!is_inter_block(mbmi)) {
1012
11.8k
    av1_sum_intra_stats(cm, td->counts, xd, mbmi, xd->above_mbmi, xd->left_mbmi,
1013
11.8k
                        frame_is_intra_only(cm));
1014
11.8k
  }
1015
1016
11.8k
  if (av1_allow_intrabc(cm)) {
1017
0
    const int is_intrabc = is_intrabc_block(mbmi);
1018
0
    update_cdf(fc->intrabc_cdf, is_intrabc, 2);
1019
#if CONFIG_ENTROPY_STATS
1020
    ++td->counts->intrabc[is_intrabc];
1021
#endif  // CONFIG_ENTROPY_STATS
1022
0
    if (is_intrabc) {
1023
0
      const int_mv dv_ref = x->mbmi_ext_frame->ref_mv_stack[0].this_mv;
1024
0
      av1_update_mv_stats(&mbmi->mv[0].as_mv, &dv_ref.as_mv, &fc->ndvc,
1025
0
                          MV_SUBPEL_NONE);
1026
0
    }
1027
0
  }
1028
1029
11.8k
  if (frame_is_intra_only(cm) || mbmi->skip_mode) return;
1030
1031
2
  FRAME_COUNTS *const counts = td->counts;
1032
2
  const int inter_block = is_inter_block(mbmi);
1033
1034
2
  if (!seg_ref_active) {
1035
#if CONFIG_ENTROPY_STATS
1036
    counts->intra_inter[av1_get_intra_inter_context(xd)][inter_block]++;
1037
#endif
1038
0
    update_cdf(fc->intra_inter_cdf[av1_get_intra_inter_context(xd)],
1039
0
               inter_block, 2);
1040
    // If the segment reference feature is enabled we have only a single
1041
    // reference frame allowed for the segment so exclude it from
1042
    // the reference frame counts used to work out probabilities.
1043
0
    if (inter_block) {
1044
0
      const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
1045
0
      const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1];
1046
0
      if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
1047
0
        if (is_comp_ref_allowed(bsize)) {
1048
#if CONFIG_ENTROPY_STATS
1049
          counts->comp_inter[av1_get_reference_mode_context(xd)]
1050
                            [has_second_ref(mbmi)]++;
1051
#endif  // CONFIG_ENTROPY_STATS
1052
0
          update_cdf(av1_get_reference_mode_cdf(xd), has_second_ref(mbmi), 2);
1053
0
        }
1054
0
      }
1055
1056
0
      if (has_second_ref(mbmi)) {
1057
0
        const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi)
1058
0
                                                      ? UNIDIR_COMP_REFERENCE
1059
0
                                                      : BIDIR_COMP_REFERENCE;
1060
0
        update_cdf(av1_get_comp_reference_type_cdf(xd), comp_ref_type,
1061
0
                   COMP_REFERENCE_TYPES);
1062
#if CONFIG_ENTROPY_STATS
1063
        counts->comp_ref_type[av1_get_comp_reference_type_context(xd)]
1064
                             [comp_ref_type]++;
1065
#endif  // CONFIG_ENTROPY_STATS
1066
1067
0
        if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
1068
0
          const int bit = (ref0 == BWDREF_FRAME);
1069
0
          update_cdf(av1_get_pred_cdf_uni_comp_ref_p(xd), bit, 2);
1070
#if CONFIG_ENTROPY_STATS
1071
          counts
1072
              ->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p(xd)][0][bit]++;
1073
#endif  // CONFIG_ENTROPY_STATS
1074
0
          if (!bit) {
1075
0
            const int bit1 = (ref1 == LAST3_FRAME || ref1 == GOLDEN_FRAME);
1076
0
            update_cdf(av1_get_pred_cdf_uni_comp_ref_p1(xd), bit1, 2);
1077
#if CONFIG_ENTROPY_STATS
1078
            counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p1(xd)][1]
1079
                                [bit1]++;
1080
#endif  // CONFIG_ENTROPY_STATS
1081
0
            if (bit1) {
1082
0
              update_cdf(av1_get_pred_cdf_uni_comp_ref_p2(xd),
1083
0
                         ref1 == GOLDEN_FRAME, 2);
1084
#if CONFIG_ENTROPY_STATS
1085
              counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p2(xd)][2]
1086
                                  [ref1 == GOLDEN_FRAME]++;
1087
#endif  // CONFIG_ENTROPY_STATS
1088
0
            }
1089
0
          }
1090
0
        } else {
1091
0
          const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME);
1092
0
          update_cdf(av1_get_pred_cdf_comp_ref_p(xd), bit, 2);
1093
#if CONFIG_ENTROPY_STATS
1094
          counts->comp_ref[av1_get_pred_context_comp_ref_p(xd)][0][bit]++;
1095
#endif  // CONFIG_ENTROPY_STATS
1096
0
          if (!bit) {
1097
0
            update_cdf(av1_get_pred_cdf_comp_ref_p1(xd), ref0 == LAST2_FRAME,
1098
0
                       2);
1099
#if CONFIG_ENTROPY_STATS
1100
            counts->comp_ref[av1_get_pred_context_comp_ref_p1(xd)][1]
1101
                            [ref0 == LAST2_FRAME]++;
1102
#endif  // CONFIG_ENTROPY_STATS
1103
0
          } else {
1104
0
            update_cdf(av1_get_pred_cdf_comp_ref_p2(xd), ref0 == GOLDEN_FRAME,
1105
0
                       2);
1106
#if CONFIG_ENTROPY_STATS
1107
            counts->comp_ref[av1_get_pred_context_comp_ref_p2(xd)][2]
1108
                            [ref0 == GOLDEN_FRAME]++;
1109
#endif  // CONFIG_ENTROPY_STATS
1110
0
          }
1111
0
          update_cdf(av1_get_pred_cdf_comp_bwdref_p(xd), ref1 == ALTREF_FRAME,
1112
0
                     2);
1113
#if CONFIG_ENTROPY_STATS
1114
          counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(xd)][0]
1115
                             [ref1 == ALTREF_FRAME]++;
1116
#endif  // CONFIG_ENTROPY_STATS
1117
0
          if (ref1 != ALTREF_FRAME) {
1118
0
            update_cdf(av1_get_pred_cdf_comp_bwdref_p1(xd),
1119
0
                       ref1 == ALTREF2_FRAME, 2);
1120
#if CONFIG_ENTROPY_STATS
1121
            counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p1(xd)][1]
1122
                               [ref1 == ALTREF2_FRAME]++;
1123
#endif  // CONFIG_ENTROPY_STATS
1124
0
          }
1125
0
        }
1126
0
      } else {
1127
0
        const int bit = (ref0 >= BWDREF_FRAME);
1128
0
        update_cdf(av1_get_pred_cdf_single_ref_p1(xd), bit, 2);
1129
#if CONFIG_ENTROPY_STATS
1130
        counts->single_ref[av1_get_pred_context_single_ref_p1(xd)][0][bit]++;
1131
#endif  // CONFIG_ENTROPY_STATS
1132
0
        if (bit) {
1133
0
          assert(ref0 <= ALTREF_FRAME);
1134
0
          update_cdf(av1_get_pred_cdf_single_ref_p2(xd), ref0 == ALTREF_FRAME,
1135
0
                     2);
1136
#if CONFIG_ENTROPY_STATS
1137
          counts->single_ref[av1_get_pred_context_single_ref_p2(xd)][1]
1138
                            [ref0 == ALTREF_FRAME]++;
1139
#endif  // CONFIG_ENTROPY_STATS
1140
0
          if (ref0 != ALTREF_FRAME) {
1141
0
            update_cdf(av1_get_pred_cdf_single_ref_p6(xd),
1142
0
                       ref0 == ALTREF2_FRAME, 2);
1143
#if CONFIG_ENTROPY_STATS
1144
            counts->single_ref[av1_get_pred_context_single_ref_p6(xd)][5]
1145
                              [ref0 == ALTREF2_FRAME]++;
1146
#endif  // CONFIG_ENTROPY_STATS
1147
0
          }
1148
0
        } else {
1149
0
          const int bit1 = !(ref0 == LAST2_FRAME || ref0 == LAST_FRAME);
1150
0
          update_cdf(av1_get_pred_cdf_single_ref_p3(xd), bit1, 2);
1151
#if CONFIG_ENTROPY_STATS
1152
          counts->single_ref[av1_get_pred_context_single_ref_p3(xd)][2][bit1]++;
1153
#endif  // CONFIG_ENTROPY_STATS
1154
0
          if (!bit1) {
1155
0
            update_cdf(av1_get_pred_cdf_single_ref_p4(xd), ref0 != LAST_FRAME,
1156
0
                       2);
1157
#if CONFIG_ENTROPY_STATS
1158
            counts->single_ref[av1_get_pred_context_single_ref_p4(xd)][3]
1159
                              [ref0 != LAST_FRAME]++;
1160
#endif  // CONFIG_ENTROPY_STATS
1161
0
          } else {
1162
0
            update_cdf(av1_get_pred_cdf_single_ref_p5(xd), ref0 != LAST3_FRAME,
1163
0
                       2);
1164
#if CONFIG_ENTROPY_STATS
1165
            counts->single_ref[av1_get_pred_context_single_ref_p5(xd)][4]
1166
                              [ref0 != LAST3_FRAME]++;
1167
#endif  // CONFIG_ENTROPY_STATS
1168
0
          }
1169
0
        }
1170
0
      }
1171
1172
0
      if (cm->seq_params->enable_interintra_compound &&
1173
0
          is_interintra_allowed(mbmi)) {
1174
0
        const int bsize_group = size_group_lookup[bsize];
1175
0
        if (mbmi->ref_frame[1] == INTRA_FRAME) {
1176
#if CONFIG_ENTROPY_STATS
1177
          counts->interintra[bsize_group][1]++;
1178
#endif
1179
0
          update_cdf(fc->interintra_cdf[bsize_group], 1, 2);
1180
#if CONFIG_ENTROPY_STATS
1181
          counts->interintra_mode[bsize_group][mbmi->interintra_mode]++;
1182
#endif
1183
0
          update_cdf(fc->interintra_mode_cdf[bsize_group],
1184
0
                     mbmi->interintra_mode, INTERINTRA_MODES);
1185
0
          if (av1_is_wedge_used(bsize)) {
1186
#if CONFIG_ENTROPY_STATS
1187
            counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
1188
#endif
1189
0
            update_cdf(fc->wedge_interintra_cdf[bsize],
1190
0
                       mbmi->use_wedge_interintra, 2);
1191
0
            if (mbmi->use_wedge_interintra) {
1192
#if CONFIG_ENTROPY_STATS
1193
              counts->wedge_idx[bsize][mbmi->interintra_wedge_index]++;
1194
#endif
1195
0
              update_cdf(fc->wedge_idx_cdf[bsize], mbmi->interintra_wedge_index,
1196
0
                         16);
1197
0
            }
1198
0
          }
1199
0
        } else {
1200
#if CONFIG_ENTROPY_STATS
1201
          counts->interintra[bsize_group][0]++;
1202
#endif
1203
0
          update_cdf(fc->interintra_cdf[bsize_group], 0, 2);
1204
0
        }
1205
0
      }
1206
1207
0
      const MOTION_MODE motion_allowed =
1208
0
          cm->features.switchable_motion_mode
1209
0
              ? motion_mode_allowed(xd->global_motion, xd, mbmi,
1210
0
                                    cm->features.allow_warped_motion)
1211
0
              : SIMPLE_TRANSLATION;
1212
0
      if (mbmi->ref_frame[1] != INTRA_FRAME) {
1213
0
        if (motion_allowed == WARPED_CAUSAL) {
1214
#if CONFIG_ENTROPY_STATS
1215
          counts->motion_mode[bsize][mbmi->motion_mode]++;
1216
#endif
1217
0
          update_cdf(fc->motion_mode_cdf[bsize], mbmi->motion_mode,
1218
0
                     MOTION_MODES);
1219
0
        } else if (motion_allowed == OBMC_CAUSAL) {
1220
#if CONFIG_ENTROPY_STATS
1221
          counts->obmc[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
1222
#endif
1223
0
          update_cdf(fc->obmc_cdf[bsize], mbmi->motion_mode == OBMC_CAUSAL, 2);
1224
0
        }
1225
0
      }
1226
1227
0
      if (has_second_ref(mbmi)) {
1228
0
        assert(current_frame->reference_mode != SINGLE_REFERENCE &&
1229
0
               is_inter_compound_mode(mbmi->mode) &&
1230
0
               mbmi->motion_mode == SIMPLE_TRANSLATION);
1231
1232
0
        const int masked_compound_used = is_any_masked_compound_used(bsize) &&
1233
0
                                         cm->seq_params->enable_masked_compound;
1234
0
        if (masked_compound_used) {
1235
0
          const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
1236
#if CONFIG_ENTROPY_STATS
1237
          ++counts->comp_group_idx[comp_group_idx_ctx][mbmi->comp_group_idx];
1238
#endif
1239
0
          update_cdf(fc->comp_group_idx_cdf[comp_group_idx_ctx],
1240
0
                     mbmi->comp_group_idx, 2);
1241
0
        }
1242
1243
0
        if (mbmi->comp_group_idx == 0) {
1244
0
          const int comp_index_ctx = get_comp_index_context(cm, xd);
1245
#if CONFIG_ENTROPY_STATS
1246
          ++counts->compound_index[comp_index_ctx][mbmi->compound_idx];
1247
#endif
1248
0
          update_cdf(fc->compound_index_cdf[comp_index_ctx], mbmi->compound_idx,
1249
0
                     2);
1250
0
        } else {
1251
0
          assert(masked_compound_used);
1252
0
          if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
1253
#if CONFIG_ENTROPY_STATS
1254
            ++counts->compound_type[bsize][mbmi->interinter_comp.type -
1255
                                           COMPOUND_WEDGE];
1256
#endif
1257
0
            update_cdf(fc->compound_type_cdf[bsize],
1258
0
                       mbmi->interinter_comp.type - COMPOUND_WEDGE,
1259
0
                       MASKED_COMPOUND_TYPES);
1260
0
          }
1261
0
        }
1262
0
      }
1263
0
      if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
1264
0
        if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
1265
#if CONFIG_ENTROPY_STATS
1266
          counts->wedge_idx[bsize][mbmi->interinter_comp.wedge_index]++;
1267
#endif
1268
0
          update_cdf(fc->wedge_idx_cdf[bsize],
1269
0
                     mbmi->interinter_comp.wedge_index, 16);
1270
0
        }
1271
0
      }
1272
0
    }
1273
0
  }
1274
1275
2
  if (inter_block && cm->features.interp_filter == SWITCHABLE &&
1276
2
      mbmi->motion_mode != WARPED_CAUSAL &&
1277
2
      !is_nontrans_global_motion(xd, mbmi)) {
1278
0
    update_filter_type_cdf(xd, mbmi, cm->seq_params->enable_dual_filter);
1279
0
  }
1280
2
  if (inter_block &&
1281
2
      !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
1282
0
    const PREDICTION_MODE mode = mbmi->mode;
1283
0
    const int16_t mode_ctx =
1284
0
        av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
1285
0
    if (has_second_ref(mbmi)) {
1286
#if CONFIG_ENTROPY_STATS
1287
      ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
1288
#endif
1289
0
      update_cdf(fc->inter_compound_mode_cdf[mode_ctx],
1290
0
                 INTER_COMPOUND_OFFSET(mode), INTER_COMPOUND_MODES);
1291
0
    } else {
1292
0
      av1_update_inter_mode_stats(fc, counts, mode, mode_ctx);
1293
0
    }
1294
1295
0
    const int new_mv = mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV;
1296
0
    if (new_mv) {
1297
0
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1298
0
      for (int idx = 0; idx < 2; ++idx) {
1299
0
        if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1300
0
          const uint8_t drl_ctx =
1301
0
              av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1302
0
          update_cdf(fc->drl_cdf[drl_ctx], mbmi->ref_mv_idx != idx, 2);
1303
#if CONFIG_ENTROPY_STATS
1304
          ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx];
1305
#endif
1306
0
          if (mbmi->ref_mv_idx == idx) break;
1307
0
        }
1308
0
      }
1309
0
    }
1310
1311
0
    if (have_nearmv_in_inter_mode(mbmi->mode)) {
1312
0
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1313
0
      for (int idx = 1; idx < 3; ++idx) {
1314
0
        if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1315
0
          const uint8_t drl_ctx =
1316
0
              av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1317
0
          update_cdf(fc->drl_cdf[drl_ctx], mbmi->ref_mv_idx != idx - 1, 2);
1318
#if CONFIG_ENTROPY_STATS
1319
          ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1];
1320
#endif
1321
0
          if (mbmi->ref_mv_idx == idx - 1) break;
1322
0
        }
1323
0
      }
1324
0
    }
1325
0
    if (have_newmv_in_inter_mode(mbmi->mode)) {
1326
0
      const int allow_hp = cm->features.cur_frame_force_integer_mv
1327
0
                               ? MV_SUBPEL_NONE
1328
0
                               : cm->features.allow_high_precision_mv;
1329
0
      if (new_mv) {
1330
0
        for (int ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
1331
0
          const int_mv ref_mv = av1_get_ref_mv(x, ref);
1332
0
          av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
1333
0
                              allow_hp);
1334
0
        }
1335
0
      } else if (mbmi->mode == NEAREST_NEWMV || mbmi->mode == NEAR_NEWMV) {
1336
0
        const int ref = 1;
1337
0
        const int_mv ref_mv = av1_get_ref_mv(x, ref);
1338
0
        av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
1339
0
                            allow_hp);
1340
0
      } else if (mbmi->mode == NEW_NEARESTMV || mbmi->mode == NEW_NEARMV) {
1341
0
        const int ref = 0;
1342
0
        const int_mv ref_mv = av1_get_ref_mv(x, ref);
1343
0
        av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
1344
0
                            allow_hp);
1345
0
      }
1346
0
    }
1347
0
  }
1348
2
}
1349
1350
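/* A minimal illustration of what the update_cdf(cdf, symbol, nsymbs) calls
 * above achieve conceptually: after a symbol is coded, the cumulative
 * distribution is nudged toward that symbol so later blocks in the tile are
 * coded with adapted probabilities. The 15-bit scale, the cdf convention and
 * the fixed 1/32 adaptation rate below are assumptions made for this sketch;
 * the real rule lives in the AV1 entropy-coding code and adapts its rate. */
#include <stdint.h>

#define SKETCH_CDF_TOP (1 << 15) /* assumed probability scale */

static void sketch_update_cdf(uint16_t *cdf, int symbol, int nsymbs) {
  for (int i = 0; i < nsymbs - 1; ++i) {
    /* Empirical target: full scale once i reaches the coded symbol. */
    const int target = (i < symbol) ? 0 : SKETCH_CDF_TOP;
    const int step = (target - (int)cdf[i]) / 32; /* assumed adaptation rate */
    cdf[i] = (uint16_t)((int)cdf[i] + step);
  }
}
/* With nsymbs == 2 this mirrors the many binary updates above, e.g.
 * update_cdf(av1_get_pred_cdf_single_ref_p2(xd), ref0 == ALTREF_FRAME, 2). */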
/*!\brief Reconstructs an individual coding block
1351
 *
1352
 * \ingroup partition_search
1353
 * Reconstructs an individual coding block by applying the chosen modes stored
1354
 * in ctx, and also updates the mode counts and entropy models.
1355
 *
1356
 * \param[in]    cpi       Top-level encoder structure
1357
 * \param[in]    tile_data Pointer to struct holding adaptive
1358
 *                         data/contexts/models for the tile during encoding
1359
 * \param[in]    td        Pointer to thread data
1360
 * \param[in]    tp        Pointer to the starting token
1361
 * \param[in]    mi_row    Row coordinate of the block in a step size of MI_SIZE
1362
 * \param[in]    mi_col    Column coordinate of the block in a step size of
1363
 *                         MI_SIZE
1364
 * \param[in]    dry_run   A code indicating whether it is part of the final
1365
 *                         pass for reconstructing the superblock
1366
 * \param[in]    bsize     Current block size
1367
 * \param[in]    partition Partition mode of the parent block
1368
 * \param[in]    ctx       Pointer to structure holding coding contexts and the
1369
 *                         chosen modes for the current block
1370
 * \param[in]    rate      Pointer to the total rate for the current block
1371
 *
1372
 * \return Nothing is returned. Instead, reconstructions (w/o in-loop filters)
1373
 * will be updated in the pixel buffers in td->mb.e_mbd. Also, the chosen modes
1374
 * will be stored in the MB_MODE_INFO buffer td->mb.e_mbd.mi[0].
1375
 */
1376
static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data,
1377
                     ThreadData *td, TokenExtra **tp, int mi_row, int mi_col,
1378
                     RUN_TYPE dry_run, BLOCK_SIZE bsize,
1379
                     PARTITION_TYPE partition, PICK_MODE_CONTEXT *const ctx,
1380
55.3k
                     int *rate) {
1381
55.3k
  const AV1_COMMON *const cm = &cpi->common;
1382
55.3k
  TileInfo *const tile = &tile_data->tile_info;
1383
55.3k
  MACROBLOCK *const x = &td->mb;
1384
55.3k
  MACROBLOCKD *xd = &x->e_mbd;
1385
55.3k
  const int subsampling_x = cm->seq_params->subsampling_x;
1386
55.3k
  const int subsampling_y = cm->seq_params->subsampling_y;
1387
1388
55.3k
  av1_set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);
1389
55.3k
  const int origin_mult = x->rdmult;
1390
55.3k
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
1391
55.3k
  MB_MODE_INFO *mbmi = xd->mi[0];
1392
55.3k
  mbmi->partition = partition;
1393
55.3k
  av1_update_state(cpi, td, ctx, mi_row, mi_col, bsize, dry_run);
1394
1395
55.3k
  if (!dry_run) {
1396
11.8k
    set_cb_offsets(x->mbmi_ext_frame->cb_offset, x->cb_offset[PLANE_TYPE_Y],
1397
11.8k
                   x->cb_offset[PLANE_TYPE_UV]);
1398
11.8k
    assert(x->cb_offset[PLANE_TYPE_Y] <
1399
11.8k
           (1 << num_pels_log2_lookup[cpi->common.seq_params->sb_size]));
1400
11.8k
    assert(x->cb_offset[PLANE_TYPE_UV] <
1401
11.8k
           ((1 << num_pels_log2_lookup[cpi->common.seq_params->sb_size]) >>
1402
11.8k
            (subsampling_x + subsampling_y)));
1403
11.8k
  }
1404
1405
55.3k
  encode_superblock(cpi, tile_data, td, tp, dry_run, bsize, rate);
1406
1407
55.3k
  if (!dry_run) {
1408
11.8k
    update_cb_offsets(x, bsize, subsampling_x, subsampling_y);
1409
11.8k
    if (bsize == cpi->common.seq_params->sb_size && mbmi->skip_txfm == 1 &&
1410
11.8k
        cm->delta_q_info.delta_lf_present_flag) {
1411
0
      const int frame_lf_count =
1412
0
          av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
1413
0
      for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id)
1414
0
        mbmi->delta_lf[lf_id] = xd->delta_lf[lf_id];
1415
0
      mbmi->delta_lf_from_base = xd->delta_lf_from_base;
1416
0
    }
1417
11.8k
    if (has_second_ref(mbmi)) {
1418
0
      if (mbmi->compound_idx == 0 ||
1419
0
          mbmi->interinter_comp.type == COMPOUND_AVERAGE)
1420
0
        mbmi->comp_group_idx = 0;
1421
0
      else
1422
0
        mbmi->comp_group_idx = 1;
1423
0
    }
1424
1425
    // delta quant applies to both intra and inter
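    // The (mib_size - 1) masks below rely on mib_size being a power of two:
    // the test is true only for the block whose top-left mi unit sits at the
    // superblock's top-left corner, which is where the delta-q / delta-lf
    // tracking state below is refreshed.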
1426
11.8k
    const int super_block_upper_left =
1427
11.8k
        ((mi_row & (cm->seq_params->mib_size - 1)) == 0) &&
1428
11.8k
        ((mi_col & (cm->seq_params->mib_size - 1)) == 0);
1429
11.8k
    const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
1430
11.8k
    if (delta_q_info->delta_q_present_flag &&
1431
11.8k
        (bsize != cm->seq_params->sb_size || !mbmi->skip_txfm) &&
1432
11.8k
        super_block_upper_left) {
1433
0
      xd->current_base_qindex = mbmi->current_qindex;
1434
0
      if (delta_q_info->delta_lf_present_flag) {
1435
0
        if (delta_q_info->delta_lf_multi) {
1436
0
          const int frame_lf_count =
1437
0
              av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
1438
0
          for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
1439
0
            xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id];
1440
0
          }
1441
0
        } else {
1442
0
          xd->delta_lf_from_base = mbmi->delta_lf_from_base;
1443
0
        }
1444
0
      }
1445
0
    }
1446
1447
11.8k
    RD_COUNTS *rdc = &td->rd_counts;
1448
11.8k
    if (mbmi->skip_mode) {
1449
0
      assert(!frame_is_intra_only(cm));
1450
0
      rdc->skip_mode_used_flag = 1;
1451
0
      if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) {
1452
0
        assert(has_second_ref(mbmi));
1453
0
        rdc->compound_ref_used_flag = 1;
1454
0
      }
1455
0
      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
1456
11.8k
    } else {
1457
11.8k
      const int seg_ref_active =
1458
11.8k
          segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
1459
11.8k
      if (!seg_ref_active) {
1460
        // If the segment reference feature is enabled, we have only a single
1461
        // reference frame allowed for the segment, so exclude it from
1462
        // the reference frame counts used to work out probabilities.
1463
11.8k
        if (is_inter_block(mbmi)) {
1464
0
          av1_collect_neighbors_ref_counts(xd);
1465
0
          if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) {
1466
0
            if (has_second_ref(mbmi)) {
1467
              // This flag is also updated for 4x4 blocks
1468
0
              rdc->compound_ref_used_flag = 1;
1469
0
            }
1470
0
          }
1471
0
          set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
1472
0
        }
1473
11.8k
      }
1474
11.8k
    }
1475
1476
11.8k
    if (tile_data->allow_update_cdf) update_stats(&cpi->common, td);
1477
1478
    // Gather obmc and warped motion counts to update the probabilities.
1479
11.8k
    if ((cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
1480
11.8k
         cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) ||
1481
11.8k
        (cm->features.allow_warped_motion &&
1482
11.8k
         cpi->sf.inter_sf.prune_warped_prob_thresh > 0)) {
1483
0
      const int inter_block = is_inter_block(mbmi);
1484
0
      const int seg_ref_active =
1485
0
          segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
1486
0
      if (!seg_ref_active && inter_block) {
1487
0
        const MOTION_MODE motion_allowed =
1488
0
            cm->features.switchable_motion_mode
1489
0
                ? motion_mode_allowed(xd->global_motion, xd, mbmi,
1490
0
                                      cm->features.allow_warped_motion)
1491
0
                : SIMPLE_TRANSLATION;
1492
1493
0
        if (mbmi->ref_frame[1] != INTRA_FRAME) {
1494
0
          if (motion_allowed >= OBMC_CAUSAL) {
1495
0
            td->rd_counts.obmc_used[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
1496
0
          }
1497
0
          if (motion_allowed == WARPED_CAUSAL) {
1498
0
            td->rd_counts.warped_used[mbmi->motion_mode == WARPED_CAUSAL]++;
1499
0
          }
1500
0
        }
1501
0
      }
1502
0
    }
1503
11.8k
  }
1504
  // TODO(Ravi/Remya): Move this copy function to a better logical place
1505
  // This function will copy the best mode information from block
1506
  // level (x->mbmi_ext) to frame level (cpi->mbmi_ext_info.frame_base). This
1507
  // frame level buffer (cpi->mbmi_ext_info.frame_base) will be used during
1508
  // bitstream preparation.
1509
55.3k
  av1_copy_mbmi_ext_to_mbmi_ext_frame(x->mbmi_ext_frame, &x->mbmi_ext,
1510
55.3k
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
1511
55.3k
  x->rdmult = origin_mult;
1512
55.3k
}
1513
1514
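/* A small worked example of the bound checked by the assertions in encode_b()
 * above: the luma coefficient offset must stay below the superblock's pixel
 * count, and the chroma offset below that count shifted down by the combined
 * subsampling. The hard-coded log2 value 14 stands in for
 * num_pels_log2_lookup[sb_size] and assumes a 128x128 superblock with 4:2:0
 * subsampling. */
#include <assert.h>

static void sketch_cb_offset_bounds(void) {
  const int sb_pels_log2 = 14;                        /* 128 * 128 = 1 << 14 */
  const int ssx = 1, ssy = 1;                         /* 4:2:0 */
  const int luma_bound = 1 << sb_pels_log2;           /* 16384 */
  const int chroma_bound = luma_bound >> (ssx + ssy); /* 4096 */
  assert(luma_bound == 16384 && chroma_bound == 4096);
  (void)luma_bound;
  (void)chroma_bound;
}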
/*!\brief Reconstructs a partition (may contain multiple coding blocks)
1515
 *
1516
 * \ingroup partition_search
1517
 * Reconstructs a sub-partition of the superblock by applying the chosen modes
1518
 * and partition trees stored in pc_tree.
1519
 *
1520
 * \param[in]    cpi       Top-level encoder structure
1521
 * \param[in]    td        Pointer to thread data
1522
 * \param[in]    tile_data Pointer to struct holding adaptive
1523
 *                         data/contexts/models for the tile during encoding
1524
 * \param[in]    tp        Pointer to the starting token
1525
 * \param[in]    mi_row    Row coordinate of the block in a step size of MI_SIZE
1526
 * \param[in]    mi_col    Column coordinate of the block in a step size of
1527
 *                         MI_SIZE
1528
 * \param[in]    dry_run   A code indicating whether it is part of the final
1529
 *                         pass for reconstructing the superblock
1530
 * \param[in]    bsize     Current block size
1531
 * \param[in]    pc_tree   Pointer to the PC_TREE node storing the picked
1532
 *                         partitions and mode info for the current block
1533
 * \param[in]    rate      Pointer to the total rate for the current block
1534
 *
1535
 * \return Nothing is returned. Instead, reconstructions (w/o in-loop filters)
1536
 * will be updated in the pixel buffers in td->mb.e_mbd.
1537
 */
1538
static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
1539
                      TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
1540
                      int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize,
1541
64.0k
                      PC_TREE *pc_tree, int *rate) {
1542
64.0k
  assert(bsize < BLOCK_SIZES_ALL);
1543
64.0k
  const AV1_COMMON *const cm = &cpi->common;
1544
64.0k
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
1545
64.0k
  MACROBLOCK *const x = &td->mb;
1546
64.0k
  MACROBLOCKD *const xd = &x->e_mbd;
1547
64.0k
  assert(bsize < BLOCK_SIZES_ALL);
1548
64.0k
  const int hbs = mi_size_wide[bsize] / 2;
1549
64.0k
  const int is_partition_root = bsize >= BLOCK_8X8;
1550
64.0k
  const int ctx = is_partition_root
1551
64.0k
                      ? partition_plane_context(xd, mi_row, mi_col, bsize)
1552
64.0k
                      : -1;
1553
64.0k
  const PARTITION_TYPE partition = pc_tree->partitioning;
1554
64.0k
  const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
1555
64.0k
  int quarter_step = mi_size_wide[bsize] / 4;
1556
64.0k
  int i;
1557
64.0k
  BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
1558
1559
64.0k
  if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
1560
57.4k
  if (subsize == BLOCK_INVALID) return;
1561
1562
57.4k
  if (!dry_run && ctx >= 0) {
1563
13.0k
    const int has_rows = (mi_row + hbs) < mi_params->mi_rows;
1564
13.0k
    const int has_cols = (mi_col + hbs) < mi_params->mi_cols;
1565
1566
13.0k
    if (has_rows && has_cols) {
1567
#if CONFIG_ENTROPY_STATS
1568
      td->counts->partition[ctx][partition]++;
1569
#endif
1570
1571
7.38k
      if (tile_data->allow_update_cdf) {
1572
7.38k
        FRAME_CONTEXT *fc = xd->tile_ctx;
1573
7.38k
        update_cdf(fc->partition_cdf[ctx], partition,
1574
7.38k
                   partition_cdf_length(bsize));
1575
7.38k
      }
1576
7.38k
    }
1577
13.0k
  }
1578
1579
57.4k
  switch (partition) {
1580
36.9k
    case PARTITION_NONE:
1581
36.9k
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1582
36.9k
               partition, pc_tree->none, rate);
1583
36.9k
      break;
1584
9.12k
    case PARTITION_VERT:
1585
9.12k
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1586
9.12k
               partition, pc_tree->vertical[0], rate);
1587
9.12k
      if (mi_col + hbs < mi_params->mi_cols) {
1588
27
        encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
1589
27
                 partition, pc_tree->vertical[1], rate);
1590
27
      }
1591
9.12k
      break;
1592
9.28k
    case PARTITION_HORZ:
1593
9.28k
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1594
9.28k
               partition, pc_tree->horizontal[0], rate);
1595
9.28k
      if (mi_row + hbs < mi_params->mi_rows) {
1596
27
        encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
1597
27
                 partition, pc_tree->horizontal[1], rate);
1598
27
      }
1599
9.28k
      break;
1600
2.18k
    case PARTITION_SPLIT:
1601
2.18k
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize,
1602
2.18k
                pc_tree->split[0], rate);
1603
2.18k
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + hbs, dry_run, subsize,
1604
2.18k
                pc_tree->split[1], rate);
1605
2.18k
      encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col, dry_run, subsize,
1606
2.18k
                pc_tree->split[2], rate);
1607
2.18k
      encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col + hbs, dry_run,
1608
2.18k
                subsize, pc_tree->split[3], rate);
1609
2.18k
      break;
1610
1611
0
    case PARTITION_HORZ_A:
1612
0
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
1613
0
               partition, pc_tree->horizontala[0], rate);
1614
0
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2,
1615
0
               partition, pc_tree->horizontala[1], rate);
1616
0
      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
1617
0
               partition, pc_tree->horizontala[2], rate);
1618
0
      break;
1619
0
    case PARTITION_HORZ_B:
1620
0
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1621
0
               partition, pc_tree->horizontalb[0], rate);
1622
0
      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2,
1623
0
               partition, pc_tree->horizontalb[1], rate);
1624
0
      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run,
1625
0
               bsize2, partition, pc_tree->horizontalb[2], rate);
1626
0
      break;
1627
0
    case PARTITION_VERT_A:
1628
0
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
1629
0
               partition, pc_tree->verticala[0], rate);
1630
0
      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2,
1631
0
               partition, pc_tree->verticala[1], rate);
1632
0
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
1633
0
               partition, pc_tree->verticala[2], rate);
1634
1635
0
      break;
1636
0
    case PARTITION_VERT_B:
1637
0
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1638
0
               partition, pc_tree->verticalb[0], rate);
1639
0
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2,
1640
0
               partition, pc_tree->verticalb[1], rate);
1641
0
      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run,
1642
0
               bsize2, partition, pc_tree->verticalb[2], rate);
1643
0
      break;
1644
0
    case PARTITION_HORZ_4:
1645
0
      for (i = 0; i < SUB_PARTITIONS_PART4; ++i) {
1646
0
        int this_mi_row = mi_row + i * quarter_step;
1647
0
        if (i > 0 && this_mi_row >= mi_params->mi_rows) break;
1648
1649
0
        encode_b(cpi, tile_data, td, tp, this_mi_row, mi_col, dry_run, subsize,
1650
0
                 partition, pc_tree->horizontal4[i], rate);
1651
0
      }
1652
0
      break;
1653
0
    case PARTITION_VERT_4:
1654
0
      for (i = 0; i < SUB_PARTITIONS_PART4; ++i) {
1655
0
        int this_mi_col = mi_col + i * quarter_step;
1656
0
        if (i > 0 && this_mi_col >= mi_params->mi_cols) break;
1657
0
        encode_b(cpi, tile_data, td, tp, mi_row, this_mi_col, dry_run, subsize,
1658
0
                 partition, pc_tree->vertical4[i], rate);
1659
0
      }
1660
0
      break;
1661
0
    default: assert(0 && "Invalid partition type."); break;
1662
57.4k
  }
1663
1664
57.4k
  update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
1665
57.4k
}
1666
1667
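/* A sketch of the quadrant addressing behind the PARTITION_SPLIT recursion in
 * encode_sb() above: child i of a block at (mi_row, mi_col) starts at
 * (mi_row + (i >> 1) * hbs, mi_col + (i & 1) * hbs), with hbs equal to half
 * the parent width in mi units. The 64x64 parent (16 mi wide, hbs = 8) and
 * the (32, 48) origin are example values only. */
static void sketch_split_quadrants(int out_mi_row[4], int out_mi_col[4]) {
  const int mi_row = 32, mi_col = 48;
  const int hbs = 8; /* half of mi_size_wide[BLOCK_64X64] == 16 */
  for (int i = 0; i < 4; ++i) {
    out_mi_row[i] = mi_row + (i >> 1) * hbs; /* 32, 32, 40, 40 */
    out_mi_col[i] = mi_col + (i & 1) * hbs;  /* 48, 56, 48, 56 */
  }
}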
static AOM_INLINE int is_adjust_var_based_part_enabled(
1668
    AV1_COMMON *const cm, const PARTITION_SPEED_FEATURES *const part_sf,
1669
0
    BLOCK_SIZE bsize) {
1670
0
  if (part_sf->partition_search_type != VAR_BASED_PARTITION) return 0;
1671
0
  if (part_sf->adjust_var_based_rd_partitioning == 0 ||
1672
0
      part_sf->adjust_var_based_rd_partitioning > 2)
1673
0
    return 0;
1674
1675
0
  if (bsize <= BLOCK_32X32) return 1;
1676
0
  if (part_sf->adjust_var_based_rd_partitioning == 2) {
1677
0
    const int is_larger_qindex = cm->quant_params.base_qindex > 190;
1678
0
    const int is_360p_or_larger = AOMMIN(cm->width, cm->height) >= 360;
1679
0
    return is_360p_or_larger && is_larger_qindex && bsize == BLOCK_64X64;
1680
0
  }
1681
0
  return 0;
1682
0
}
1683
1684
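/* A sketch of the level-2 gate in is_adjust_var_based_part_enabled() above,
 * with the AV1_COMMON fields replaced by plain ints; only the arithmetic is
 * reproduced. */
static int sketch_var_part_level2_gate(int base_qindex, int width, int height,
                                       int bsize_is_64x64) {
  const int is_larger_qindex = base_qindex > 190;
  const int min_dim = width < height ? width : height;
  const int is_360p_or_larger = min_dim >= 360;
  return is_360p_or_larger && is_larger_qindex && bsize_is_64x64;
}
/* e.g. a 640x360 source at base_qindex 200 with a 64x64 block passes the
 * gate, while a 320x180 source or base_qindex 120 does not. */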
/*!\brief AV1 block partition search (partition estimation and partial search).
1685
*
1686
* \ingroup partition_search
1687
* Encode the block by applying pre-calculated partition patterns that are
1688
* represented by coding block sizes stored in the mbmi array. Minor partition
1689
* adjustments are tested and applied if they lead to lower rd costs. The
1690
* partition types are limited to a basic set: none, horz, vert, and split.
1691
*
1692
* \param[in]    cpi       Top-level encoder structure
1693
* \param[in]    td        Pointer to thread data
1694
* \param[in]    tile_data Pointer to struct holding adaptive
1695
data/contexts/models for the tile during encoding
1696
* \param[in]    mib       Array representing MB_MODE_INFO pointers for mi
1697
blocks starting from the first pixel of the current
1698
block
1699
* \param[in]    tp        Pointer to the starting token
1700
* \param[in]    mi_row    Row coordinate of the block in a step size of MI_SIZE
1701
* \param[in]    mi_col    Column coordinate of the block in a step size of
1702
MI_SIZE
1703
* \param[in]    bsize     Current block size
1704
* \param[in]    rate      Pointer to the final rate for encoding the current
1705
block
1706
* \param[in]    dist      Pointer to the final distortion of the current block
1707
* \param[in]    do_recon  Whether the reconstruction function needs to be run,
1708
either for finalizing a superblock or providing
1709
reference for future sub-partitions
1710
* \param[in]    pc_tree   Pointer to the PC_TREE node holding the picked
1711
partitions and mode info for the current block
1712
*
1713
* \return Nothing is returned. The pc_tree struct is modified to store the
1714
* picked partition and modes. The rate and dist are also updated with those
1715
* corresponding to the best partition found.
1716
*/
1717
void av1_rd_use_partition(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data,
1718
                          MB_MODE_INFO **mib, TokenExtra **tp, int mi_row,
1719
                          int mi_col, BLOCK_SIZE bsize, int *rate,
1720
0
                          int64_t *dist, int do_recon, PC_TREE *pc_tree) {
1721
0
  AV1_COMMON *const cm = &cpi->common;
1722
0
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
1723
0
  const int num_planes = av1_num_planes(cm);
1724
0
  TileInfo *const tile_info = &tile_data->tile_info;
1725
0
  MACROBLOCK *const x = &td->mb;
1726
0
  MACROBLOCKD *const xd = &x->e_mbd;
1727
0
  const ModeCosts *mode_costs = &x->mode_costs;
1728
0
  const int bs = mi_size_wide[bsize];
1729
0
  const int hbs = bs / 2;
1730
0
  const int pl = (bsize >= BLOCK_8X8)
1731
0
                     ? partition_plane_context(xd, mi_row, mi_col, bsize)
1732
0
                     : 0;
1733
0
  const PARTITION_TYPE partition =
1734
0
      (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
1735
0
                           : PARTITION_NONE;
1736
0
  const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
1737
0
  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
1738
0
  RD_STATS last_part_rdc, none_rdc, chosen_rdc, invalid_rdc;
1739
0
  BLOCK_SIZE bs_type = mib[0]->bsize;
1740
0
  int use_partition_none = 0;
1741
0
  x->try_merge_partition = 0;
1742
1743
0
  if (pc_tree->none == NULL) {
1744
0
    pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf);
1745
0
  }
1746
0
  PICK_MODE_CONTEXT *ctx_none = pc_tree->none;
1747
1748
0
  if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
1749
1750
0
  assert(mi_size_wide[bsize] == mi_size_high[bsize]);
1751
  // In rt mode, currently the min partition size is BLOCK_8X8.
1752
0
  assert(bsize >= cpi->sf.part_sf.default_min_partition_size);
1753
1754
0
  av1_invalid_rd_stats(&last_part_rdc);
1755
0
  av1_invalid_rd_stats(&none_rdc);
1756
0
  av1_invalid_rd_stats(&chosen_rdc);
1757
0
  av1_invalid_rd_stats(&invalid_rdc);
1758
1759
0
  pc_tree->partitioning = partition;
1760
1761
0
  xd->above_txfm_context =
1762
0
      cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
1763
0
  xd->left_txfm_context =
1764
0
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
1765
0
  av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1766
1767
0
  if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
1768
0
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
1769
0
    x->mb_energy = av1_log_block_var(cpi, x, bsize);
1770
0
  }
1771
1772
  // Save rdmult before it might be changed, so it can be restored later.
1773
0
  const int orig_rdmult = x->rdmult;
1774
0
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
1775
1776
0
  if (partition != PARTITION_NONE &&
1777
0
      is_adjust_var_based_part_enabled(cm, &cpi->sf.part_sf, bsize) &&
1778
0
      (mi_row + hbs < mi_params->mi_rows &&
1779
0
       mi_col + hbs < mi_params->mi_cols)) {
1780
0
    assert(bsize > cpi->sf.part_sf.default_min_partition_size);
1781
0
    mib[0]->bsize = bsize;
1782
0
    pc_tree->partitioning = PARTITION_NONE;
1783
0
    x->try_merge_partition = 1;
1784
0
    pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, PARTITION_NONE,
1785
0
                  bsize, ctx_none, invalid_rdc);
1786
1787
0
    if (none_rdc.rate < INT_MAX) {
1788
0
      none_rdc.rate += mode_costs->partition_cost[pl][PARTITION_NONE];
1789
0
      none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
1790
0
    }
1791
1792
    // Try to skip split partition evaluation based on none partition
1793
    // characteristics.
1794
0
    if (none_rdc.rate < INT_MAX && none_rdc.skip_txfm == 1) {
1795
0
      use_partition_none = 1;
1796
0
    }
1797
1798
0
    av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1799
0
    mib[0]->bsize = bs_type;
1800
0
    pc_tree->partitioning = partition;
1801
0
  }
1802
1803
0
  for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
1804
0
    pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
1805
0
    pc_tree->split[i]->index = i;
1806
0
  }
1807
0
  switch (partition) {
1808
0
    case PARTITION_NONE:
1809
0
      pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
1810
0
                    PARTITION_NONE, bsize, ctx_none, invalid_rdc);
1811
0
      break;
1812
0
    case PARTITION_HORZ:
1813
0
      if (use_partition_none) {
1814
0
        av1_invalid_rd_stats(&last_part_rdc);
1815
0
        break;
1816
0
      }
1817
1818
0
      for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) {
1819
0
        pc_tree->horizontal[i] =
1820
0
            av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
1821
0
      }
1822
0
      pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
1823
0
                    PARTITION_HORZ, subsize, pc_tree->horizontal[0],
1824
0
                    invalid_rdc);
1825
0
      if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
1826
0
          mi_row + hbs < mi_params->mi_rows) {
1827
0
        RD_STATS tmp_rdc;
1828
0
        const PICK_MODE_CONTEXT *const ctx_h = pc_tree->horizontal[0];
1829
0
        av1_init_rd_stats(&tmp_rdc);
1830
0
        av1_update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1);
1831
0
        encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize,
1832
0
                          NULL);
1833
0
        pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
1834
0
                      PARTITION_HORZ, subsize, pc_tree->horizontal[1],
1835
0
                      invalid_rdc);
1836
0
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
1837
0
          av1_invalid_rd_stats(&last_part_rdc);
1838
0
          break;
1839
0
        }
1840
0
        last_part_rdc.rate += tmp_rdc.rate;
1841
0
        last_part_rdc.dist += tmp_rdc.dist;
1842
0
        last_part_rdc.rdcost += tmp_rdc.rdcost;
1843
0
      }
1844
0
      break;
1845
0
    case PARTITION_VERT:
1846
0
      if (use_partition_none) {
1847
0
        av1_invalid_rd_stats(&last_part_rdc);
1848
0
        break;
1849
0
      }
1850
1851
0
      for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) {
1852
0
        pc_tree->vertical[i] =
1853
0
            av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
1854
0
      }
1855
0
      pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
1856
0
                    PARTITION_VERT, subsize, pc_tree->vertical[0], invalid_rdc);
1857
0
      if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
1858
0
          mi_col + hbs < mi_params->mi_cols) {
1859
0
        RD_STATS tmp_rdc;
1860
0
        const PICK_MODE_CONTEXT *const ctx_v = pc_tree->vertical[0];
1861
0
        av1_init_rd_stats(&tmp_rdc);
1862
0
        av1_update_state(cpi, td, ctx_v, mi_row, mi_col, subsize, 1);
1863
0
        encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize,
1864
0
                          NULL);
1865
0
        pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
1866
0
                      PARTITION_VERT, subsize,
1867
0
                      pc_tree->vertical[bsize > BLOCK_8X8], invalid_rdc);
1868
0
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
1869
0
          av1_invalid_rd_stats(&last_part_rdc);
1870
0
          break;
1871
0
        }
1872
0
        last_part_rdc.rate += tmp_rdc.rate;
1873
0
        last_part_rdc.dist += tmp_rdc.dist;
1874
0
        last_part_rdc.rdcost += tmp_rdc.rdcost;
1875
0
      }
1876
0
      break;
1877
0
    case PARTITION_SPLIT:
1878
0
      if (use_partition_none) {
1879
0
        av1_invalid_rd_stats(&last_part_rdc);
1880
0
        break;
1881
0
      }
1882
1883
0
      last_part_rdc.rate = 0;
1884
0
      last_part_rdc.dist = 0;
1885
0
      last_part_rdc.rdcost = 0;
1886
0
      for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
1887
0
        int x_idx = (i & 1) * hbs;
1888
0
        int y_idx = (i >> 1) * hbs;
1889
0
        int jj = i >> 1, ii = i & 0x01;
1890
0
        RD_STATS tmp_rdc;
1891
0
        if ((mi_row + y_idx >= mi_params->mi_rows) ||
1892
0
            (mi_col + x_idx >= mi_params->mi_cols))
1893
0
          continue;
1894
1895
0
        av1_init_rd_stats(&tmp_rdc);
1896
0
        av1_rd_use_partition(
1897
0
            cpi, td, tile_data,
1898
0
            mib + jj * hbs * mi_params->mi_stride + ii * hbs, tp,
1899
0
            mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate,
1900
0
            &tmp_rdc.dist, i != (SUB_PARTITIONS_SPLIT - 1), pc_tree->split[i]);
1901
0
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
1902
0
          av1_invalid_rd_stats(&last_part_rdc);
1903
0
          break;
1904
0
        }
1905
0
        last_part_rdc.rate += tmp_rdc.rate;
1906
0
        last_part_rdc.dist += tmp_rdc.dist;
1907
0
      }
1908
0
      break;
1909
0
    case PARTITION_VERT_A:
1910
0
    case PARTITION_VERT_B:
1911
0
    case PARTITION_HORZ_A:
1912
0
    case PARTITION_HORZ_B:
1913
0
    case PARTITION_HORZ_4:
1914
0
    case PARTITION_VERT_4:
1915
0
      assert(0 && "Cannot handle extended partition types");
1916
0
    default: assert(0); break;
1917
0
  }
1918
1919
0
  if (last_part_rdc.rate < INT_MAX) {
1920
0
    last_part_rdc.rate += mode_costs->partition_cost[pl][partition];
1921
0
    last_part_rdc.rdcost =
1922
0
        RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist);
1923
0
  }
1924
1925
0
  if ((cpi->sf.part_sf.partition_search_type == VAR_BASED_PARTITION &&
1926
0
       cpi->sf.part_sf.adjust_var_based_rd_partitioning > 2) &&
1927
0
      partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
1928
0
      (mi_row + bs < mi_params->mi_rows ||
1929
0
       mi_row + hbs == mi_params->mi_rows) &&
1930
0
      (mi_col + bs < mi_params->mi_cols ||
1931
0
       mi_col + hbs == mi_params->mi_cols)) {
1932
0
    BLOCK_SIZE split_subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
1933
0
    chosen_rdc.rate = 0;
1934
0
    chosen_rdc.dist = 0;
1935
1936
0
    av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1937
0
    pc_tree->partitioning = PARTITION_SPLIT;
1938
1939
    // Split partition.
1940
0
    for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
1941
0
      int x_idx = (i & 1) * hbs;
1942
0
      int y_idx = (i >> 1) * hbs;
1943
0
      RD_STATS tmp_rdc;
1944
1945
0
      if ((mi_row + y_idx >= mi_params->mi_rows) ||
1946
0
          (mi_col + x_idx >= mi_params->mi_cols))
1947
0
        continue;
1948
1949
0
      av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1950
0
      pc_tree->split[i]->partitioning = PARTITION_NONE;
1951
0
      if (pc_tree->split[i]->none == NULL)
1952
0
        pc_tree->split[i]->none =
1953
0
            av1_alloc_pmc(cpi, split_subsize, &td->shared_coeff_buf);
1954
0
      pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
1955
0
                    PARTITION_SPLIT, split_subsize, pc_tree->split[i]->none,
1956
0
                    invalid_rdc);
1957
1958
0
      av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1959
0
      if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
1960
0
        av1_invalid_rd_stats(&chosen_rdc);
1961
0
        break;
1962
0
      }
1963
1964
0
      chosen_rdc.rate += tmp_rdc.rate;
1965
0
      chosen_rdc.dist += tmp_rdc.dist;
1966
1967
0
      if (i != SUB_PARTITIONS_SPLIT - 1)
1968
0
        encode_sb(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx,
1969
0
                  OUTPUT_ENABLED, split_subsize, pc_tree->split[i], NULL);
1970
1971
0
      chosen_rdc.rate += mode_costs->partition_cost[pl][PARTITION_NONE];
1972
0
    }
1973
0
    if (chosen_rdc.rate < INT_MAX) {
1974
0
      chosen_rdc.rate += mode_costs->partition_cost[pl][PARTITION_SPLIT];
1975
0
      chosen_rdc.rdcost = RDCOST(x->rdmult, chosen_rdc.rate, chosen_rdc.dist);
1976
0
    }
1977
0
  }
1978
1979
  // If last_part is better set the partitioning to that.
1980
0
  if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
1981
0
    mib[0]->bsize = bs_type;
1982
0
    if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition;
1983
1984
0
    chosen_rdc = last_part_rdc;
1985
0
  }
1986
  // If none was better set the partitioning to that.
1987
0
  if (none_rdc.rdcost < INT64_MAX &&
1988
0
      none_rdc.rdcost - (none_rdc.rdcost >> 9) < chosen_rdc.rdcost) {
1989
0
    mib[0]->bsize = bsize;
1990
0
    if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
1991
0
    chosen_rdc = none_rdc;
1992
0
  }
1993
1994
0
  av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1995
1996
  // We must have chosen a partitioning and encoding or we'll fail later on.
1997
  // No other opportunities for success.
1998
0
  if (bsize == cm->seq_params->sb_size)
1999
0
    assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
2000
2001
#if CONFIG_COLLECT_COMPONENT_TIMING
2002
  start_timing(cpi, encode_sb_time);
2003
#endif
2004
0
  if (do_recon) {
2005
0
    if (bsize == cm->seq_params->sb_size) {
2006
      // NOTE: To get an estimate of the rate due to the tokens, use:
2007
      // int rate_coeffs = 0;
2008
      // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
2009
      //           bsize, pc_tree, &rate_coeffs);
2010
0
      set_cb_offsets(x->cb_offset, 0, 0);
2011
0
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
2012
0
                pc_tree, NULL);
2013
0
    } else {
2014
0
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
2015
0
                pc_tree, NULL);
2016
0
    }
2017
0
  }
2018
#if CONFIG_COLLECT_COMPONENT_TIMING
2019
  end_timing(cpi, encode_sb_time);
2020
#endif
2021
2022
0
  *rate = chosen_rdc.rate;
2023
0
  *dist = chosen_rdc.dist;
2024
0
  x->rdmult = orig_rdmult;
2025
0
}
2026
2027
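#include <stdint.h>

/* A sketch of the tie-break used above when comparing PARTITION_NONE against
 * the chosen partitioning: NONE wins if its rd cost, discounted by 1/512
 * (roughly 0.2%), still beats the chosen cost, i.e. the search is slightly
 * biased toward the simpler partition. The example costs are made up. */
static int sketch_prefer_none(int64_t none_rdcost, int64_t chosen_rdcost) {
  return none_rdcost < INT64_MAX &&
         none_rdcost - (none_rdcost >> 9) < chosen_rdcost;
}
/* e.g. none = 102000 vs chosen = 101900: 102000 - (102000 >> 9) = 101801,
 * which is below 101900, so PARTITION_NONE is still preferred. */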
static void encode_b_nonrd(const AV1_COMP *const cpi, TileDataEnc *tile_data,
2028
                           ThreadData *td, TokenExtra **tp, int mi_row,
2029
                           int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize,
2030
                           PARTITION_TYPE partition,
2031
0
                           PICK_MODE_CONTEXT *const ctx, int *rate) {
2032
0
  const AV1_COMMON *const cm = &cpi->common;
2033
0
  TileInfo *const tile = &tile_data->tile_info;
2034
0
  MACROBLOCK *const x = &td->mb;
2035
0
  MACROBLOCKD *xd = &x->e_mbd;
2036
0
  av1_set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);
2037
0
  const int origin_mult = x->rdmult;
2038
0
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
2039
0
  MB_MODE_INFO *mbmi = xd->mi[0];
2040
0
  mbmi->partition = partition;
2041
0
  av1_update_state(cpi, td, ctx, mi_row, mi_col, bsize, dry_run);
2042
0
  const int subsampling_x = cpi->common.seq_params->subsampling_x;
2043
0
  const int subsampling_y = cpi->common.seq_params->subsampling_y;
2044
0
  if (!dry_run) {
2045
0
    set_cb_offsets(x->mbmi_ext_frame->cb_offset, x->cb_offset[PLANE_TYPE_Y],
2046
0
                   x->cb_offset[PLANE_TYPE_UV]);
2047
0
    assert(x->cb_offset[PLANE_TYPE_Y] <
2048
0
           (1 << num_pels_log2_lookup[cpi->common.seq_params->sb_size]));
2049
0
    assert(x->cb_offset[PLANE_TYPE_UV] <
2050
0
           ((1 << num_pels_log2_lookup[cpi->common.seq_params->sb_size]) >>
2051
0
            (subsampling_x + subsampling_y)));
2052
0
  }
2053
0
  encode_superblock(cpi, tile_data, td, tp, dry_run, bsize, rate);
2054
0
  if (!dry_run) {
2055
0
    update_cb_offsets(x, bsize, subsampling_x, subsampling_y);
2056
0
    if (has_second_ref(mbmi)) {
2057
0
      if (mbmi->compound_idx == 0 ||
2058
0
          mbmi->interinter_comp.type == COMPOUND_AVERAGE)
2059
0
        mbmi->comp_group_idx = 0;
2060
0
      else
2061
0
        mbmi->comp_group_idx = 1;
2062
0
      mbmi->compound_idx = 1;
2063
0
    }
2064
0
    RD_COUNTS *const rdc = &td->rd_counts;
2065
0
    if (mbmi->skip_mode) {
2066
0
      assert(!frame_is_intra_only(cm));
2067
0
      rdc->skip_mode_used_flag = 1;
2068
0
      if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT &&
2069
0
          has_second_ref(mbmi)) {
2070
0
        rdc->compound_ref_used_flag = 1;
2071
0
      }
2072
0
      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
2073
0
    } else {
2074
0
      const int seg_ref_active =
2075
0
          segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
2076
0
      if (!seg_ref_active) {
2077
        // If the segment reference feature is enabled, we have only a single
2078
        // reference frame allowed for the segment, so exclude it from
2079
        // the reference frame counts used to work out probabilities.
2080
0
        if (is_inter_block(mbmi)) {
2081
0
          av1_collect_neighbors_ref_counts(xd);
2082
0
          if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT &&
2083
0
              has_second_ref(mbmi)) {
2084
            // This flag is also updated for 4x4 blocks
2085
0
            rdc->compound_ref_used_flag = 1;
2086
0
          }
2087
0
          set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
2088
0
        }
2089
0
      }
2090
0
    }
2091
0
    if (cpi->oxcf.algo_cfg.loopfilter_control == LOOPFILTER_SELECTIVELY &&
2092
0
        (mbmi->mode == NEWMV || mbmi->mode < INTRA_MODE_END)) {
2093
0
      int32_t blocks = mi_size_high[bsize] * mi_size_wide[bsize];
2094
0
      rdc->newmv_or_intra_blocks += blocks;
2095
0
    }
2096
0
    if (tile_data->allow_update_cdf) update_stats(&cpi->common, td);
2097
0
  }
2098
0
  if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && mbmi->skip_txfm &&
2099
0
      !cpi->rc.rtc_external_ratectrl)
2100
0
    av1_cyclic_reset_segment_skip(cpi, x, mi_row, mi_col, bsize);
2101
  // TODO(Ravi/Remya): Move this copy function to a better logical place
2102
  // This function will copy the best mode information from block
2103
  // level (x->mbmi_ext) to frame level (cpi->mbmi_ext_info.frame_base). This
2104
  // frame level buffer (cpi->mbmi_ext_info.frame_base) will be used during
2105
  // bitstream preparation.
2106
0
  av1_copy_mbmi_ext_to_mbmi_ext_frame(x->mbmi_ext_frame, &x->mbmi_ext,
2107
0
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
2108
0
  x->rdmult = origin_mult;
2109
0
}
2110
2111
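/* A small worked example of the block counting used above for
 * LOOPFILTER_SELECTIVELY: each mi unit covers 4x4 pixels, so a BLOCK_16X16
 * coding block adds 4 * 4 = 16 units to rdc->newmv_or_intra_blocks. The
 * literal 4s stand in for the mi_size_high[] / mi_size_wide[] lookups. */
static int sketch_mi_units_in_16x16_block(void) {
  const int mi_size_high_16x16 = 16 / 4;
  const int mi_size_wide_16x16 = 16 / 4;
  return mi_size_high_16x16 * mi_size_wide_16x16; /* 16 */
}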
/*!\brief Top level function to pick block mode for non-RD optimized case
2112
 *
2113
 * \ingroup partition_search
2114
 * \callgraph
2115
 * \callergraph
2116
 * Searches prediction modes, transform, and coefficient coding modes for an
2117
 * individual coding block. This function is the top-level function that is
2118
 * used for non-RD optimized mode search (controlled by
2119
 * \c cpi->sf.rt_sf.use_nonrd_pick_mode). Depending on the frame type, it calls
2120
 * inter/skip/hybrid-intra mode search functions.
2121
 *
2122
 * \param[in]    cpi            Top-level encoder structure
2123
 * \param[in]    tile_data      Pointer to struct holding adaptive
2124
 *                              data/contexts/models for the tile during
2125
 *                              encoding
2126
 * \param[in]    x              Pointer to structure holding all the data for
2127
 *                              the current macroblock
2128
 * \param[in]    mi_row         Row coordinate of the block in a step size of
2129
 *                              MI_SIZE
2130
 * \param[in]    mi_col         Column coordinate of the block in a step size of
2131
 *                              MI_SIZE
2132
 * \param[in]    rd_cost        Pointer to structure holding rate and distortion
2133
 *                              stats for the current block
2134
 * \param[in]    bsize          Current block size
2135
 * \param[in]    ctx            Pointer to structure holding coding contexts and
2136
 *                              chosen modes for the current block
2137
 *
2138
 * \return Nothing is returned. Instead, the chosen modes and contexts necessary
2139
 * for reconstruction are stored in ctx, and the rate-distortion stats are stored
2140
 * in rd_cost. If no valid mode leads to rd_cost <= best_rd, this is signalled
2141
 * by an INT64_MAX rd_cost->rdcost.
2142
 */
2143
static void pick_sb_modes_nonrd(AV1_COMP *const cpi, TileDataEnc *tile_data,
2144
                                MACROBLOCK *const x, int mi_row, int mi_col,
2145
                                RD_STATS *rd_cost, BLOCK_SIZE bsize,
2146
0
                                PICK_MODE_CONTEXT *ctx) {
2147
0
  av1_set_offsets(cpi, &tile_data->tile_info, x, mi_row, mi_col, bsize);
2148
0
  AV1_COMMON *const cm = &cpi->common;
2149
0
  const int num_planes = av1_num_planes(cm);
2150
0
  MACROBLOCKD *const xd = &x->e_mbd;
2151
0
  MB_MODE_INFO *mbmi = xd->mi[0];
2152
0
  struct macroblock_plane *const p = x->plane;
2153
0
  struct macroblockd_plane *const pd = xd->plane;
2154
0
  const AQ_MODE aq_mode = cpi->oxcf.q_cfg.aq_mode;
2155
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
2156
0
  int i;
2157
2158
  // This is only needed for real time/allintra row-mt enabled multi-threaded
2159
  // encoding with cost update frequency set to COST_UPD_TILE/COST_UPD_OFF.
2160
0
  wait_for_top_right_sb(&cpi->mt_info.enc_row_mt, &tile_data->row_mt_sync,
2161
0
                        &tile_data->tile_info, cm->seq_params->sb_size,
2162
0
                        cm->seq_params->mib_size_log2, bsize, mi_row, mi_col);
2163
2164
#if CONFIG_COLLECT_COMPONENT_TIMING
2165
  start_timing(cpi, rd_pick_sb_modes_time);
2166
#endif
2167
  // Sets up the tx_type_map buffer in MACROBLOCKD.
2168
0
  xd->tx_type_map = txfm_info->tx_type_map_;
2169
0
  xd->tx_type_map_stride = mi_size_wide[bsize];
2170
0
  for (i = 0; i < num_planes; ++i) {
2171
0
    p[i].coeff = ctx->coeff[i];
2172
0
    p[i].qcoeff = ctx->qcoeff[i];
2173
0
    p[i].dqcoeff = ctx->dqcoeff[i];
2174
0
    p[i].eobs = ctx->eobs[i];
2175
0
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
2176
0
  }
2177
0
  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
2178
0
  if (is_cur_buf_hbd(xd)) {
2179
0
    x->source_variance = av1_high_get_sby_perpixel_variance(
2180
0
        cpi, &x->plane[0].src, bsize, xd->bd);
2181
0
  } else {
2182
0
    x->source_variance =
2183
0
        av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
2184
0
  }
2185
  // Save rdmult before it might be changed, so it can be restored later.
2186
0
  const int orig_rdmult = x->rdmult;
2187
0
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode, mbmi);
2188
  // Set error per bit for current rdmult
2189
0
  av1_set_error_per_bit(&x->errorperbit, x->rdmult);
2190
  // Find best coding mode & reconstruct the MB so it is available
2191
  // as a predictor for MBs that follow in the SB
2192
0
  if (frame_is_intra_only(cm)) {
2193
#if CONFIG_COLLECT_COMPONENT_TIMING
2194
    start_timing(cpi, av1_rd_pick_intra_mode_sb_time);
2195
#endif
2196
0
    hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
2197
#if CONFIG_COLLECT_COMPONENT_TIMING
2198
    end_timing(cpi, av1_rd_pick_intra_mode_sb_time);
2199
#endif
2200
0
  } else {
2201
#if CONFIG_COLLECT_COMPONENT_TIMING
2202
    start_timing(cpi, av1_rd_pick_inter_mode_sb_time);
2203
#endif
2204
0
    if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
2205
0
      RD_STATS invalid_rd;
2206
0
      av1_invalid_rd_stats(&invalid_rd);
2207
      // TODO(kyslov): add av1_nonrd_pick_inter_mode_sb_seg_skip
2208
0
      av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
2209
0
                                         rd_cost, bsize, ctx,
2210
0
                                         invalid_rd.rdcost);
2211
0
    } else {
2212
0
      av1_nonrd_pick_inter_mode_sb(cpi, tile_data, x, rd_cost, bsize, ctx);
2213
0
    }
2214
#if CONFIG_COLLECT_COMPONENT_TIMING
2215
    end_timing(cpi, av1_rd_pick_inter_mode_sb_time);
2216
#endif
2217
0
  }
2218
0
  if (cpi->sf.rt_sf.skip_cdef_sb) {
2219
    // Find the corresponding 64x64 block. It'll be the 128x128 block if that's
2220
    // the block size.
2221
0
    const int mi_row_sb = mi_row - mi_row % MI_SIZE_64X64;
2222
0
    const int mi_col_sb = mi_col - mi_col % MI_SIZE_64X64;
2223
0
    MB_MODE_INFO **mi_sb =
2224
0
        cm->mi_params.mi_grid_base +
2225
0
        get_mi_grid_idx(&cm->mi_params, mi_row_sb, mi_col_sb);
2226
    // Do not skip if intra or new mv is picked, or color sensitivity is set.
2227
0
    mi_sb[0]->skip_cdef_curr_sb =
2228
0
        mi_sb[0]->skip_cdef_curr_sb &&
2229
0
        !(x->color_sensitivity[0] || x->color_sensitivity[1]) &&
2230
0
        !(mbmi->mode < INTRA_MODES || mbmi->mode == NEWMV);
2231
    // Store in the pickmode context.
2232
0
    ctx->mic.skip_cdef_curr_sb = mi_sb[0]->skip_cdef_curr_sb;
2233
0
  }
2234
0
  x->rdmult = orig_rdmult;
2235
0
  ctx->rd_stats.rate = rd_cost->rate;
2236
0
  ctx->rd_stats.dist = rd_cost->dist;
2237
0
  ctx->rd_stats.rdcost = rd_cost->rdcost;
2238
#if CONFIG_COLLECT_COMPONENT_TIMING
2239
  end_timing(cpi, rd_pick_sb_modes_time);
2240
#endif
2241
0
}
2242
2243
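/* A sketch of the 64x64 alignment used by the skip_cdef bookkeeping above: a
 * 64x64 block spans 16 mi units (64 pixels / 4-pixel mi unit), so rounding an
 * mi coordinate down to its enclosing 64x64 block subtracts the remainder
 * modulo 16. MI_SIZE_64X64 == 16 is an assumption of this sketch. */
static int sketch_align_to_64x64(int mi_coord) {
  const int mi_units_per_64 = 16;
  return mi_coord - mi_coord % mi_units_per_64;
}
/* e.g. sketch_align_to_64x64(23) == 16 and sketch_align_to_64x64(48) == 48. */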
/*!\brief AV1 block partition application (minimal RD search).
2244
*
2245
* \ingroup partition_search
2246
* \callgraph
2247
* \callergraph
2248
* Encode the block by applying pre-calculated partition patterns that are
2249
* represented by coding block sizes stored in the mbmi array. The only
2250
* partition adjustment allowed is merging leaf split nodes if it leads to a
2251
* lower rd cost. The partition types are limited to a basic set: none, horz,
2252
* vert, and split. This function is only used in the real-time mode.
2253
*
2254
* \param[in]    cpi       Top-level encoder structure
2255
* \param[in]    td        Pointer to thread data
2256
* \param[in]    tile_data Pointer to struct holding adaptive
2257
data/contexts/models for the tile during encoding
2258
* \param[in]    mib       Array representing MB_MODE_INFO pointers for mi
2259
blocks starting from the first pixel of the current
2260
block
2261
* \param[in]    tp        Pointer to the starting token
2262
* \param[in]    mi_row    Row coordinate of the block in a step size of MI_SIZE
2263
* \param[in]    mi_col    Column coordinate of the block in a step size of
2264
MI_SIZE
2265
* \param[in]    bsize     Current block size
2266
* \param[in]    pc_tree   Pointer to the PC_TREE node holding the picked
2267
partitions and mode info for the current block
2268
*
2269
* \return Nothing is returned. The pc_tree struct is modified to store the
2270
* picked partition and modes.
2271
*/
2272
void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
2273
                             TileDataEnc *tile_data, MB_MODE_INFO **mib,
2274
                             TokenExtra **tp, int mi_row, int mi_col,
2275
0
                             BLOCK_SIZE bsize, PC_TREE *pc_tree) {
2276
0
  AV1_COMMON *const cm = &cpi->common;
2277
0
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
2278
0
  TileInfo *const tile_info = &tile_data->tile_info;
2279
0
  MACROBLOCK *const x = &td->mb;
2280
0
  MACROBLOCKD *const xd = &x->e_mbd;
2281
0
  const ModeCosts *mode_costs = &x->mode_costs;
2282
  // Only square blocks from 8x8 to 128x128 are supported
2283
0
  assert(bsize >= BLOCK_8X8 && bsize <= BLOCK_128X128);
2284
0
  const int bs = mi_size_wide[bsize];
2285
0
  const int hbs = bs / 2;
2286
0
  const PARTITION_TYPE partition =
2287
0
      (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
2288
0
                           : PARTITION_NONE;
2289
0
  BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
2290
0
  assert(subsize <= BLOCK_LARGEST);
2291
0
  const int pl = (bsize >= BLOCK_8X8)
2292
0
                     ? partition_plane_context(xd, mi_row, mi_col, bsize)
2293
0
                     : 0;
2294
2295
0
  RD_STATS dummy_cost;
2296
0
  av1_invalid_rd_stats(&dummy_cost);
2297
2298
0
  if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
2299
2300
0
  assert(mi_size_wide[bsize] == mi_size_high[bsize]);
2301
2302
0
  pc_tree->partitioning = partition;
2303
2304
0
  xd->above_txfm_context =
2305
0
      cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
2306
0
  xd->left_txfm_context =
2307
0
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
2308
2309
  // Initialize default mode evaluation params
2310
0
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);
2311
2312
0
  switch (partition) {
2313
0
    case PARTITION_NONE:
2314
0
      pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf);
2315
0
      if (cpi->sf.rt_sf.nonrd_check_partition_split && do_split_check(bsize) &&
2316
0
          !frame_is_intra_only(cm)) {
2317
0
        RD_STATS split_rdc, none_rdc, block_rdc;
2318
0
        RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
2319
2320
0
        av1_init_rd_stats(&split_rdc);
2321
0
        av1_invalid_rd_stats(&none_rdc);
2322
2323
0
        av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2324
0
        subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
2325
0
        pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize,
2326
0
                            pc_tree->none);
2327
0
        none_rdc.rate += mode_costs->partition_cost[pl][PARTITION_NONE];
2328
0
        none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
2329
0
        av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2330
2331
0
        for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
2332
0
          av1_invalid_rd_stats(&block_rdc);
2333
0
          const int x_idx = (i & 1) * hbs;
2334
0
          const int y_idx = (i >> 1) * hbs;
2335
0
          if (mi_row + y_idx >= mi_params->mi_rows ||
2336
0
              mi_col + x_idx >= mi_params->mi_cols)
2337
0
            continue;
2338
0
          xd->above_txfm_context =
2339
0
              cm->above_contexts.txfm[tile_info->tile_row] + mi_col + x_idx;
2340
0
          xd->left_txfm_context =
2341
0
              xd->left_txfm_context_buffer + ((mi_row + y_idx) & MAX_MIB_MASK);
2342
0
          pc_tree->split[i]->partitioning = PARTITION_NONE;
2343
0
          pick_sb_modes_nonrd(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
2344
0
                              &block_rdc, subsize, pc_tree->split[i]->none);
2345
0
          split_rdc.rate += block_rdc.rate;
2346
0
          split_rdc.dist += block_rdc.dist;
2347
2348
0
          encode_b_nonrd(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx,
2349
0
                         1, subsize, PARTITION_NONE, pc_tree->split[i]->none,
2350
0
                         NULL);
2351
0
        }
2352
0
        split_rdc.rate += mode_costs->partition_cost[pl][PARTITION_SPLIT];
2353
0
        split_rdc.rdcost = RDCOST(x->rdmult, split_rdc.rate, split_rdc.dist);
2354
0
        av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2355
2356
0
        if (none_rdc.rdcost < split_rdc.rdcost) {
2357
0
          mib[0]->bsize = bsize;
2358
0
          pc_tree->partitioning = PARTITION_NONE;
2359
0
          encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize,
2360
0
                         partition, pc_tree->none, NULL);
2361
0
        } else {
2362
0
          mib[0]->bsize = subsize;
2363
0
          pc_tree->partitioning = PARTITION_SPLIT;
2364
0
          for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
2365
0
            const int x_idx = (i & 1) * hbs;
2366
0
            const int y_idx = (i >> 1) * hbs;
2367
0
            if (mi_row + y_idx >= mi_params->mi_rows ||
2368
0
                mi_col + x_idx >= mi_params->mi_cols)
2369
0
              continue;
2370
0
            encode_b_nonrd(cpi, tile_data, td, tp, mi_row + y_idx,
2371
0
                           mi_col + x_idx, 0, subsize, PARTITION_NONE,
2372
0
                           pc_tree->split[i]->none, NULL);
2373
0
          }
2374
0
        }
2375
2376
0
      } else {
2377
0
        pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
2378
0
                            bsize, pc_tree->none);
2379
0
        encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize,
2380
0
                       partition, pc_tree->none, NULL);
2381
0
      }
2382
0
      break;
2383
0
    case PARTITION_VERT:
2384
0
      for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) {
2385
0
        pc_tree->vertical[i] =
2386
0
            av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
2387
0
      }
2388
0
      pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
2389
0
                          subsize, pc_tree->vertical[0]);
2390
0
      encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, subsize,
2391
0
                     PARTITION_VERT, pc_tree->vertical[0], NULL);
2392
0
      if (mi_col + hbs < mi_params->mi_cols && bsize > BLOCK_8X8) {
2393
0
        pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col + hbs,
2394
0
                            &dummy_cost, subsize, pc_tree->vertical[1]);
2395
0
        encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col + hbs, 0, subsize,
2396
0
                       PARTITION_VERT, pc_tree->vertical[1], NULL);
2397
0
      }
2398
0
      break;
2399
0
    case PARTITION_HORZ:
2400
0
      for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) {
2401
0
        pc_tree->horizontal[i] =
2402
0
            av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
2403
0
      }
2404
0
      pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
2405
0
                          subsize, pc_tree->horizontal[0]);
2406
0
      encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, subsize,
2407
0
                     PARTITION_HORZ, pc_tree->horizontal[0], NULL);
2408
2409
0
      if (mi_row + hbs < mi_params->mi_rows && bsize > BLOCK_8X8) {
2410
0
        pick_sb_modes_nonrd(cpi, tile_data, x, mi_row + hbs, mi_col,
2411
0
                            &dummy_cost, subsize, pc_tree->horizontal[1]);
2412
0
        encode_b_nonrd(cpi, tile_data, td, tp, mi_row + hbs, mi_col, 0, subsize,
2413
0
                       PARTITION_HORZ, pc_tree->horizontal[1], NULL);
2414
0
      }
2415
0
      break;
2416
0
    case PARTITION_SPLIT:
2417
0
      for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
2418
0
        pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
2419
0
        pc_tree->split[i]->index = i;
2420
0
      }
2421
0
      if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode &&
2422
0
          av1_is_leaf_split_partition(cm, mi_row, mi_col, bsize) &&
2423
0
          !frame_is_intra_only(cm) && bsize <= BLOCK_64X64) {
2424
0
        RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
2425
0
        RD_STATS split_rdc, none_rdc;
2426
0
        av1_invalid_rd_stats(&split_rdc);
2427
0
        av1_invalid_rd_stats(&none_rdc);
2428
0
        av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2429
0
        xd->above_txfm_context =
2430
0
            cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
2431
0
        xd->left_txfm_context =
2432
0
            xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
2433
0
        pc_tree->partitioning = PARTITION_NONE;
2434
0
        pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf);
2435
0
        pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize,
2436
0
                            pc_tree->none);
2437
0
        none_rdc.rate += mode_costs->partition_cost[pl][PARTITION_NONE];
2438
0
        none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
2439
0
        av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2440
0
        if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode != 2 ||
2441
0
            none_rdc.skip_txfm != 1 || pc_tree->none->mic.mode == NEWMV) {
2442
0
          av1_init_rd_stats(&split_rdc);
2443
0
          for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
2444
0
            RD_STATS block_rdc;
2445
0
            av1_invalid_rd_stats(&block_rdc);
2446
0
            int x_idx = (i & 1) * hbs;
2447
0
            int y_idx = (i >> 1) * hbs;
2448
0
            if ((mi_row + y_idx >= mi_params->mi_rows) ||
2449
0
                (mi_col + x_idx >= mi_params->mi_cols))
2450
0
              continue;
2451
0
            xd->above_txfm_context =
2452
0
                cm->above_contexts.txfm[tile_info->tile_row] + mi_col + x_idx;
2453
0
            xd->left_txfm_context = xd->left_txfm_context_buffer +
2454
0
                                    ((mi_row + y_idx) & MAX_MIB_MASK);
2455
0
            if (pc_tree->split[i]->none == NULL)
2456
0
              pc_tree->split[i]->none =
2457
0
                  av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
2458
0
            pc_tree->split[i]->partitioning = PARTITION_NONE;
2459
0
            pick_sb_modes_nonrd(cpi, tile_data, x, mi_row + y_idx,
2460
0
                                mi_col + x_idx, &block_rdc, subsize,
2461
0
                                pc_tree->split[i]->none);
2462
0
            split_rdc.rate += block_rdc.rate;
2463
0
            split_rdc.dist += block_rdc.dist;
2464
2465
0
            encode_b_nonrd(cpi, tile_data, td, tp, mi_row + y_idx,
2466
0
                           mi_col + x_idx, 1, subsize, PARTITION_NONE,
2467
0
                           pc_tree->split[i]->none, NULL);
2468
0
          }
2469
0
          av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2470
0
          split_rdc.rate += mode_costs->partition_cost[pl][PARTITION_SPLIT];
2471
0
          split_rdc.rdcost = RDCOST(x->rdmult, split_rdc.rate, split_rdc.dist);
2472
0
        }
2473
0
        if (none_rdc.rdcost < split_rdc.rdcost) {
2474
0
          mib[0]->bsize = bsize;
2475
0
          pc_tree->partitioning = PARTITION_NONE;
2476
0
          encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize,
2477
0
                         partition, pc_tree->none, NULL);
2478
0
        } else {
2479
0
          mib[0]->bsize = subsize;
2480
0
          pc_tree->partitioning = PARTITION_SPLIT;
2481
0
          for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
2482
0
            int x_idx = (i & 1) * hbs;
2483
0
            int y_idx = (i >> 1) * hbs;
2484
0
            if ((mi_row + y_idx >= mi_params->mi_rows) ||
2485
0
                (mi_col + x_idx >= mi_params->mi_cols))
2486
0
              continue;
2487
2488
0
            if (pc_tree->split[i]->none == NULL)
2489
0
              pc_tree->split[i]->none =
2490
0
                  av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
2491
0
            encode_b_nonrd(cpi, tile_data, td, tp, mi_row + y_idx,
2492
0
                           mi_col + x_idx, 0, subsize, PARTITION_NONE,
2493
0
                           pc_tree->split[i]->none, NULL);
2494
0
          }
2495
0
        }
2496
0
      } else {
2497
0
        for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
2498
0
          int x_idx = (i & 1) * hbs;
2499
0
          int y_idx = (i >> 1) * hbs;
2500
0
          int jj = i >> 1, ii = i & 0x01;
2501
0
          if ((mi_row + y_idx >= mi_params->mi_rows) ||
2502
0
              (mi_col + x_idx >= mi_params->mi_cols))
2503
0
            continue;
2504
0
          av1_nonrd_use_partition(
2505
0
              cpi, td, tile_data,
2506
0
              mib + jj * hbs * mi_params->mi_stride + ii * hbs, tp,
2507
0
              mi_row + y_idx, mi_col + x_idx, subsize, pc_tree->split[i]);
2508
0
        }
2509
0
      }
2510
0
      break;
2511
0
    case PARTITION_VERT_A:
2512
0
    case PARTITION_VERT_B:
2513
0
    case PARTITION_HORZ_A:
2514
0
    case PARTITION_HORZ_B:
2515
0
    case PARTITION_HORZ_4:
2516
0
    case PARTITION_VERT_4:
2517
0
      assert(0 && "Cannot handle extended partition types");
2518
0
    default: assert(0); break;
2519
0
  }
2520
0
}
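The PARTITION_SPLIT handling above repeatedly derives the four sub-block offsets from the loop index via (i & 1) * hbs and (i >> 1) * hbs. A minimal standalone C sketch of that indexing, assuming hbs = 8 (half of a 64x64 block in mi units); this is an illustrative example, not part of partition_search.c:

#include <stdio.h>

int main(void) {
  const int hbs = 8;  /* assumed: half the block size in mi units (64x64 block) */
  for (int i = 0; i < 4; ++i) {
    const int x_idx = (i & 1) * hbs;   /* column offset: right half when bit 0 is set */
    const int y_idx = (i >> 1) * hbs;  /* row offset: bottom half when bit 1 is set */
    printf("split sub-block %d -> (mi_row + %d, mi_col + %d)\n", i, y_idx, x_idx);
  }
  return 0;
}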
2521
2522
#if !CONFIG_REALTIME_ONLY
2523
// Try searching for an encoding for the given subblock. Returns zero if the
2524
// rdcost is already too high (to tell the caller not to bother searching for
2525
// encodings of further subblocks).
2526
static int rd_try_subblock(AV1_COMP *const cpi, ThreadData *td,
2527
                           TileDataEnc *tile_data, TokenExtra **tp, int is_last,
2528
                           int mi_row, int mi_col, BLOCK_SIZE subsize,
2529
                           RD_STATS best_rdcost, RD_STATS *sum_rdc,
2530
                           PARTITION_TYPE partition,
2531
76
                           PICK_MODE_CONTEXT *this_ctx) {
2532
76
  MACROBLOCK *const x = &td->mb;
2533
76
  const int orig_mult = x->rdmult;
2534
76
  setup_block_rdmult(cpi, x, mi_row, mi_col, subsize, NO_AQ, NULL);
2535
2536
76
  av1_rd_cost_update(x->rdmult, &best_rdcost);
2537
2538
76
  RD_STATS rdcost_remaining;
2539
76
  av1_rd_stats_subtraction(x->rdmult, &best_rdcost, sum_rdc, &rdcost_remaining);
2540
76
  RD_STATS this_rdc;
2541
76
  pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, partition,
2542
76
                subsize, this_ctx, rdcost_remaining);
2543
2544
76
  if (this_rdc.rate == INT_MAX) {
2545
38
    sum_rdc->rdcost = INT64_MAX;
2546
38
  } else {
2547
38
    sum_rdc->rate += this_rdc.rate;
2548
38
    sum_rdc->dist += this_rdc.dist;
2549
38
    av1_rd_cost_update(x->rdmult, sum_rdc);
2550
38
  }
2551
2552
76
  if (sum_rdc->rdcost >= best_rdcost.rdcost) {
2553
38
    x->rdmult = orig_mult;
2554
38
    return 0;
2555
38
  }
2556
2557
38
  if (!is_last) {
2558
38
    av1_update_state(cpi, td, this_ctx, mi_row, mi_col, subsize, 1);
2559
38
    encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize, NULL);
2560
38
  }
2561
2562
38
  x->rdmult = orig_mult;
2563
38
  return 1;
2564
76
}
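rd_try_subblock above hands each sub-block search the RD budget left over after the sub-blocks already searched (rdcost_remaining), and treats a returned rate of INT_MAX as "over budget". The sketch below mirrors only that budgeting idea; the toy_* names and the numeric values are hypothetical, and the exact arithmetic of libaom's RDCOST and av1_rd_stats_subtraction differs (fixed-point scaling, overflow handling):

#include <inttypes.h>
#include <limits.h>
#include <stdio.h>

typedef struct { int rate; int64_t dist; int64_t rdcost; } toy_rd_stats;

/* Simplified Lagrangian cost: rate weighted by rdmult plus distortion.
 * Only the shape of the real RDCOST macro is kept here. */
static int64_t toy_rdcost(int rdmult, int rate, int64_t dist) {
  return (int64_t)rdmult * rate + dist;
}

int main(void) {
  const int rdmult = 64;                     /* assumed multiplier */
  toy_rd_stats best = { 1000, 50000, 0 };    /* current best partition cost */
  best.rdcost = toy_rdcost(rdmult, best.rate, best.dist);
  toy_rd_stats sum = { 300, 15000, 0 };      /* sub-blocks searched so far */
  sum.rdcost = toy_rdcost(rdmult, sum.rate, sum.dist);

  /* Remaining budget handed to the next sub-block's mode search. */
  const int64_t rdcost_remaining = best.rdcost - sum.rdcost;
  printf("best=%" PRId64 " spent=%" PRId64 " remaining=%" PRId64 "\n",
         best.rdcost, sum.rdcost, rdcost_remaining);

  /* A search that cannot fit in the budget reports rate == INT_MAX; the
   * caller then marks the whole partition invalid and stops early. */
  const int failed_rate = INT_MAX;
  if (failed_rate == INT_MAX) sum.rdcost = INT64_MAX;
  printf("after a failed sub-block: sum.rdcost=%" PRId64 "\n", sum.rdcost);
  return 0;
}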
2565
2566
// Tests an AB partition, and updates the encoder status, the pick mode
2567
// contexts, the best rdcost, and the best partition.
2568
static bool rd_test_partition3(AV1_COMP *const cpi, ThreadData *td,
2569
                               TileDataEnc *tile_data, TokenExtra **tp,
2570
                               PC_TREE *pc_tree, RD_STATS *best_rdc,
2571
                               int64_t *this_rdcost,
2572
                               PICK_MODE_CONTEXT *ctxs[SUB_PARTITIONS_AB],
2573
                               int mi_row, int mi_col, BLOCK_SIZE bsize,
2574
                               PARTITION_TYPE partition,
2575
                               const BLOCK_SIZE ab_subsize[SUB_PARTITIONS_AB],
2576
                               const int ab_mi_pos[SUB_PARTITIONS_AB][2],
2577
0
                               const MB_MODE_INFO **mode_cache) {
2578
0
  MACROBLOCK *const x = &td->mb;
2579
0
  const MACROBLOCKD *const xd = &x->e_mbd;
2580
0
  const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
2581
0
  RD_STATS sum_rdc;
2582
0
  av1_init_rd_stats(&sum_rdc);
2583
0
  sum_rdc.rate = x->mode_costs.partition_cost[pl][partition];
2584
0
  sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
2585
  // Loop over sub-partitions in AB partition type.
2586
0
  for (int i = 0; i < SUB_PARTITIONS_AB; i++) {
2587
0
    if (mode_cache && mode_cache[i]) {
2588
0
      x->use_mb_mode_cache = 1;
2589
0
      x->mb_mode_cache = mode_cache[i];
2590
0
    }
2591
0
    const int mode_search_success =
2592
0
        rd_try_subblock(cpi, td, tile_data, tp, i == SUB_PARTITIONS_AB - 1,
2593
0
                        ab_mi_pos[i][0], ab_mi_pos[i][1], ab_subsize[i],
2594
0
                        *best_rdc, &sum_rdc, partition, ctxs[i]);
2595
0
    x->use_mb_mode_cache = 0;
2596
0
    x->mb_mode_cache = NULL;
2597
0
    if (!mode_search_success) {
2598
0
      return false;
2599
0
    }
2600
0
  }
2601
2602
0
  av1_rd_cost_update(x->rdmult, &sum_rdc);
2603
0
  *this_rdcost = sum_rdc.rdcost;
2604
0
  if (sum_rdc.rdcost >= best_rdc->rdcost) return false;
2605
0
  sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
2606
0
  *this_rdcost = sum_rdc.rdcost;
2607
0
  if (sum_rdc.rdcost >= best_rdc->rdcost) return false;
2608
2609
0
  *best_rdc = sum_rdc;
2610
0
  pc_tree->partitioning = partition;
2611
0
  return true;
2612
0
}
2613
2614
#if CONFIG_COLLECT_PARTITION_STATS
2615
static void init_partition_block_timing_stats(
2616
    PartitionTimingStats *part_timing_stats) {
2617
  av1_zero(*part_timing_stats);
2618
}
2619
2620
static INLINE void start_partition_block_timer(
2621
    PartitionTimingStats *part_timing_stats, PARTITION_TYPE partition_type) {
2622
  assert(!part_timing_stats->timer_is_on);
2623
  part_timing_stats->partition_attempts[partition_type] += 1;
2624
  aom_usec_timer_start(&part_timing_stats->timer);
2625
  part_timing_stats->timer_is_on = 1;
2626
}
2627
2628
static INLINE void end_partition_block_timer(
2629
    PartitionTimingStats *part_timing_stats, PARTITION_TYPE partition_type,
2630
    int64_t rdcost) {
2631
  if (part_timing_stats->timer_is_on) {
2632
    aom_usec_timer_mark(&part_timing_stats->timer);
2633
    const int64_t time = aom_usec_timer_elapsed(&part_timing_stats->timer);
2634
    part_timing_stats->partition_times[partition_type] += time;
2635
    part_timing_stats->partition_rdcost[partition_type] = rdcost;
2636
    part_timing_stats->timer_is_on = 0;
2637
  }
2638
}
2639
static INLINE void print_partition_timing_stats_with_rdcost(
2640
    const PartitionTimingStats *part_timing_stats, int mi_row, int mi_col,
2641
    BLOCK_SIZE bsize, FRAME_UPDATE_TYPE frame_update_type, int frame_number,
2642
    const RD_STATS *best_rdc, const char *filename) {
2643
  FILE *f = fopen(filename, "a");
2644
  fprintf(f, "%d,%d,%d,%d,%d,%d,%" PRId64 ",%" PRId64 ",", bsize, frame_number,
2645
          frame_update_type, mi_row, mi_col, best_rdc->rate, best_rdc->dist,
2646
          best_rdc->rdcost);
2647
  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
2648
    fprintf(f, "%d,", part_timing_stats->partition_decisions[idx]);
2649
  }
2650
  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
2651
    fprintf(f, "%d,", part_timing_stats->partition_attempts[idx]);
2652
  }
2653
  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
2654
    fprintf(f, "%" PRId64 ",", part_timing_stats->partition_times[idx]);
2655
  }
2656
  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
2657
    if (part_timing_stats->partition_rdcost[idx] == INT64_MAX) {
2658
      fprintf(f, "%d,", -1);
2659
    } else {
2660
      fprintf(f, "%" PRId64 ",", part_timing_stats->partition_rdcost[idx]);
2661
    }
2662
  }
2663
  fprintf(f, "\n");
2664
  fclose(f);
2665
}
2666
2667
static INLINE void print_partition_timing_stats(
2668
    const PartitionTimingStats *part_timing_stats, int intra_only,
2669
    int show_frame, const BLOCK_SIZE bsize, const char *filename) {
2670
  FILE *f = fopen(filename, "a");
2671
  fprintf(f, "%d,%d,%d,", bsize, show_frame, intra_only);
2672
  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
2673
    fprintf(f, "%d,", part_timing_stats->partition_decisions[idx]);
2674
  }
2675
  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
2676
    fprintf(f, "%d,", part_timing_stats->partition_attempts[idx]);
2677
  }
2678
  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
2679
    fprintf(f, "%" PRId64 ",", part_timing_stats->partition_times[idx]);
2680
  }
2681
  fprintf(f, "\n");
2682
  fclose(f);
2683
}
2684
2685
static INLINE void accumulate_partition_timing_stats(
2686
    FramePartitionTimingStats *fr_part_timing_stats,
2687
    const PartitionTimingStats *part_timing_stats, BLOCK_SIZE bsize) {
2688
  const int bsize_idx = av1_get_bsize_idx_for_part_stats(bsize);
2689
  int *agg_attempts = fr_part_timing_stats->partition_attempts[bsize_idx];
2690
  int *agg_decisions = fr_part_timing_stats->partition_decisions[bsize_idx];
2691
  int64_t *agg_times = fr_part_timing_stats->partition_times[bsize_idx];
2692
  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
2693
    agg_attempts[idx] += part_timing_stats->partition_attempts[idx];
2694
    agg_decisions[idx] += part_timing_stats->partition_decisions[idx];
2695
    agg_times[idx] += part_timing_stats->partition_times[idx];
2696
  }
2697
}
2698
#endif  // CONFIG_COLLECT_PARTITION_STATS
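The CONFIG_COLLECT_PARTITION_STATS helpers above follow a start/stop/accumulate pattern, one accumulator per partition type. A small sketch of the same pattern using standard C clock() as a stand-in for aom_usec_timer; the workload and array sizes are placeholders:

#include <stdio.h>
#include <time.h>

int main(void) {
  long long partition_times_us[2] = { 0, 0 };  /* one accumulator per partition type */
  for (int type = 0; type < 2; type++) {
    const clock_t start = clock();             /* cf. start_partition_block_timer */
    volatile double sink = 0;
    for (int i = 0; i < 1000000; i++) sink += i * 0.5;  /* stand-in for the RD search */
    const clock_t end = clock();               /* cf. end_partition_block_timer */
    partition_times_us[type] +=
        (long long)(end - start) * 1000000LL / CLOCKS_PER_SEC;
    printf("partition type %d: accumulated %lld us (sink=%.1f)\n", type,
           partition_times_us[type], (double)sink);
  }
  return 0;
}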
2699
2700
// Initialize state variables of partition search used in
2701
// av1_rd_pick_partition().
2702
static void init_partition_search_state_params(
2703
    MACROBLOCK *x, AV1_COMP *const cpi, PartitionSearchState *part_search_state,
2704
88.0k
    int mi_row, int mi_col, BLOCK_SIZE bsize) {
2705
88.0k
  MACROBLOCKD *const xd = &x->e_mbd;
2706
88.0k
  const AV1_COMMON *const cm = &cpi->common;
2707
88.0k
  PartitionBlkParams *blk_params = &part_search_state->part_blk_params;
2708
88.0k
  const CommonModeInfoParams *const mi_params = &cpi->common.mi_params;
2709
2710
  // Initialization of block size related parameters.
2711
88.0k
  blk_params->mi_step = mi_size_wide[bsize] / 2;
2712
88.0k
  blk_params->mi_row = mi_row;
2713
88.0k
  blk_params->mi_col = mi_col;
2714
88.0k
  blk_params->mi_row_edge = mi_row + blk_params->mi_step;
2715
88.0k
  blk_params->mi_col_edge = mi_col + blk_params->mi_step;
2716
88.0k
  blk_params->width = block_size_wide[bsize];
2717
88.0k
  blk_params->min_partition_size_1d =
2718
88.0k
      block_size_wide[x->sb_enc.min_partition_size];
2719
88.0k
  blk_params->subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
2720
88.0k
  blk_params->split_bsize2 = blk_params->subsize;
2721
88.0k
  blk_params->bsize_at_least_8x8 = (bsize >= BLOCK_8X8);
2722
88.0k
  blk_params->bsize = bsize;
2723
2724
  // Check if the partition corresponds to edge block.
2725
88.0k
  blk_params->has_rows = (blk_params->mi_row_edge < mi_params->mi_rows);
2726
88.0k
  blk_params->has_cols = (blk_params->mi_col_edge < mi_params->mi_cols);
2727
2728
  // Update intra partitioning related info.
2729
88.0k
  part_search_state->intra_part_info = &x->part_search_info;
2730
  // Prepare for segmentation CNN-based partitioning for intra-frame.
2731
88.0k
  if (frame_is_intra_only(cm) && bsize == BLOCK_64X64) {
2732
11.7k
    part_search_state->intra_part_info->quad_tree_idx = 0;
2733
11.7k
    part_search_state->intra_part_info->cnn_output_valid = 0;
2734
11.7k
  }
2735
2736
  // Set partition plane context index.
2737
88.0k
  part_search_state->pl_ctx_idx =
2738
88.0k
      blk_params->bsize_at_least_8x8
2739
88.0k
          ? partition_plane_context(xd, mi_row, mi_col, bsize)
2740
88.0k
          : 0;
2741
2742
  // Partition cost buffer update
2743
88.0k
  ModeCosts *mode_costs = &x->mode_costs;
2744
88.0k
  part_search_state->partition_cost =
2745
88.0k
      mode_costs->partition_cost[part_search_state->pl_ctx_idx];
2746
2747
  // Initialize HORZ and VERT win flags as true for all split partitions.
2748
440k
  for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
2749
352k
    part_search_state->split_part_rect_win[i].rect_part_win[HORZ] = true;
2750
352k
    part_search_state->split_part_rect_win[i].rect_part_win[VERT] = true;
2751
352k
  }
2752
2753
  // Initialize the rd cost.
2754
88.0k
  av1_init_rd_stats(&part_search_state->this_rdc);
2755
2756
  // Initialize RD costs for partition types to 0.
2757
88.0k
  part_search_state->none_rd = 0;
2758
88.0k
  av1_zero(part_search_state->split_rd);
2759
88.0k
  av1_zero(part_search_state->rect_part_rd);
2760
2761
  // Initialize SPLIT partition to be not ready.
2762
88.0k
  av1_zero(part_search_state->is_split_ctx_is_ready);
2763
  // Initialize HORZ and VERT partitions to be not ready.
2764
88.0k
  av1_zero(part_search_state->is_rect_ctx_is_ready);
2765
2766
  // Chroma subsampling.
2767
88.0k
  part_search_state->ss_x = x->e_mbd.plane[1].subsampling_x;
2768
88.0k
  part_search_state->ss_y = x->e_mbd.plane[1].subsampling_y;
2769
2770
  // Initialize partition search flags to defaults.
2771
88.0k
  part_search_state->terminate_partition_search = 0;
2772
88.0k
  part_search_state->do_square_split = blk_params->bsize_at_least_8x8;
2773
88.0k
  part_search_state->do_rectangular_split =
2774
88.0k
      cpi->oxcf.part_cfg.enable_rect_partitions &&
2775
88.0k
      blk_params->bsize_at_least_8x8;
2776
88.0k
  av1_zero(part_search_state->prune_rect_part);
2777
2778
  // Initialize allowed partition types for the partition block.
2779
88.0k
  part_search_state->partition_none_allowed =
2780
88.0k
      av1_blk_has_rows_and_cols(blk_params);
2781
88.0k
  part_search_state->partition_rect_allowed[HORZ] =
2782
88.0k
      part_search_state->do_rectangular_split && blk_params->has_cols &&
2783
88.0k
      get_plane_block_size(get_partition_subsize(bsize, PARTITION_HORZ),
2784
57.5k
                           part_search_state->ss_x,
2785
57.5k
                           part_search_state->ss_y) != BLOCK_INVALID;
2786
88.0k
  part_search_state->partition_rect_allowed[VERT] =
2787
88.0k
      part_search_state->do_rectangular_split && blk_params->has_rows &&
2788
88.0k
      get_plane_block_size(get_partition_subsize(bsize, PARTITION_VERT),
2789
57.4k
                           part_search_state->ss_x,
2790
57.4k
                           part_search_state->ss_y) != BLOCK_INVALID;
2791
2792
  // Reset the flag indicating whether a partition leading to a rdcost lower
2793
  // than the bound best_rdc has been found.
2794
88.0k
  part_search_state->found_best_partition = false;
2795
2796
#if CONFIG_COLLECT_PARTITION_STATS
2797
  init_partition_block_timing_stats(&part_search_state->part_timing_stats);
2798
#endif  // CONFIG_COLLECT_PARTITION_STATS
2799
88.0k
}
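has_rows and has_cols above record whether the bottom and right halves of the block still lie inside the frame; partition_none_allowed is then derived from them via av1_blk_has_rows_and_cols(). A tiny sketch with a hypothetical frame size and block position:

#include <stdio.h>

int main(void) {
  const int mi_rows = 68, mi_cols = 120;             /* assumed frame size in mi units */
  const int mi_row = 64, mi_col = 112, mi_step = 8;  /* a 64x64 block near the corner  */
  const int has_rows = (mi_row + mi_step) < mi_rows; /* bottom half inside the frame?  */
  const int has_cols = (mi_col + mi_step) < mi_cols; /* right half inside the frame?   */
  printf("has_rows=%d has_cols=%d -> PARTITION_NONE %s\n", has_rows, has_cols,
         (has_rows && has_cols) ? "allowed" : "not allowed (edge block)");
  return 0;
}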
2800
2801
// Override partition cost buffer for the edge blocks.
2802
static void set_partition_cost_for_edge_blk(
2803
19.4k
    AV1_COMMON const *cm, PartitionSearchState *part_search_state) {
2804
19.4k
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
2805
19.4k
  assert(blk_params.bsize_at_least_8x8 && part_search_state->pl_ctx_idx >= 0);
2806
19.4k
  const aom_cdf_prob *partition_cdf =
2807
19.4k
      cm->fc->partition_cdf[part_search_state->pl_ctx_idx];
2808
19.4k
  const int max_cost = av1_cost_symbol(0);
2809
97.0k
  for (PARTITION_TYPE i = 0; i < PARTITION_TYPES; ++i)
2810
77.6k
    part_search_state->tmp_partition_cost[i] = max_cost;
2811
19.4k
  if (blk_params.has_cols) {
2812
    // At the bottom, the two possibilities are HORZ and SPLIT.
2813
9.00k
    aom_cdf_prob bot_cdf[2];
2814
9.00k
    partition_gather_vert_alike(bot_cdf, partition_cdf, blk_params.bsize);
2815
9.00k
    static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT };
2816
9.00k
    av1_cost_tokens_from_cdf(part_search_state->tmp_partition_cost, bot_cdf,
2817
9.00k
                             bot_inv_map);
2818
10.4k
  } else if (blk_params.has_rows) {
2819
    // At the right, the two possibilities are VERT and SPLIT.
2820
8.92k
    aom_cdf_prob rhs_cdf[2];
2821
8.92k
    partition_gather_horz_alike(rhs_cdf, partition_cdf, blk_params.bsize);
2822
8.92k
    static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT };
2823
8.92k
    av1_cost_tokens_from_cdf(part_search_state->tmp_partition_cost, rhs_cdf,
2824
8.92k
                             rhs_inv_map);
2825
8.92k
  } else {
2826
    // At the bottom right, we always split.
2827
1.48k
    part_search_state->tmp_partition_cost[PARTITION_SPLIT] = 0;
2828
1.48k
  }
2829
  // Override the partition cost buffer.
2830
19.4k
  part_search_state->partition_cost = part_search_state->tmp_partition_cost;
2831
19.4k
}
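For edge blocks the function above collapses the partition cost table to the types that remain codable. The sketch below restates that mapping from (has_rows, has_cols) to the allowed set; it is illustrative only:

#include <stdio.h>

/* Which partition types can still be coded for an edge block, following the
 * three branches in set_partition_cost_for_edge_blk. */
static const char *allowed_at_edge(int has_rows, int has_cols) {
  if (has_cols) return "HORZ or SPLIT";   /* bottom edge: the lower half is missing */
  if (has_rows) return "VERT or SPLIT";   /* right edge: the right half is missing  */
  return "SPLIT only";                    /* bottom-right corner                    */
}

int main(void) {
  printf("bottom edge       : %s\n", allowed_at_edge(0, 1));
  printf("right edge        : %s\n", allowed_at_edge(1, 0));
  printf("bottom-right edge : %s\n", allowed_at_edge(0, 0));
  return 0;
}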
2832
2833
// Reset the partition search state flags when
2834
// must_find_valid_partition is equal to 1.
2835
static AOM_INLINE void reset_part_limitations(
2836
0
    AV1_COMP *const cpi, PartitionSearchState *part_search_state) {
2837
0
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
2838
0
  const int is_rect_part_allowed =
2839
0
      blk_params.bsize_at_least_8x8 &&
2840
0
      cpi->oxcf.part_cfg.enable_rect_partitions &&
2841
0
      (blk_params.width > blk_params.min_partition_size_1d);
2842
0
  part_search_state->do_square_split =
2843
0
      blk_params.bsize_at_least_8x8 &&
2844
0
      (blk_params.width > blk_params.min_partition_size_1d);
2845
0
  part_search_state->partition_none_allowed =
2846
0
      av1_blk_has_rows_and_cols(&blk_params) &&
2847
0
      (blk_params.width >= blk_params.min_partition_size_1d);
2848
0
  part_search_state->partition_rect_allowed[HORZ] =
2849
0
      blk_params.has_cols && is_rect_part_allowed &&
2850
0
      get_plane_block_size(
2851
0
          get_partition_subsize(blk_params.bsize, PARTITION_HORZ),
2852
0
          part_search_state->ss_x, part_search_state->ss_y) != BLOCK_INVALID;
2853
0
  part_search_state->partition_rect_allowed[VERT] =
2854
0
      blk_params.has_rows && is_rect_part_allowed &&
2855
0
      get_plane_block_size(
2856
0
          get_partition_subsize(blk_params.bsize, PARTITION_VERT),
2857
0
          part_search_state->ss_x, part_search_state->ss_y) != BLOCK_INVALID;
2858
0
  part_search_state->terminate_partition_search = 0;
2859
0
}
2860
2861
// Rectangular partition evaluation at the sub-block level.
2862
static void rd_pick_rect_partition(AV1_COMP *const cpi, TileDataEnc *tile_data,
2863
                                   MACROBLOCK *x,
2864
                                   PICK_MODE_CONTEXT *cur_partition_ctx,
2865
                                   PartitionSearchState *part_search_state,
2866
                                   RD_STATS *best_rdc, const int idx,
2867
                                   int mi_row, int mi_col, BLOCK_SIZE bsize,
2868
53.9k
                                   PARTITION_TYPE partition_type) {
2869
  // Obtain the remainder from the best rd cost
2870
  // for further processing of partition.
2871
53.9k
  RD_STATS best_remain_rdcost;
2872
53.9k
  av1_rd_stats_subtraction(x->rdmult, best_rdc, &part_search_state->sum_rdc,
2873
53.9k
                           &best_remain_rdcost);
2874
2875
  // Obtain the best mode for the partition sub-block.
2876
53.9k
  pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &part_search_state->this_rdc,
2877
53.9k
                partition_type, bsize, cur_partition_ctx, best_remain_rdcost);
2878
53.9k
  av1_rd_cost_update(x->rdmult, &part_search_state->this_rdc);
2879
2880
  // Update the partition rd cost with the current sub-block rd.
2881
53.9k
  if (part_search_state->this_rdc.rate == INT_MAX) {
2882
24.4k
    part_search_state->sum_rdc.rdcost = INT64_MAX;
2883
29.4k
  } else {
2884
29.4k
    part_search_state->sum_rdc.rate += part_search_state->this_rdc.rate;
2885
29.4k
    part_search_state->sum_rdc.dist += part_search_state->this_rdc.dist;
2886
29.4k
    av1_rd_cost_update(x->rdmult, &part_search_state->sum_rdc);
2887
29.4k
  }
2888
53.9k
  const RECT_PART_TYPE rect_part =
2889
53.9k
      partition_type == PARTITION_HORZ ? HORZ : VERT;
2890
53.9k
  part_search_state->rect_part_rd[rect_part][idx] =
2891
53.9k
      part_search_state->this_rdc.rdcost;
2892
53.9k
}
2893
2894
typedef int (*active_edge_info)(const AV1_COMP *cpi, int mi_col, int mi_step);
2895
2896
// Checks if HORZ / VERT partition search is allowed.
2897
static AOM_INLINE int is_rect_part_allowed(
2898
    const AV1_COMP *cpi, const PartitionSearchState *part_search_state,
2899
    const active_edge_info *active_edge, RECT_PART_TYPE rect_part,
2900
176k
    const int mi_pos) {
2901
176k
  const PartitionBlkParams *blk_params = &part_search_state->part_blk_params;
2902
176k
  const int is_part_allowed =
2903
176k
      (!part_search_state->terminate_partition_search &&
2904
176k
       part_search_state->partition_rect_allowed[rect_part] &&
2905
176k
       !part_search_state->prune_rect_part[rect_part] &&
2906
176k
       (part_search_state->do_rectangular_split ||
2907
90.7k
        active_edge[rect_part](cpi, mi_pos, blk_params->mi_step)));
2908
176k
  return is_part_allowed;
2909
176k
}
2910
2911
static void rectangular_partition_search(
2912
    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
2913
    TokenExtra **tp, MACROBLOCK *x, PC_TREE *pc_tree,
2914
    RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
2915
    PartitionSearchState *part_search_state, RD_STATS *best_rdc,
2916
    RD_RECT_PART_WIN_INFO *rect_part_win_info, const RECT_PART_TYPE start_type,
2917
88.0k
    const RECT_PART_TYPE end_type) {
2918
88.0k
  const AV1_COMMON *const cm = &cpi->common;
2919
88.0k
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
2920
88.0k
  RD_STATS *sum_rdc = &part_search_state->sum_rdc;
2921
88.0k
  const int rect_partition_type[NUM_RECT_PARTS] = { PARTITION_HORZ,
2922
88.0k
                                                    PARTITION_VERT };
2923
2924
  // mi_pos_rect[NUM_RECT_PARTS][SUB_PARTITIONS_RECT][0]: mi_row position of
2925
  //                                           HORZ and VERT partition types.
2926
  // mi_pos_rect[NUM_RECT_PARTS][SUB_PARTITIONS_RECT][1]: mi_col position of
2927
  //                                           HORZ and VERT partition types.
2928
88.0k
  const int mi_pos_rect[NUM_RECT_PARTS][SUB_PARTITIONS_RECT][2] = {
2929
88.0k
    { { blk_params.mi_row, blk_params.mi_col },
2930
88.0k
      { blk_params.mi_row_edge, blk_params.mi_col } },
2931
88.0k
    { { blk_params.mi_row, blk_params.mi_col },
2932
88.0k
      { blk_params.mi_row, blk_params.mi_col_edge } }
2933
88.0k
  };
2934
2935
  // Initialize active edge_type function pointer
2936
  // for HORZ and VERT partition types.
2937
88.0k
  active_edge_info active_edge_type[NUM_RECT_PARTS] = { av1_active_h_edge,
2938
88.0k
                                                        av1_active_v_edge };
2939
2940
  // Indicates, for HORZ and VERT partition types, whether the second
  // sub-partition lies inside the frame (i.e., the block is not at the edge).
2941
88.0k
  const int is_not_edge_block[NUM_RECT_PARTS] = { blk_params.has_rows,
2942
88.0k
                                                  blk_params.has_cols };
2943
2944
  // Initialize pc tree context for HORZ and VERT partition types.
2945
88.0k
  PICK_MODE_CONTEXT **cur_ctx[NUM_RECT_PARTS][SUB_PARTITIONS_RECT] = {
2946
88.0k
    { &pc_tree->horizontal[0], &pc_tree->horizontal[1] },
2947
88.0k
    { &pc_tree->vertical[0], &pc_tree->vertical[1] }
2948
88.0k
  };
2949
2950
  // Loop over rectangular partition types.
2951
264k
  for (RECT_PART_TYPE i = start_type; i <= end_type; i++) {
2952
176k
    assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions,
2953
176k
                   !part_search_state->partition_rect_allowed[i]));
2954
2955
    // Check if the HORZ / VERT partition search is to be performed.
2956
176k
    if (!is_rect_part_allowed(cpi, part_search_state, active_edge_type, i,
2957
176k
                              mi_pos_rect[i][0][i]))
2958
128k
      continue;
2959
2960
    // Sub-partition idx.
2961
47.6k
    int sub_part_idx = 0;
2962
47.6k
    PARTITION_TYPE partition_type = rect_partition_type[i];
2963
47.6k
    blk_params.subsize =
2964
47.6k
        get_partition_subsize(blk_params.bsize, partition_type);
2965
47.6k
    assert(blk_params.subsize <= BLOCK_LARGEST);
2966
47.6k
    av1_init_rd_stats(sum_rdc);
2967
143k
    for (int j = 0; j < SUB_PARTITIONS_RECT; j++) {
2968
95.3k
      if (cur_ctx[i][j][0] == NULL) {
2969
95.3k
        cur_ctx[i][j][0] =
2970
95.3k
            av1_alloc_pmc(cpi, blk_params.subsize, &td->shared_coeff_buf);
2971
95.3k
      }
2972
95.3k
    }
2973
47.6k
    sum_rdc->rate = part_search_state->partition_cost[partition_type];
2974
47.6k
    sum_rdc->rdcost = RDCOST(x->rdmult, sum_rdc->rate, 0);
2975
#if CONFIG_COLLECT_PARTITION_STATS
2976
    PartitionTimingStats *part_timing_stats =
2977
        &part_search_state->part_timing_stats;
2978
    if (best_rdc->rdcost - sum_rdc->rdcost >= 0) {
2979
      start_partition_block_timer(part_timing_stats, partition_type);
2980
    }
2981
#endif
2982
2983
    // First sub-partition evaluation in HORZ / VERT partition type.
2984
47.6k
    rd_pick_rect_partition(
2985
47.6k
        cpi, tile_data, x, cur_ctx[i][sub_part_idx][0], part_search_state,
2986
47.6k
        best_rdc, 0, mi_pos_rect[i][sub_part_idx][0],
2987
47.6k
        mi_pos_rect[i][sub_part_idx][1], blk_params.subsize, partition_type);
2988
2989
    // Start of second sub-partition evaluation.
2990
    // Evaluate second sub-partition if the first sub-partition cost
2991
    // is less than the best cost and if it is not an edge block.
2992
47.6k
    if (sum_rdc->rdcost < best_rdc->rdcost && is_not_edge_block[i]) {
2993
6.24k
      const MB_MODE_INFO *const mbmi = &cur_ctx[i][sub_part_idx][0]->mic;
2994
6.24k
      const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
2995
      // Neither palette mode nor cfl predicted.
2996
6.24k
      if (pmi->palette_size[PLANE_TYPE_Y] == 0 &&
2997
6.24k
          pmi->palette_size[PLANE_TYPE_UV] == 0) {
2998
6.24k
        if (mbmi->uv_mode != UV_CFL_PRED)
2999
6.24k
          part_search_state->is_rect_ctx_is_ready[i] = 1;
3000
6.24k
      }
3001
6.24k
      av1_update_state(cpi, td, cur_ctx[i][sub_part_idx][0], blk_params.mi_row,
3002
6.24k
                       blk_params.mi_col, blk_params.subsize, DRY_RUN_NORMAL);
3003
6.24k
      encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL,
3004
6.24k
                        blk_params.subsize, NULL);
3005
3006
      // Second sub-partition evaluation in HORZ / VERT partition type.
3007
6.24k
      sub_part_idx = 1;
3008
6.24k
      rd_pick_rect_partition(
3009
6.24k
          cpi, tile_data, x, cur_ctx[i][sub_part_idx][0], part_search_state,
3010
6.24k
          best_rdc, 1, mi_pos_rect[i][sub_part_idx][0],
3011
6.24k
          mi_pos_rect[i][sub_part_idx][1], blk_params.subsize, partition_type);
3012
6.24k
    }
3013
    // Update HORZ / VERT best partition.
3014
47.6k
    if (sum_rdc->rdcost < best_rdc->rdcost) {
3015
17.8k
      sum_rdc->rdcost = RDCOST(x->rdmult, sum_rdc->rate, sum_rdc->dist);
3016
17.8k
      if (sum_rdc->rdcost < best_rdc->rdcost) {
3017
17.8k
        *best_rdc = *sum_rdc;
3018
17.8k
        part_search_state->found_best_partition = true;
3019
17.8k
        pc_tree->partitioning = partition_type;
3020
17.8k
      }
3021
29.8k
    } else {
3022
      // Update HORZ / VERT win flag.
3023
29.8k
      if (rect_part_win_info != NULL)
3024
29.8k
        rect_part_win_info->rect_part_win[i] = false;
3025
29.8k
    }
3026
#if CONFIG_COLLECT_PARTITION_STATS
3027
    if (part_timing_stats->timer_is_on) {
3028
      end_partition_block_timer(part_timing_stats, partition_type,
3029
                                sum_rdc->rdcost);
3030
    }
3031
#endif
3032
47.6k
    av1_restore_context(x, x_ctx, blk_params.mi_row, blk_params.mi_col,
3033
47.6k
                        blk_params.bsize, av1_num_planes(cm));
3034
47.6k
  }
3035
88.0k
}
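The mi_pos_rect table above places the second HORZ sub-block one mi_step below the block origin and the second VERT sub-block one mi_step to the right. A standalone sketch of that layout, assuming a block at mi (16, 32) with mi_step = 8 (values chosen for illustration):

#include <stdio.h>

enum { HORZ = 0, VERT = 1 };

int main(void) {
  const int mi_row = 16, mi_col = 32, mi_step = 8;  /* assumed: a 64x64 block */
  const int mi_pos_rect[2][2][2] = {
    /* HORZ: top half, then bottom half */
    { { mi_row, mi_col }, { mi_row + mi_step, mi_col } },
    /* VERT: left half, then right half */
    { { mi_row, mi_col }, { mi_row, mi_col + mi_step } },
  };
  const char *names[2] = { "HORZ", "VERT" };
  for (int part = HORZ; part <= VERT; part++)
    for (int sub = 0; sub < 2; sub++)
      printf("%s sub %d at mi (%d, %d)\n", names[part], sub,
             mi_pos_rect[part][sub][0], mi_pos_rect[part][sub][1]);
  return 0;
}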
3036
3037
// AB partition type evaluation.
3038
static void rd_pick_ab_part(
3039
    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
3040
    TokenExtra **tp, MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
3041
    PC_TREE *pc_tree, PICK_MODE_CONTEXT *dst_ctxs[SUB_PARTITIONS_AB],
3042
    PartitionSearchState *part_search_state, RD_STATS *best_rdc,
3043
    const BLOCK_SIZE ab_subsize[SUB_PARTITIONS_AB],
3044
    const int ab_mi_pos[SUB_PARTITIONS_AB][2], const PARTITION_TYPE part_type,
3045
0
    const MB_MODE_INFO **mode_cache) {
3046
0
  const AV1_COMMON *const cm = &cpi->common;
3047
0
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
3048
0
  const int mi_row = blk_params.mi_row;
3049
0
  const int mi_col = blk_params.mi_col;
3050
0
  const int bsize = blk_params.bsize;
3051
0
  int64_t this_rdcost = 0;
3052
3053
#if CONFIG_COLLECT_PARTITION_STATS
3054
  PartitionTimingStats *part_timing_stats =
3055
      &part_search_state->part_timing_stats;
3056
  {
3057
    RD_STATS tmp_sum_rdc;
3058
    av1_init_rd_stats(&tmp_sum_rdc);
3059
    tmp_sum_rdc.rate = part_search_state->partition_cost[part_type];
3060
    tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
3061
    if (best_rdc->rdcost - tmp_sum_rdc.rdcost >= 0) {
3062
      start_partition_block_timer(part_timing_stats, part_type);
3063
    }
3064
  }
3065
#endif
3066
3067
  // Test this partition and update the best partition.
3068
0
  const bool find_best_ab_part = rd_test_partition3(
3069
0
      cpi, td, tile_data, tp, pc_tree, best_rdc, &this_rdcost, dst_ctxs, mi_row,
3070
0
      mi_col, bsize, part_type, ab_subsize, ab_mi_pos, mode_cache);
3071
0
  part_search_state->found_best_partition |= find_best_ab_part;
3072
3073
#if CONFIG_COLLECT_PARTITION_STATS
3074
  if (part_timing_stats->timer_is_on) {
3075
    if (!find_best_ab_part) this_rdcost = INT64_MAX;
3076
    end_partition_block_timer(part_timing_stats, part_type, this_rdcost);
3077
  }
3078
#endif
3079
0
  av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm));
3080
0
}
3081
3082
// Set mode search context.
3083
static AOM_INLINE void set_mode_search_ctx(
3084
    PC_TREE *pc_tree, const int is_ctx_ready[NUM_AB_PARTS][2],
3085
66.6k
    PICK_MODE_CONTEXT **mode_srch_ctx[NUM_AB_PARTS][2]) {
3086
66.6k
  mode_srch_ctx[HORZ_B][0] = &pc_tree->horizontal[0];
3087
66.6k
  mode_srch_ctx[VERT_B][0] = &pc_tree->vertical[0];
3088
3089
66.6k
  if (is_ctx_ready[HORZ_A][0])
3090
15.7k
    mode_srch_ctx[HORZ_A][0] = &pc_tree->split[0]->none;
3091
3092
66.6k
  if (is_ctx_ready[VERT_A][0])
3093
15.7k
    mode_srch_ctx[VERT_A][0] = &pc_tree->split[0]->none;
3094
3095
66.6k
  if (is_ctx_ready[HORZ_A][1])
3096
7.03k
    mode_srch_ctx[HORZ_A][1] = &pc_tree->split[1]->none;
3097
66.6k
}
3098
3099
static AOM_INLINE void copy_partition_mode_from_mode_context(
3100
0
    const MB_MODE_INFO **dst_mode, const PICK_MODE_CONTEXT *ctx) {
3101
0
  if (ctx && ctx->rd_stats.rate < INT_MAX) {
3102
0
    *dst_mode = &ctx->mic;
3103
0
  } else {
3104
0
    *dst_mode = NULL;
3105
0
  }
3106
0
}
3107
3108
static AOM_INLINE void copy_partition_mode_from_pc_tree(
3109
0
    const MB_MODE_INFO **dst_mode, const PC_TREE *pc_tree) {
3110
0
  if (pc_tree) {
3111
0
    copy_partition_mode_from_mode_context(dst_mode, pc_tree->none);
3112
0
  } else {
3113
0
    *dst_mode = NULL;
3114
0
  }
3115
0
}
3116
3117
static AOM_INLINE void set_mode_cache_for_partition_ab(
3118
    const MB_MODE_INFO **mode_cache, const PC_TREE *pc_tree,
3119
0
    AB_PART_TYPE ab_part_type) {
3120
0
  switch (ab_part_type) {
3121
0
    case HORZ_A:
3122
0
      copy_partition_mode_from_pc_tree(&mode_cache[0], pc_tree->split[0]);
3123
0
      copy_partition_mode_from_pc_tree(&mode_cache[1], pc_tree->split[1]);
3124
0
      copy_partition_mode_from_mode_context(&mode_cache[2],
3125
0
                                            pc_tree->horizontal[1]);
3126
0
      break;
3127
0
    case HORZ_B:
3128
0
      copy_partition_mode_from_mode_context(&mode_cache[0],
3129
0
                                            pc_tree->horizontal[0]);
3130
0
      copy_partition_mode_from_pc_tree(&mode_cache[1], pc_tree->split[2]);
3131
0
      copy_partition_mode_from_pc_tree(&mode_cache[2], pc_tree->split[3]);
3132
0
      break;
3133
0
    case VERT_A:
3134
0
      copy_partition_mode_from_pc_tree(&mode_cache[0], pc_tree->split[0]);
3135
0
      copy_partition_mode_from_pc_tree(&mode_cache[1], pc_tree->split[2]);
3136
0
      copy_partition_mode_from_mode_context(&mode_cache[2],
3137
0
                                            pc_tree->vertical[1]);
3138
0
      break;
3139
0
    case VERT_B:
3140
0
      copy_partition_mode_from_mode_context(&mode_cache[0],
3141
0
                                            pc_tree->vertical[0]);
3142
0
      copy_partition_mode_from_pc_tree(&mode_cache[1], pc_tree->split[1]);
3143
0
      copy_partition_mode_from_pc_tree(&mode_cache[2], pc_tree->split[3]);
3144
0
      break;
3145
0
    default: assert(0 && "Invalid ab partition type!\n");
3146
0
  }
3147
0
}
3148
3149
// Search over AB partition types.
3150
static void ab_partitions_search(
3151
    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
3152
    TokenExtra **tp, MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
3153
    PC_TREE *pc_tree, PartitionSearchState *part_search_state,
3154
    RD_STATS *best_rdc, RD_RECT_PART_WIN_INFO *rect_part_win_info,
3155
    int pb_source_variance, int ext_partition_allowed,
3156
88.0k
    const AB_PART_TYPE start_type, const AB_PART_TYPE end_type) {
3157
88.0k
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
3158
88.0k
  const int mi_row = blk_params.mi_row;
3159
88.0k
  const int mi_col = blk_params.mi_col;
3160
88.0k
  const int bsize = blk_params.bsize;
3161
3162
88.0k
  if (part_search_state->terminate_partition_search) {
3163
21.4k
    return;
3164
21.4k
  }
3165
3166
66.6k
  int ab_partitions_allowed[NUM_AB_PARTS];
3167
  // Prune AB partitions
3168
66.6k
  av1_prune_ab_partitions(cpi, x, pc_tree, pb_source_variance, best_rdc->rdcost,
3169
66.6k
                          rect_part_win_info, ext_partition_allowed,
3170
66.6k
                          part_search_state, ab_partitions_allowed);
3171
3172
  // Flags to indicate whether the mode search is done.
3173
66.6k
  const int is_ctx_ready[NUM_AB_PARTS][2] = {
3174
66.6k
    { part_search_state->is_split_ctx_is_ready[0],
3175
66.6k
      part_search_state->is_split_ctx_is_ready[1] },
3176
66.6k
    { part_search_state->is_rect_ctx_is_ready[HORZ], 0 },
3177
66.6k
    { part_search_state->is_split_ctx_is_ready[0], 0 },
3178
66.6k
    { part_search_state->is_rect_ctx_is_ready[VERT], 0 }
3179
66.6k
  };
3180
3181
  // Current partition context.
3182
66.6k
  PICK_MODE_CONTEXT **cur_part_ctxs[NUM_AB_PARTS] = { pc_tree->horizontala,
3183
66.6k
                                                      pc_tree->horizontalb,
3184
66.6k
                                                      pc_tree->verticala,
3185
66.6k
                                                      pc_tree->verticalb };
3186
3187
  // Context of already evaluated partition types.
3188
66.6k
  PICK_MODE_CONTEXT **mode_srch_ctx[NUM_AB_PARTS][2];
3189
  // Set context of already evaluated partition types.
3190
66.6k
  set_mode_search_ctx(pc_tree, is_ctx_ready, mode_srch_ctx);
3191
3192
  // Array of sub-partition sizes for AB partition types.
3193
66.6k
  const BLOCK_SIZE ab_subsize[NUM_AB_PARTS][SUB_PARTITIONS_AB] = {
3194
66.6k
    { blk_params.split_bsize2, blk_params.split_bsize2,
3195
66.6k
      get_partition_subsize(bsize, PARTITION_HORZ_A) },
3196
66.6k
    { get_partition_subsize(bsize, PARTITION_HORZ_B), blk_params.split_bsize2,
3197
66.6k
      blk_params.split_bsize2 },
3198
66.6k
    { blk_params.split_bsize2, blk_params.split_bsize2,
3199
66.6k
      get_partition_subsize(bsize, PARTITION_VERT_A) },
3200
66.6k
    { get_partition_subsize(bsize, PARTITION_VERT_B), blk_params.split_bsize2,
3201
66.6k
      blk_params.split_bsize2 }
3202
66.6k
  };
3203
3204
  // Array of mi_row, mi_col positions corresponding to each sub-partition in AB
3205
  // partition types.
3206
66.6k
  const int ab_mi_pos[NUM_AB_PARTS][SUB_PARTITIONS_AB][2] = {
3207
66.6k
    { { mi_row, mi_col },
3208
66.6k
      { mi_row, blk_params.mi_col_edge },
3209
66.6k
      { blk_params.mi_row_edge, mi_col } },
3210
66.6k
    { { mi_row, mi_col },
3211
66.6k
      { blk_params.mi_row_edge, mi_col },
3212
66.6k
      { blk_params.mi_row_edge, blk_params.mi_col_edge } },
3213
66.6k
    { { mi_row, mi_col },
3214
66.6k
      { blk_params.mi_row_edge, mi_col },
3215
66.6k
      { mi_row, blk_params.mi_col_edge } },
3216
66.6k
    { { mi_row, mi_col },
3217
66.6k
      { mi_row, blk_params.mi_col_edge },
3218
66.6k
      { blk_params.mi_row_edge, blk_params.mi_col_edge } }
3219
66.6k
  };
3220
3221
  // Loop over AB partition types.
3222
332k
  for (AB_PART_TYPE ab_part_type = start_type; ab_part_type <= end_type;
3223
266k
       ab_part_type++) {
3224
266k
    const PARTITION_TYPE part_type = ab_part_type + PARTITION_HORZ_A;
3225
3226
    // Check if the AB partition search is to be performed.
3227
266k
    if (!ab_partitions_allowed[ab_part_type]) {
3228
266k
      continue;
3229
266k
    }
3230
3231
18.4E
    blk_params.subsize = get_partition_subsize(bsize, part_type);
3232
18.4E
    for (int i = 0; i < SUB_PARTITIONS_AB; i++) {
3233
      // Set AB partition context.
3234
0
      cur_part_ctxs[ab_part_type][i] = av1_alloc_pmc(
3235
0
          cpi, ab_subsize[ab_part_type][i], &td->shared_coeff_buf);
3236
      // Set mode as not ready.
3237
0
      cur_part_ctxs[ab_part_type][i]->rd_mode_is_ready = 0;
3238
0
    }
3239
3240
18.4E
    if (cpi->sf.part_sf.reuse_prev_rd_results_for_part_ab) {
3241
      // We can directly copy the mode search results if we have already
3242
      // searched the current block and the contexts match.
3243
0
      if (is_ctx_ready[ab_part_type][0]) {
3244
0
        av1_copy_tree_context(cur_part_ctxs[ab_part_type][0],
3245
0
                              mode_srch_ctx[ab_part_type][0][0]);
3246
0
        cur_part_ctxs[ab_part_type][0]->mic.partition = part_type;
3247
0
        cur_part_ctxs[ab_part_type][0]->rd_mode_is_ready = 1;
3248
0
        if (is_ctx_ready[ab_part_type][1]) {
3249
0
          av1_copy_tree_context(cur_part_ctxs[ab_part_type][1],
3250
0
                                mode_srch_ctx[ab_part_type][1][0]);
3251
0
          cur_part_ctxs[ab_part_type][1]->mic.partition = part_type;
3252
0
          cur_part_ctxs[ab_part_type][1]->rd_mode_is_ready = 1;
3253
0
        }
3254
0
      }
3255
0
    }
3256
3257
    // Even if the contexts don't match, we can still speed up by reusing the
3258
    // previous prediction mode.
3259
18.4E
    const MB_MODE_INFO *mode_cache[3] = { NULL, NULL, NULL };
3260
18.4E
    if (cpi->sf.part_sf.reuse_best_prediction_for_part_ab) {
3261
0
      set_mode_cache_for_partition_ab(mode_cache, pc_tree, ab_part_type);
3262
0
    }
3263
3264
    // Evaluation of AB partition type.
3265
18.4E
    rd_pick_ab_part(cpi, td, tile_data, tp, x, x_ctx, pc_tree,
3266
18.4E
                    cur_part_ctxs[ab_part_type], part_search_state, best_rdc,
3267
18.4E
                    ab_subsize[ab_part_type], ab_mi_pos[ab_part_type],
3268
18.4E
                    part_type, mode_cache);
3269
18.4E
  }
3270
66.6k
}
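The ab_mi_pos table above encodes the three sub-blocks of each AB partition type. The sketch below reproduces that geometry for a hypothetical block at mi (16, 32) with mi_step = 8; it is an illustration, not code from the encoder:

#include <stdio.h>

int main(void) {
  const int r = 16, c = 32, s = 8;  /* assumed mi_row, mi_col, mi_step */
  const char *names[4] = { "HORZ_A", "HORZ_B", "VERT_A", "VERT_B" };
  const int ab_mi_pos[4][3][2] = {
    { { r, c }, { r, c + s }, { r + s, c } },      /* HORZ_A: 2 squares + bottom rect */
    { { r, c }, { r + s, c }, { r + s, c + s } },  /* HORZ_B: top rect + 2 squares    */
    { { r, c }, { r + s, c }, { r, c + s } },      /* VERT_A: 2 squares + right rect  */
    { { r, c }, { r, c + s }, { r + s, c + s } },  /* VERT_B: left rect + 2 squares   */
  };
  for (int t = 0; t < 4; t++) {
    printf("%s:", names[t]);
    for (int i = 0; i < 3; i++)
      printf(" (%d,%d)", ab_mi_pos[t][i][0], ab_mi_pos[t][i][1]);
    printf("\n");
  }
  return 0;
}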
3271
3272
// Set mi positions for HORZ4 / VERT4 sub-block partitions.
3273
static void set_mi_pos_partition4(const int inc_step[NUM_PART4_TYPES],
3274
                                  int mi_pos[SUB_PARTITIONS_PART4][2],
3275
38
                                  const int mi_row, const int mi_col) {
3276
190
  for (PART4_TYPES i = 0; i < SUB_PARTITIONS_PART4; i++) {
3277
152
    mi_pos[i][0] = mi_row + i * inc_step[HORZ4];
3278
152
    mi_pos[i][1] = mi_col + i * inc_step[VERT4];
3279
152
  }
3280
38
}
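set_mi_pos_partition4 steps one of the two coordinates four times while the other stays fixed. The sketch assumes the caller passes a quarter-block step for the active direction and 0 for the other (the caller sits outside this excerpt), e.g. 4 mi units for HORZ_4 on a 64x64 block:

#include <stdio.h>

enum { HORZ4 = 0, VERT4 = 1, NUM_PART4_TYPES = 2, SUB_PARTITIONS_PART4 = 4 };

static void toy_set_mi_pos_partition4(const int inc_step[NUM_PART4_TYPES],
                                      int mi_pos[SUB_PARTITIONS_PART4][2],
                                      int mi_row, int mi_col) {
  for (int i = 0; i < SUB_PARTITIONS_PART4; i++) {
    mi_pos[i][0] = mi_row + i * inc_step[HORZ4];  /* row stepping for HORZ4 */
    mi_pos[i][1] = mi_col + i * inc_step[VERT4];  /* col stepping for VERT4 */
  }
}

int main(void) {
  int mi_pos[SUB_PARTITIONS_PART4][2];
  /* Assumed step: a 64x64 block is 16x16 mi units, so a quarter is 4 mi rows. */
  const int horz4_step[NUM_PART4_TYPES] = { 4, 0 };
  toy_set_mi_pos_partition4(horz4_step, mi_pos, 16, 32);
  for (int i = 0; i < SUB_PARTITIONS_PART4; i++)
    printf("HORZ4 strip %d at mi (%d, %d)\n", i, mi_pos[i][0], mi_pos[i][1]);
  return 0;
}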
3281
3282
// Set context and RD cost for HORZ4 / VERT4 partition types.
3283
static void set_4_part_ctx_and_rdcost(
3284
    MACROBLOCK *x, const AV1_COMP *const cpi, ThreadData *td,
3285
    PICK_MODE_CONTEXT *cur_part_ctx[SUB_PARTITIONS_PART4],
3286
    PartitionSearchState *part_search_state, PARTITION_TYPE partition_type,
3287
38
    BLOCK_SIZE bsize) {
3288
  // Initialize sum_rdc RD cost structure.
3289
38
  av1_init_rd_stats(&part_search_state->sum_rdc);
3290
38
  const int subsize = get_partition_subsize(bsize, partition_type);
3291
38
  part_search_state->sum_rdc.rate =
3292
38
      part_search_state->partition_cost[partition_type];
3293
38
  part_search_state->sum_rdc.rdcost =
3294
38
      RDCOST(x->rdmult, part_search_state->sum_rdc.rate, 0);
3295
190
  for (PART4_TYPES i = 0; i < SUB_PARTITIONS_PART4; ++i)
3296
152
    cur_part_ctx[i] = av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
3297
38
}
3298
3299
// Partition search of HORZ4 / VERT4 partition types.
3300
static void rd_pick_4partition(
3301
    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
3302
    TokenExtra **tp, MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
3303
    PC_TREE *pc_tree, PICK_MODE_CONTEXT *cur_part_ctx[SUB_PARTITIONS_PART4],
3304
    PartitionSearchState *part_search_state, RD_STATS *best_rdc,
3305
38
    const int inc_step[NUM_PART4_TYPES], PARTITION_TYPE partition_type) {
3306
38
  const AV1_COMMON *const cm = &cpi->common;
3307
38
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
3308
  // Frame boundary mi positions used to check HORZ4 and VERT4 sub-blocks.
3309
38
  int mi_pos_check[NUM_PART4_TYPES] = { cm->mi_params.mi_rows,
3310
38
                                        cm->mi_params.mi_cols };
3311
38
  const PART4_TYPES part4_idx = (partition_type != PARTITION_HORZ_4);
3312
38
  int mi_pos[SUB_PARTITIONS_PART4][2];
3313
3314
38
  blk_params.subsize = get_partition_subsize(blk_params.bsize, partition_type);
3315
  // Set partition context and RD cost.
3316
38
  set_4_part_ctx_and_rdcost(x, cpi, td, cur_part_ctx, part_search_state,
3317
38
                            partition_type, blk_params.bsize);
3318
  // Set mi positions for sub-block sizes.
3319
38
  set_mi_pos_partition4(inc_step, mi_pos, blk_params.mi_row, blk_params.mi_col);
3320
#if CONFIG_COLLECT_PARTITION_STATS
3321
  PartitionTimingStats *part_timing_stats =
3322
      &part_search_state->part_timing_stats;
3323
  if (best_rdc->rdcost - part_search_state->sum_rdc.rdcost >= 0) {
3324
    start_partition_block_timer(part_timing_stats, partition_type);
3325
  }
3326
#endif
3327
  // Loop over sub-block partitions.
3328
76
  for (PART4_TYPES i = 0; i < SUB_PARTITIONS_PART4; ++i) {
3329
76
    if (i > 0 && mi_pos[i][part4_idx] >= mi_pos_check[part4_idx]) break;
3330
3331
    // Sub-block evaluation of Horz4 / Vert4 partition type.
3332
76
    cur_part_ctx[i]->rd_mode_is_ready = 0;
3333
76
    if (!rd_try_subblock(
3334
76
            cpi, td, tile_data, tp, (i == SUB_PARTITIONS_PART4 - 1),
3335
76
            mi_pos[i][0], mi_pos[i][1], blk_params.subsize, *best_rdc,
3336
76
            &part_search_state->sum_rdc, partition_type, cur_part_ctx[i])) {
3337
38
      av1_invalid_rd_stats(&part_search_state->sum_rdc);
3338
38
      break;
3339
38
    }
3340
76
  }
3341
3342
  // Calculate the total cost and update the best partition.
3343
38
  av1_rd_cost_update(x->rdmult, &part_search_state->sum_rdc);
3344
38
  if (part_search_state->sum_rdc.rdcost < best_rdc->rdcost) {
3345
0
    *best_rdc = part_search_state->sum_rdc;
3346
0
    part_search_state->found_best_partition = true;
3347
0
    pc_tree->partitioning = partition_type;
3348
0
  }
3349
#if CONFIG_COLLECT_PARTITION_STATS
3350
  if (part_timing_stats->timer_is_on) {
3351
    end_partition_block_timer(part_timing_stats, partition_type,
3352
                              part_search_state->sum_rdc.rdcost);
3353
  }
3354
#endif
3355
38
  av1_restore_context(x, x_ctx, blk_params.mi_row, blk_params.mi_col,
3356
38
                      blk_params.bsize, av1_num_planes(cm));
3357
38
}
3358
3359
// Prune 4-way partitions based on the number of horz/vert wins
3360
// in the current block and sub-blocks in PARTITION_SPLIT.
3361
static void prune_4_partition_using_split_info(
3362
    AV1_COMP *const cpi, MACROBLOCK *x, PartitionSearchState *part_search_state,
3363
45.6k
    int part4_search_allowed[NUM_PART4_TYPES]) {
3364
45.6k
  PART4_TYPES cur_part[NUM_PART4_TYPES] = { HORZ4, VERT4 };
3365
  // Count of child blocks in which HORZ or VERT partition has won
3366
45.6k
  int num_child_rect_win[NUM_RECT_PARTS] = { 0, 0 };
3367
  // Prune HORZ4/VERT4 partitions based on number of HORZ/VERT winners of
3368
  // split partitions.
3369
  // Conservative pruning for high quantizers.
3370
45.6k
  const int num_win_thresh = AOMMIN(3 * (MAXQ - x->qindex) / MAXQ + 1, 3);
3371
3372
137k
  for (RECT_PART_TYPE i = HORZ; i < NUM_RECT_PARTS; i++) {
3373
91.3k
    if (!(cpi->sf.part_sf.prune_ext_part_using_split_info &&
3374
91.3k
          part4_search_allowed[cur_part[i]]))
3375
91.3k
      continue;
3376
    // Loop over split partitions.
3377
    // Get rectangular partitions winner info of split partitions.
3378
192
    for (int idx = 0; idx < SUB_PARTITIONS_SPLIT; idx++)
3379
152
      num_child_rect_win[i] +=
3380
152
          (part_search_state->split_part_rect_win[idx].rect_part_win[i]) ? 1
3381
152
                                                                         : 0;
3382
40
    if (num_child_rect_win[i] < num_win_thresh) {
3383
0
      part4_search_allowed[cur_part[i]] = 0;
3384
0
    }
3385
40
  }
3386
45.6k
}
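The pruning above keeps HORZ4/VERT4 only when enough of the four split children still carry the corresponding rectangular win flag, with the qindex-dependent threshold AOMMIN(3 * (MAXQ - qindex) / MAXQ + 1, 3). A worked sketch of that formula, taking MAXQ as 255 (the largest AV1 qindex):

#include <stdio.h>

#define TOY_MAXQ 255                       /* MAXQ: top of the AV1 qindex range */
#define TOY_MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void) {
  const int qindices[] = { 0, 64, 128, 192, 255 };
  for (int i = 0; i < 5; ++i) {
    const int q = qindices[i];
    const int num_win_thresh = TOY_MIN(3 * (TOY_MAXQ - q) / TOY_MAXQ + 1, 3);
    printf("qindex %3d -> need at least %d of 4 split children with the "
           "rectangular win flag set to keep HORZ4/VERT4\n", q, num_win_thresh);
  }
  return 0;
}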
3387
3388
// Prune 4-way partition search.
3389
static void prune_4_way_partition_search(
3390
    AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree,
3391
    PartitionSearchState *part_search_state, RD_STATS *best_rdc,
3392
    int pb_source_variance, int ext_partition_allowed,
3393
88.0k
    int part4_search_allowed[NUM_PART4_TYPES]) {
3394
88.0k
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
3395
3396
  // Disable 4-way partition search flags for width less than a multiple of the
3397
  // minimum partition width.
3398
88.0k
  if (blk_params.width < (blk_params.min_partition_size_1d
3399
88.0k
                          << cpi->sf.part_sf.prune_part4_search)) {
3400
42.3k
    part4_search_allowed[HORZ4] = 0;
3401
42.3k
    part4_search_allowed[VERT4] = 0;
3402
42.3k
    return;
3403
42.3k
  }
3404
3405
45.6k
  const int bsize = blk_params.bsize;
3406
45.6k
  PARTITION_TYPE cur_part[NUM_PART4_TYPES] = { PARTITION_HORZ_4,
3407
45.6k
                                               PARTITION_VERT_4 };
3408
45.6k
  const PartitionCfg *const part_cfg = &cpi->oxcf.part_cfg;
3409
  // partition4_allowed is 1 if we can use a PARTITION_HORZ_4 or
3410
  // PARTITION_VERT_4 for this block. This is almost the same as
3411
  // ext_partition_allowed, except that we don't allow 128x32 or 32x128
3412
  // blocks, so we require that bsize is not BLOCK_128X128.
3413
45.6k
  const int partition4_allowed = part_cfg->enable_1to4_partitions &&
3414
45.6k
                                 ext_partition_allowed &&
3415
45.6k
                                 bsize != BLOCK_128X128;
3416
3417
137k
  for (PART4_TYPES i = HORZ4; i < NUM_PART4_TYPES; i++) {
3418
91.3k
    part4_search_allowed[i] =
3419
91.3k
        partition4_allowed && part_search_state->partition_rect_allowed[i] &&
3420
91.3k
        get_plane_block_size(get_partition_subsize(bsize, cur_part[i]),
3421
10.8k
                             part_search_state->ss_x,
3422
10.8k
                             part_search_state->ss_y) != BLOCK_INVALID;
3423
91.3k
  }
3424
  // Pruning: pruning out 4-way partitions based on the current best partition.
3425
45.6k
  if (cpi->sf.part_sf.prune_ext_partition_types_search_level == 2) {
3426
0
    part4_search_allowed[HORZ4] &= (pc_tree->partitioning == PARTITION_HORZ ||
3427
0
                                    pc_tree->partitioning == PARTITION_HORZ_A ||
3428
0
                                    pc_tree->partitioning == PARTITION_HORZ_B ||
3429
0
                                    pc_tree->partitioning == PARTITION_SPLIT ||
3430
0
                                    pc_tree->partitioning == PARTITION_NONE);
3431
0
    part4_search_allowed[VERT4] &= (pc_tree->partitioning == PARTITION_VERT ||
3432
0
                                    pc_tree->partitioning == PARTITION_VERT_A ||
3433
0
                                    pc_tree->partitioning == PARTITION_VERT_B ||
3434
0
                                    pc_tree->partitioning == PARTITION_SPLIT ||
3435
0
                                    pc_tree->partitioning == PARTITION_NONE);
3436
0
  }
3437
3438
  // Pruning: pruning out some 4-way partitions using a DNN taking rd costs of
3439
  // sub-blocks from basic partition types.
3440
45.6k
  if (cpi->sf.part_sf.ml_prune_partition && partition4_allowed &&
3441
45.6k
      part_search_state->partition_rect_allowed[HORZ] &&
3442
45.6k
      part_search_state->partition_rect_allowed[VERT]) {
3443
5.40k
    av1_ml_prune_4_partition(cpi, x, pc_tree->partitioning, best_rdc->rdcost,
3444
5.40k
                             part_search_state, part4_search_allowed,
3445
5.40k
                             pb_source_variance);
3446
5.40k
  }
3447
3448
  // Pruning: pruning out 4-way partitions based on the number of horz/vert wins
3449
  // in the current block and sub-blocks in PARTITION_SPLIT.
3450
45.6k
  prune_4_partition_using_split_info(cpi, x, part_search_state,
3451
45.6k
                                     part4_search_allowed);
3452
45.6k
}
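The width gate at the top of the function above disables HORZ4/VERT4 whenever the block width falls below the minimum partition width shifted by the prune_part4_search speed feature. A small sketch of that threshold; the minimum width value here is a placeholder:

#include <stdio.h>

int main(void) {
  const int min_partition_size_1d = 8;  /* assumed minimum partition width in pixels */
  for (int sf_level = 0; sf_level <= 2; ++sf_level) {
    const int thresh = min_partition_size_1d << sf_level;
    for (int width = 8; width <= 64; width *= 2)
      printf("prune_part4_search=%d width=%2d -> 4-way search %s\n", sf_level,
             width, (width < thresh) ? "disabled" : "considered");
  }
  return 0;
}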
3453
3454
// Set params needed for PARTITION_NONE search.
3455
static void set_none_partition_params(const AV1_COMP *const cpi, ThreadData *td,
3456
                                      MACROBLOCK *x, PC_TREE *pc_tree,
3457
                                      PartitionSearchState *part_search_state,
3458
                                      RD_STATS *best_remain_rdcost,
3459
68.6k
                                      RD_STATS *best_rdc, int *pt_cost) {
3460
68.6k
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
3461
68.6k
  RD_STATS partition_rdcost;
3462
  // Set PARTITION_NONE context.
3463
68.6k
  if (pc_tree->none == NULL)
3464
68.6k
    pc_tree->none = av1_alloc_pmc(cpi, blk_params.bsize, &td->shared_coeff_buf);
3465
3466
  // Set PARTITION_NONE type cost.
3467
68.6k
  if (part_search_state->partition_none_allowed) {
3468
68.6k
    if (blk_params.bsize_at_least_8x8) {
3469
48.5k
      *pt_cost = part_search_state->partition_cost[PARTITION_NONE] < INT_MAX
3470
48.5k
                     ? part_search_state->partition_cost[PARTITION_NONE]
3471
48.5k
                     : 0;
3472
48.5k
    }
3473
3474
    // Initialize the RD stats structure.
3475
68.6k
    av1_init_rd_stats(&partition_rdcost);
3476
68.6k
    partition_rdcost.rate = *pt_cost;
3477
68.6k
    av1_rd_cost_update(x->rdmult, &partition_rdcost);
3478
68.6k
    av1_rd_stats_subtraction(x->rdmult, best_rdc, &partition_rdcost,
3479
68.6k
                             best_remain_rdcost);
3480
68.6k
  }
3481
68.6k
}
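
The helper above converts the PARTITION_NONE signaling rate into an rd cost and subtracts it from the current best, producing the remaining budget handed to pick_sb_modes. A simplified scalar model of that budget (the real av1_rd_stats_subtraction tracks rate and distortion as well; the clamp to zero is part of the simplification):

#include <stdint.h>

// Simplified "remaining rd budget": an unbounded (INT64_MAX) best cost stays
// unbounded, otherwise the partition-type signaling cost is deducted so the
// mode search can give up once the budget is exhausted.
static int64_t remaining_rd_budget(int64_t best_rdcost,
                                   int64_t partition_type_rdcost) {
  if (best_rdcost == INT64_MAX) return INT64_MAX;
  const int64_t remain = best_rdcost - partition_type_rdcost;
  return remain > 0 ? remain : 0;
}
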
3482
3483
// Skip other partitions based on PARTITION_NONE rd cost.
3484
static void prune_partitions_after_none(AV1_COMP *const cpi, MACROBLOCK *x,
3485
                                        SIMPLE_MOTION_DATA_TREE *sms_tree,
3486
                                        PICK_MODE_CONTEXT *ctx_none,
3487
                                        PartitionSearchState *part_search_state,
3488
                                        RD_STATS *best_rdc,
3489
36.0k
                                        unsigned int *pb_source_variance) {
3490
36.0k
  const AV1_COMMON *const cm = &cpi->common;
3491
36.0k
  MACROBLOCKD *const xd = &x->e_mbd;
3492
36.0k
  const PartitionBlkParams blk_params = part_search_state->part_blk_params;
3493
36.0k
  RD_STATS *this_rdc = &part_search_state->this_rdc;
3494
36.0k
  const BLOCK_SIZE bsize = blk_params.bsize;
3495
36.0k
  assert(bsize < BLOCK_SIZES_ALL);
3496
3497
36.0k
  if (!frame_is_intra_only(cm) &&
3498
36.0k
      (part_search_state->do_square_split ||
3499
0
       part_search_state->do_rectangular_split) &&
3500
36.0k
      !x->e_mbd.lossless[xd->mi[0]->segment_id] && ctx_none->skippable) {
3501
0
    const int use_ml_based_breakout =
3502
0
        bsize <= cpi->sf.part_sf.use_square_partition_only_threshold &&
3503
0
        bsize > BLOCK_4X4 && cpi->sf.part_sf.ml_predict_breakout_level >= 1;
3504
0
    if (use_ml_based_breakout) {
3505
0
      av1_ml_predict_breakout(cpi, x, this_rdc, *pb_source_variance, xd->bd,
3506
0
                              part_search_state);
3507
0
    }
3508
3509
    // Adjust dist breakout threshold according to the partition size.
3510
0
    const int64_t dist_breakout_thr =
3511
0
        cpi->sf.part_sf.partition_search_breakout_dist_thr >>
3512
0
        ((2 * (MAX_SB_SIZE_LOG2 - 2)) -
3513
0
         (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]));
3514
0
    const int rate_breakout_thr =
3515
0
        cpi->sf.part_sf.partition_search_breakout_rate_thr *
3516
0
        num_pels_log2_lookup[bsize];
3517
    // If all y, u, v transform blocks in this partition are skippable,
3518
    // and the dist & rate are within the thresholds, the partition
3519
    // search is terminated for the current branch of the partition search
3520
    // tree. The dist & rate thresholds are set to 0 at speed 0 to
3521
    // disable the early termination at that speed.
3522
0
    if (best_rdc->dist < dist_breakout_thr &&
3523
0
        best_rdc->rate < rate_breakout_thr) {
3524
0
      part_search_state->do_square_split = 0;
3525
0
      part_search_state->do_rectangular_split = 0;
3526
0
    }
3527
0
  }
3528
3529
  // Early termination: using simple_motion_search features and the
3530
  // rate, distortion, and rdcost of PARTITION_NONE, a DNN will make a
3531
  // decision on whether to terminate the search early at PARTITION_NONE.
3532
36.0k
  if (cpi->sf.part_sf.simple_motion_search_early_term_none && cm->show_frame &&
3533
36.0k
      !frame_is_intra_only(cm) && bsize >= BLOCK_16X16 &&
3534
36.0k
      av1_blk_has_rows_and_cols(&blk_params) && this_rdc->rdcost < INT64_MAX &&
3535
36.0k
      this_rdc->rdcost >= 0 && this_rdc->rate < INT_MAX &&
3536
36.0k
      this_rdc->rate >= 0 &&
3537
36.0k
      (part_search_state->do_square_split ||
3538
0
       part_search_state->do_rectangular_split)) {
3539
0
    av1_simple_motion_search_early_term_none(cpi, x, sms_tree, this_rdc,
3540
0
                                             part_search_state);
3541
0
  }
3542
36.0k
}
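
The distortion breakout threshold above is scaled down from a per-superblock value by the ratio of the maximum superblock area to the current block area. A short sketch of that scaling, assuming 128x128 superblocks (MAX_SB_SIZE_LOG2 == 7) and that mi_size_*_log2 give log2 of a dimension in 4x4 mi units:

// Example: a 32x32 block has w_log2_mi == h_log2_mi == 3, so
// shift == 2 * (7 - 2) - (3 + 3) == 4 and the threshold is divided by 16,
// which is exactly the area ratio (128 * 128) / (32 * 32).
static long long scaled_dist_breakout_thr(long long sb_level_thr, int w_log2_mi,
                                          int h_log2_mi) {
  const int max_sb_size_log2 = 7;  // assumed 128x128 superblocks
  const int shift = 2 * (max_sb_size_log2 - 2) - (w_log2_mi + h_log2_mi);
  return sb_level_thr >> shift;
}
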
3543
3544
// Decide early termination and rectangular partition pruning
3545
// based on PARTITION_NONE and PARTITION_SPLIT costs.
3546
static void prune_partitions_after_split(
3547
    AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree,
3548
    PartitionSearchState *part_search_state, RD_STATS *best_rdc,
3549
88.0k
    int64_t part_none_rd, int64_t part_split_rd) {
3550
88.0k
  const AV1_COMMON *const cm = &cpi->common;
3551
88.0k
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
3552
88.0k
  const int mi_row = blk_params.mi_row;
3553
88.0k
  const int mi_col = blk_params.mi_col;
3554
88.0k
  const BLOCK_SIZE bsize = blk_params.bsize;
3555
88.0k
  assert(bsize < BLOCK_SIZES_ALL);
3556
3557
  // Early termination: using the rd costs of PARTITION_NONE and subblocks
3558
  // from PARTITION_SPLIT to determine an early breakout.
3559
88.0k
  if (cpi->sf.part_sf.ml_early_term_after_part_split_level &&
3560
88.0k
      !frame_is_intra_only(cm) &&
3561
88.0k
      !part_search_state->terminate_partition_search &&
3562
88.0k
      part_search_state->do_rectangular_split &&
3563
88.0k
      (part_search_state->partition_rect_allowed[HORZ] ||
3564
0
       part_search_state->partition_rect_allowed[VERT])) {
3565
0
    av1_ml_early_term_after_split(
3566
0
        cpi, x, sms_tree, best_rdc->rdcost, part_none_rd, part_split_rd,
3567
0
        part_search_state->split_rd, part_search_state);
3568
0
  }
3569
3570
  // Use the rd costs of PARTITION_NONE and subblocks from PARTITION_SPLIT
3571
  // to prune out rectangular partitions in some directions.
3572
88.0k
  if (!cpi->sf.part_sf.ml_early_term_after_part_split_level &&
3573
88.0k
      cpi->sf.part_sf.ml_prune_partition && !frame_is_intra_only(cm) &&
3574
88.0k
      (part_search_state->partition_rect_allowed[HORZ] ||
3575
0
       part_search_state->partition_rect_allowed[VERT]) &&
3576
88.0k
      !(part_search_state->prune_rect_part[HORZ] ||
3577
0
        part_search_state->prune_rect_part[VERT]) &&
3578
88.0k
      !part_search_state->terminate_partition_search) {
3579
0
    av1_setup_src_planes(x, cpi->source, mi_row, mi_col, av1_num_planes(cm),
3580
0
                         bsize);
3581
0
    av1_ml_prune_rect_partition(cpi, x, best_rdc->rdcost,
3582
0
                                part_search_state->none_rd,
3583
0
                                part_search_state->split_rd, part_search_state);
3584
0
  }
3585
88.0k
}
3586
3587
// PARTITION_NONE search.
3588
static void none_partition_search(
3589
    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, MACROBLOCK *x,
3590
    PC_TREE *pc_tree, SIMPLE_MOTION_DATA_TREE *sms_tree,
3591
    RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
3592
    PartitionSearchState *part_search_state, RD_STATS *best_rdc,
3593
88.0k
    unsigned int *pb_source_variance, int64_t *none_rd, int64_t *part_none_rd) {
3594
88.0k
  const AV1_COMMON *const cm = &cpi->common;
3595
88.0k
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
3596
88.0k
  RD_STATS *this_rdc = &part_search_state->this_rdc;
3597
88.0k
  const int mi_row = blk_params.mi_row;
3598
88.0k
  const int mi_col = blk_params.mi_col;
3599
88.0k
  const BLOCK_SIZE bsize = blk_params.bsize;
3600
88.0k
  assert(bsize < BLOCK_SIZES_ALL);
3601
3602
88.0k
  if (part_search_state->terminate_partition_search ||
3603
88.0k
      !part_search_state->partition_none_allowed)
3604
19.4k
    return;
3605
3606
68.6k
  int pt_cost = 0;
3607
68.6k
  RD_STATS best_remain_rdcost;
3608
68.6k
  av1_invalid_rd_stats(&best_remain_rdcost);
3609
3610
  // Set PARTITION_NONE context and cost.
3611
68.6k
  set_none_partition_params(cpi, td, x, pc_tree, part_search_state,
3612
68.6k
                            &best_remain_rdcost, best_rdc, &pt_cost);
3613
3614
#if CONFIG_COLLECT_PARTITION_STATS
3615
  // Timer start for partition None.
3616
  PartitionTimingStats *part_timing_stats =
3617
      &part_search_state->part_timing_stats;
3618
  if (best_remain_rdcost.rdcost >= 0) {
3619
    start_partition_block_timer(part_timing_stats, PARTITION_NONE);
3620
  }
3621
#endif
3622
  // PARTITION_NONE evaluation and cost update.
3623
68.6k
  pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc, PARTITION_NONE,
3624
68.6k
                bsize, pc_tree->none, best_remain_rdcost);
3625
3626
68.6k
  av1_rd_cost_update(x->rdmult, this_rdc);
3627
3628
#if CONFIG_COLLECT_PARTITION_STATS
3629
  // Timer end for partition None.
3630
  if (part_timing_stats->timer_is_on) {
3631
    RD_STATS tmp_rdc;
3632
    av1_init_rd_stats(&tmp_rdc);
3633
    if (this_rdc->rate != INT_MAX) {
3634
      tmp_rdc.rate = this_rdc->rate;
3635
      tmp_rdc.dist = this_rdc->dist;
3636
      tmp_rdc.rdcost = this_rdc->rdcost;
3637
      if (blk_params.bsize_at_least_8x8) {
3638
        tmp_rdc.rate += pt_cost;
3639
        tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist);
3640
      }
3641
    }
3642
    end_partition_block_timer(part_timing_stats, PARTITION_NONE,
3643
                              tmp_rdc.rdcost);
3644
  }
3645
#endif
3646
68.6k
  *pb_source_variance = x->source_variance;
3647
68.6k
  if (none_rd) *none_rd = this_rdc->rdcost;
3648
68.6k
  part_search_state->none_rd = this_rdc->rdcost;
3649
68.6k
  if (this_rdc->rate != INT_MAX) {
3650
    // Record picked ref frame to prune ref frames for other partition types.
3651
38.8k
    if (cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions) {
3652
0
      const int ref_type = av1_ref_frame_type(pc_tree->none->mic.ref_frame);
3653
0
      av1_update_picked_ref_frames_mask(
3654
0
          x, ref_type, bsize, cm->seq_params->mib_size, mi_row, mi_col);
3655
0
    }
3656
3657
    // Calculate the total cost and update the best partition.
3658
38.8k
    if (blk_params.bsize_at_least_8x8) {
3659
35.0k
      this_rdc->rate += pt_cost;
3660
35.0k
      this_rdc->rdcost = RDCOST(x->rdmult, this_rdc->rate, this_rdc->dist);
3661
35.0k
    }
3662
38.8k
    *part_none_rd = this_rdc->rdcost;
3663
38.8k
    if (this_rdc->rdcost < best_rdc->rdcost) {
3664
36.0k
      *best_rdc = *this_rdc;
3665
36.0k
      part_search_state->found_best_partition = true;
3666
36.0k
      if (blk_params.bsize_at_least_8x8) {
3667
32.7k
        pc_tree->partitioning = PARTITION_NONE;
3668
32.7k
      }
3669
3670
      // Disable split and rectangular partition search
3671
      // based on PARTITION_NONE cost.
3672
36.0k
      prune_partitions_after_none(cpi, x, sms_tree, pc_tree->none,
3673
36.0k
                                  part_search_state, best_rdc,
3674
36.0k
                                  pb_source_variance);
3675
36.0k
    }
3676
38.8k
  }
3677
68.6k
  av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm));
3678
68.6k
}
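
When pick_sb_modes returns a finite rate, the code above folds the PARTITION_NONE signaling cost into the block rate and recomputes the Lagrangian cost before comparing it with the running best. A simplified stand-in for that cost model (the real RDCOST macro applies fixed-point scaling to the rate term, omitted here):

// Simplified Lagrangian rd cost J = lambda * R + D, with lambda playing the
// role of x->rdmult. Fixed-point shifts of the real macro are omitted.
static long long simple_rd_cost(long long rdmult, long long rate,
                                long long dist) {
  return rdmult * rate + dist;
}
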
3679
3680
// PARTITION_SPLIT search.
3681
static void split_partition_search(
3682
    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
3683
    TokenExtra **tp, MACROBLOCK *x, PC_TREE *pc_tree,
3684
    SIMPLE_MOTION_DATA_TREE *sms_tree, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
3685
    PartitionSearchState *part_search_state, RD_STATS *best_rdc,
3686
88.0k
    SB_MULTI_PASS_MODE multi_pass_mode, int64_t *part_split_rd) {
3687
88.0k
  const AV1_COMMON *const cm = &cpi->common;
3688
88.0k
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
3689
88.0k
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
3690
88.0k
  const int mi_row = blk_params.mi_row;
3691
88.0k
  const int mi_col = blk_params.mi_col;
3692
88.0k
  const int bsize = blk_params.bsize;
3693
88.0k
  assert(bsize < BLOCK_SIZES_ALL);
3694
88.0k
  RD_STATS sum_rdc = part_search_state->sum_rdc;
3695
88.0k
  const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
3696
3697
  // Check if partition split is allowed.
3698
88.0k
  if (part_search_state->terminate_partition_search ||
3699
88.0k
      !part_search_state->do_square_split)
3700
26.6k
    return;
3701
3702
306k
  for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
3703
245k
    if (pc_tree->split[i] == NULL)
3704
245k
      pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
3705
245k
    pc_tree->split[i]->index = i;
3706
245k
  }
3707
3708
  // Initialization of this partition RD stats.
3709
61.3k
  av1_init_rd_stats(&sum_rdc);
3710
61.3k
  sum_rdc.rate = part_search_state->partition_cost[PARTITION_SPLIT];
3711
61.3k
  sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3712
3713
61.3k
  int idx;
3714
#if CONFIG_COLLECT_PARTITION_STATS
3715
  PartitionTimingStats *part_timing_stats =
3716
      &part_search_state->part_timing_stats;
3717
  if (best_rdc->rdcost - sum_rdc.rdcost >= 0) {
3718
    start_partition_block_timer(part_timing_stats, PARTITION_SPLIT);
3719
  }
3720
#endif
3721
  // Recursive partition search on 4 sub-blocks.
3722
144k
  for (idx = 0; idx < SUB_PARTITIONS_SPLIT && sum_rdc.rdcost < best_rdc->rdcost;
3723
116k
       ++idx) {
3724
116k
    const int x_idx = (idx & 1) * blk_params.mi_step;
3725
116k
    const int y_idx = (idx >> 1) * blk_params.mi_step;
3726
3727
116k
    if (mi_row + y_idx >= mi_params->mi_rows ||
3728
116k
        mi_col + x_idx >= mi_params->mi_cols)
3729
40.0k
      continue;
3730
3731
76.2k
    pc_tree->split[idx]->index = idx;
3732
76.2k
    int64_t *p_split_rd = &part_search_state->split_rd[idx];
3733
76.2k
    RD_STATS best_remain_rdcost;
3734
76.2k
    av1_rd_stats_subtraction(x->rdmult, best_rdc, &sum_rdc,
3735
76.2k
                             &best_remain_rdcost);
3736
3737
76.2k
    int curr_quad_tree_idx = 0;
3738
76.2k
    if (frame_is_intra_only(cm) && bsize <= BLOCK_64X64) {
3739
76.2k
      curr_quad_tree_idx = part_search_state->intra_part_info->quad_tree_idx;
3740
76.2k
      part_search_state->intra_part_info->quad_tree_idx =
3741
76.2k
          4 * curr_quad_tree_idx + idx + 1;
3742
76.2k
    }
3743
    // Split partition evaluation of corresponding idx.
3744
    // If the RD cost exceeds the best cost then do not
3745
    // evaluate other split sub-partitions.
3746
76.2k
    if (!av1_rd_pick_partition(
3747
76.2k
            cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize,
3748
76.2k
            &part_search_state->this_rdc, best_remain_rdcost,
3749
76.2k
            pc_tree->split[idx], sms_tree->split[idx], p_split_rd,
3750
76.2k
            multi_pass_mode, &part_search_state->split_part_rect_win[idx])) {
3751
32.7k
      av1_invalid_rd_stats(&sum_rdc);
3752
32.7k
      break;
3753
32.7k
    }
3754
43.5k
    if (frame_is_intra_only(cm) && bsize <= BLOCK_64X64) {
3755
43.5k
      part_search_state->intra_part_info->quad_tree_idx = curr_quad_tree_idx;
3756
43.5k
    }
3757
3758
43.5k
    sum_rdc.rate += part_search_state->this_rdc.rate;
3759
43.5k
    sum_rdc.dist += part_search_state->this_rdc.dist;
3760
43.5k
    av1_rd_cost_update(x->rdmult, &sum_rdc);
3761
3762
    // Set split ctx as ready for use.
3763
43.5k
    if (idx <= 1 && (bsize <= BLOCK_8X8 ||
3764
34.0k
                     pc_tree->split[idx]->partitioning == PARTITION_NONE)) {
3765
22.8k
      const MB_MODE_INFO *const mbmi = &pc_tree->split[idx]->none->mic;
3766
22.8k
      const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3767
      // Neither palette mode nor cfl predicted.
3768
22.8k
      if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
3769
22.8k
        if (mbmi->uv_mode != UV_CFL_PRED)
3770
22.8k
          part_search_state->is_split_ctx_is_ready[idx] = 1;
3771
22.8k
      }
3772
22.8k
    }
3773
43.5k
  }
3774
#if CONFIG_COLLECT_PARTITION_STATS
3775
  if (part_timing_stats->timer_is_on) {
3776
    end_partition_block_timer(part_timing_stats, PARTITION_SPLIT,
3777
                              sum_rdc.rdcost);
3778
  }
3779
#endif
3780
61.3k
  const int reached_last_index = (idx == SUB_PARTITIONS_SPLIT);
3781
3782
  // Calculate the total cost and update the best partition.
3783
61.3k
  *part_split_rd = sum_rdc.rdcost;
3784
61.3k
  if (reached_last_index && sum_rdc.rdcost < best_rdc->rdcost) {
3785
19.2k
    sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
3786
19.2k
    if (sum_rdc.rdcost < best_rdc->rdcost) {
3787
19.2k
      *best_rdc = sum_rdc;
3788
19.2k
      part_search_state->found_best_partition = true;
3789
19.2k
      pc_tree->partitioning = PARTITION_SPLIT;
3790
19.2k
    }
3791
42.0k
  } else if (cpi->sf.part_sf.less_rectangular_check_level > 0) {
3792
    // Skip rectangular partition test when partition type none gives better
3793
    // rd than partition type split.
3794
42.0k
    if (cpi->sf.part_sf.less_rectangular_check_level == 2 || idx <= 2) {
3795
42.0k
      const int partition_none_valid = part_search_state->none_rd > 0;
3796
42.0k
      const int partition_none_better =
3797
42.0k
          part_search_state->none_rd < sum_rdc.rdcost;
3798
42.0k
      part_search_state->do_rectangular_split &=
3799
42.0k
          !(partition_none_valid && partition_none_better);
3800
42.0k
    }
3801
42.0k
  }
3802
61.3k
  av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm));
3803
61.3k
}
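
The recursive loop above derives the mi offsets of the four split sub-blocks from the loop index. A minimal sketch of that mapping (mi_step is half the parent block size in mi units):

// idx 0..3 covers the quadrants in raster order:
//   0 -> top-left, 1 -> top-right, 2 -> bottom-left, 3 -> bottom-right.
static void split_quadrant_offsets(int idx, int mi_step, int *x_idx,
                                   int *y_idx) {
  *x_idx = (idx & 1) * mi_step;   // column offset
  *y_idx = (idx >> 1) * mi_step;  // row offset
}
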
3804
3805
// The max number of nodes in the partition tree.
3806
// The number of leaf nodes is (128x128) / (4x4) = 1024.
3807
// The number of all possible parent nodes is 1 + 2 + ... + 512 = 1023.
3808
#define NUM_NODES 2048
3809
3810
static void write_partition_tree(AV1_COMP *const cpi,
3811
                                 const PC_TREE *const pc_tree,
3812
                                 const BLOCK_SIZE bsize, const int mi_row,
3813
0
                                 const int mi_col) {
3814
0
  (void)mi_row;
3815
0
  (void)mi_col;
3816
0
  const char *path = cpi->oxcf.partition_info_path;
3817
0
  char filename[256];
3818
0
  snprintf(filename, sizeof(filename), "%s/partition_tree_sb%d_c%d", path,
3819
0
           cpi->sb_counter, 0);
3820
0
  FILE *pfile = fopen(filename, "w");
3821
0
  fprintf(pfile, "%d", bsize);
3822
0
3823
0
  // Write partition type with BFS order.
3824
0
  const PC_TREE *tree_node_queue[NUM_NODES] = { NULL };
3825
0
  int q_idx = 0;
3826
0
  int depth = 0;
3827
0
  int last_idx = 1;
3828
0
  int num_nodes = 1;
3829
0
3830
0
  // First traversal to get number of leaf nodes and depth.
3831
0
  tree_node_queue[q_idx] = pc_tree;
3832
0
  while (num_nodes > 0) {
3833
0
    const PC_TREE *node = tree_node_queue[q_idx];
3834
0
    if (node->partitioning == PARTITION_SPLIT) {
3835
0
      for (int i = 0; i < 4; ++i) {
3836
0
        tree_node_queue[last_idx] = node->split[i];
3837
0
        ++last_idx;
3838
0
      }
3839
0
      ++depth;
3840
0
      num_nodes += 4;
3841
0
    }
3842
0
    --num_nodes;
3843
0
    ++q_idx;
3844
0
  }
3845
0
  const int num_leafs = last_idx;
3846
0
  fprintf(pfile, ",%d,%d", num_leafs, /*num_configs=*/1);
3847
0
3848
0
  // Write partitions for each node.
3849
0
  q_idx = 0;
3850
0
  depth = 0;
3851
0
  last_idx = 1;
3852
0
  num_nodes = 1;
3853
0
  tree_node_queue[q_idx] = pc_tree;
3854
0
  while (num_nodes > 0) {
3855
0
    const PC_TREE *node = tree_node_queue[q_idx];
3856
0
    fprintf(pfile, ",%d", node->partitioning);
3857
0
    if (node->partitioning == PARTITION_SPLIT) {
3858
0
      for (int i = 0; i < 4; ++i) {
3859
0
        tree_node_queue[last_idx] = node->split[i];
3860
0
        ++last_idx;
3861
0
      }
3862
0
      ++depth;
3863
0
      num_nodes += 4;
3864
0
    }
3865
0
    --num_nodes;
3866
0
    ++q_idx;
3867
0
  }
3868
0
  fprintf(pfile, "\n");
3869
0
3870
0
  fclose(pfile);
3871
0
}
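
The file written above is a single comma-separated line: the root block size, a node count (the value stored in num_leafs, i.e. the root plus every enqueued child), the number of configurations, and then one partitioning value per node in BFS order, with four children appended whenever a node is PARTITION_SPLIT. As an illustration only, assuming the usual enum values (PARTITION_NONE == 0, PARTITION_SPLIT == 3, BLOCK_64X64 == 12) and a sequence coded with 64x64 superblocks, a superblock split once with all four 32x32 children left unsplit would serialize as:

12,5,1,3,0,0,0,0

read_partition_tree below parses this same layout back into a PC_TREE.
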
3872
3873
static void verify_write_partition_tree(const AV1_COMP *const cpi,
3874
                                        const PC_TREE *const pc_tree,
3875
                                        const BLOCK_SIZE bsize,
3876
                                        const int config_id, const int mi_row,
3877
0
                                        const int mi_col) {
3878
0
  (void)mi_row;
3879
0
  (void)mi_col;
3880
0
  const char *path = cpi->oxcf.partition_info_path;
3881
0
  char filename[256];
3882
0
  snprintf(filename, sizeof(filename), "%s/verify_partition_tree_sb%d_c%d",
3883
0
           path, cpi->sb_counter, config_id);
3884
0
  FILE *pfile = fopen(filename, "w");
3885
0
  fprintf(pfile, "%d", bsize);
3886
3887
  // Write partition type with BFS order.
3888
0
  const PC_TREE *tree_node_queue[NUM_NODES] = { NULL };
3889
0
  int q_idx = 0;
3890
0
  int depth = 0;
3891
0
  int last_idx = 1;
3892
0
  int num_nodes = 1;
3893
3894
  // First traversal to get number of leaf nodes and depth.
3895
0
  tree_node_queue[q_idx] = pc_tree;
3896
0
  while (num_nodes > 0) {
3897
0
    const PC_TREE *node = tree_node_queue[q_idx];
3898
0
    if (node != NULL && node->partitioning == PARTITION_SPLIT) {
3899
0
      for (int i = 0; i < 4; ++i) {
3900
0
        tree_node_queue[last_idx] = node->split[i];
3901
0
        ++last_idx;
3902
0
      }
3903
0
      ++depth;
3904
0
      num_nodes += 4;
3905
0
    }
3906
0
    --num_nodes;
3907
0
    ++q_idx;
3908
0
  }
3909
0
  const int num_leafs = last_idx;
3910
0
  fprintf(pfile, ",%d,%d", num_leafs, /*num_configs=*/1);
3911
3912
  // Write partitions for each node.
3913
0
  q_idx = 0;
3914
0
  depth = 0;
3915
0
  last_idx = 1;
3916
0
  num_nodes = 1;
3917
0
  tree_node_queue[q_idx] = pc_tree;
3918
0
  while (num_nodes > 0) {
3919
0
    const PC_TREE *node = tree_node_queue[q_idx];
3920
0
    if (node != NULL) {  // suppress warning
3921
0
      fprintf(pfile, ",%d", node->partitioning);
3922
0
      if (node->partitioning == PARTITION_SPLIT) {
3923
0
        for (int i = 0; i < 4; ++i) {
3924
0
          tree_node_queue[last_idx] = node->split[i];
3925
0
          ++last_idx;
3926
0
        }
3927
0
        ++depth;
3928
0
        num_nodes += 4;
3929
0
      }
3930
0
    }
3931
0
    --num_nodes;
3932
0
    ++q_idx;
3933
0
  }
3934
0
  fprintf(pfile, "\n");
3935
3936
0
  fclose(pfile);
3937
0
}
3938
3939
static int read_partition_tree(AV1_COMP *const cpi, PC_TREE *const pc_tree,
3940
0
                               const int config_id) {
3941
0
  const char *path = cpi->oxcf.partition_info_path;
3942
0
  char filename[256];
3943
0
  snprintf(filename, sizeof(filename), "%s/partition_tree_sb%d_c%d", path,
3944
0
           cpi->sb_counter, config_id);
3945
0
  FILE *pfile = fopen(filename, "r");
3946
0
  if (pfile == NULL) {
3947
0
    printf("Can't find the file: %s\n", filename);
3948
0
    exit(0);
3949
0
  }
3950
3951
0
  int read_bsize;
3952
0
  int num_nodes;
3953
0
  int num_configs;
3954
0
  fscanf(pfile, "%d,%d,%d", &read_bsize, &num_nodes, &num_configs);
3955
0
  assert(read_bsize == cpi->common.seq_params->sb_size);
3956
0
  BLOCK_SIZE bsize = (BLOCK_SIZE)read_bsize;
3957
0
  assert(bsize == pc_tree->block_size);
3958
3959
0
  PC_TREE *tree_node_queue[NUM_NODES] = { NULL };
3960
0
  int last_idx = 1;
3961
0
  int q_idx = 0;
3962
0
  tree_node_queue[q_idx] = pc_tree;
3963
0
  while (num_nodes > 0) {
3964
0
    int partitioning;
3965
0
    fscanf(pfile, ",%d", &partitioning);
3966
0
    assert(partitioning >= PARTITION_NONE &&
3967
0
           partitioning < EXT_PARTITION_TYPES);
3968
0
    PC_TREE *node = tree_node_queue[q_idx];
3969
0
    if (node != NULL) {
3970
0
      node->partitioning = partitioning;
3971
0
      bsize = node->block_size;
3972
0
    }
3973
0
    if (partitioning == PARTITION_SPLIT) {
3974
0
      const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
3975
0
      for (int i = 0; i < 4; ++i) {
3976
0
        if (node != NULL) {  // Suppress warning
3977
0
          node->split[i] = av1_alloc_pc_tree_node(subsize);
3978
0
          node->split[i]->index = i;
3979
0
          tree_node_queue[last_idx] = node->split[i];
3980
0
          ++last_idx;
3981
0
        }
3982
0
      }
3983
0
    }
3984
0
    --num_nodes;
3985
0
    ++q_idx;
3986
0
  }
3987
0
  fclose(pfile);
3988
3989
0
  return num_configs;
3990
0
}
3991
3992
static RD_STATS rd_search_for_fixed_partition(
3993
    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
3994
    TokenExtra **tp, SIMPLE_MOTION_DATA_TREE *sms_tree, int mi_row, int mi_col,
3995
0
    const BLOCK_SIZE bsize, PC_TREE *pc_tree) {
3996
0
  const PARTITION_TYPE partition = pc_tree->partitioning;
3997
0
  const AV1_COMMON *const cm = &cpi->common;
3998
0
  const int num_planes = av1_num_planes(cm);
3999
0
  MACROBLOCK *const x = &td->mb;
4000
0
  MACROBLOCKD *const xd = &x->e_mbd;
4001
0
  TileInfo *const tile_info = &tile_data->tile_info;
4002
0
  RD_STATS best_rdc;
4003
0
  av1_invalid_rd_stats(&best_rdc);
4004
0
  int sum_subblock_rate = 0;
4005
0
  int64_t sum_subblock_dist = 0;
4006
0
  PartitionSearchState part_search_state;
4007
0
  init_partition_search_state_params(x, cpi, &part_search_state, mi_row, mi_col,
4008
0
                                     bsize);
4009
  // Override partition costs at the edges of the frame in the same
4010
  // way as in read_partition (see decodeframe.c).
4011
0
  PartitionBlkParams blk_params = part_search_state.part_blk_params;
4012
0
  if (!av1_blk_has_rows_and_cols(&blk_params))
4013
0
    set_partition_cost_for_edge_blk(cm, &part_search_state);
4014
4015
0
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
4016
4017
  // Save rdmult before it might be changed, so it can be restored later.
4018
0
  const int orig_rdmult = x->rdmult;
4019
0
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
4020
0
  (void)orig_rdmult;
4021
4022
  // Set the context.
4023
0
  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
4024
0
  xd->above_txfm_context =
4025
0
      cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
4026
0
  xd->left_txfm_context =
4027
0
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
4028
0
  av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
4029
4030
0
  assert(bsize < BLOCK_SIZES_ALL);
4031
0
  unsigned int pb_source_variance = UINT_MAX;
4032
0
  int64_t part_none_rd = INT64_MAX;
4033
0
  int64_t none_rd = INT64_MAX;
4034
0
  int inc_step[NUM_PART4_TYPES] = { 0 };
4035
0
  if (partition == PARTITION_HORZ_4) inc_step[HORZ4] = mi_size_high[bsize] / 4;
4036
0
  if (partition == PARTITION_VERT_4) inc_step[VERT4] = mi_size_wide[bsize] / 4;
4037
4038
0
  switch (partition) {
4039
0
    case PARTITION_NONE:
4040
0
      none_partition_search(cpi, td, tile_data, x, pc_tree, sms_tree, &x_ctx,
4041
0
                            &part_search_state, &best_rdc, &pb_source_variance,
4042
0
                            &none_rd, &part_none_rd);
4043
0
      break;
4044
0
    case PARTITION_HORZ:
4045
0
      rectangular_partition_search(cpi, td, tile_data, tp, x, pc_tree, &x_ctx,
4046
0
                                   &part_search_state, &best_rdc, NULL, HORZ,
4047
0
                                   HORZ);
4048
0
      break;
4049
0
    case PARTITION_VERT:
4050
0
      rectangular_partition_search(cpi, td, tile_data, tp, x, pc_tree, &x_ctx,
4051
0
                                   &part_search_state, &best_rdc, NULL, VERT,
4052
0
                                   VERT);
4053
0
      break;
4054
0
    case PARTITION_HORZ_A:
4055
0
      ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
4056
0
                           &part_search_state, &best_rdc, NULL,
4057
0
                           pb_source_variance, 1, HORZ_A, HORZ_A);
4058
0
      break;
4059
0
    case PARTITION_HORZ_B:
4060
0
      ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
4061
0
                           &part_search_state, &best_rdc, NULL,
4062
0
                           pb_source_variance, 1, HORZ_B, HORZ_B);
4063
0
      break;
4064
0
    case PARTITION_VERT_A:
4065
0
      ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
4066
0
                           &part_search_state, &best_rdc, NULL,
4067
0
                           pb_source_variance, 1, VERT_A, VERT_A);
4068
0
      break;
4069
0
    case PARTITION_VERT_B:
4070
0
      ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
4071
0
                           &part_search_state, &best_rdc, NULL,
4072
0
                           pb_source_variance, 1, VERT_B, VERT_B);
4073
0
      break;
4074
0
    case PARTITION_HORZ_4:
4075
0
      rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
4076
0
                         pc_tree->horizontal4, &part_search_state, &best_rdc,
4077
0
                         inc_step, PARTITION_HORZ_4);
4078
0
      break;
4079
0
    case PARTITION_VERT_4:
4080
0
      rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
4081
0
                         pc_tree->vertical4, &part_search_state, &best_rdc,
4082
0
                         inc_step, PARTITION_VERT_4);
4083
0
      break;
4084
0
    case PARTITION_SPLIT:
4085
0
      for (int idx = 0; idx < SUB_PARTITIONS_SPLIT; ++idx) {
4086
0
        const BLOCK_SIZE subsize =
4087
0
            get_partition_subsize(bsize, PARTITION_SPLIT);
4088
0
        assert(subsize < BLOCK_SIZES_ALL);
4089
0
        const int next_mi_row =
4090
0
            idx < 2 ? mi_row : mi_row + mi_size_high[subsize];
4091
0
        const int next_mi_col =
4092
0
            idx % 2 == 0 ? mi_col : mi_col + mi_size_wide[subsize];
4093
0
        if (next_mi_row >= cm->mi_params.mi_rows ||
4094
0
            next_mi_col >= cm->mi_params.mi_cols) {
4095
0
          continue;
4096
0
        }
4097
0
        const RD_STATS subblock_rdc = rd_search_for_fixed_partition(
4098
0
            cpi, td, tile_data, tp, sms_tree->split[idx], next_mi_row,
4099
0
            next_mi_col, subsize, pc_tree->split[idx]);
4100
0
        sum_subblock_rate += subblock_rdc.rate;
4101
0
        sum_subblock_dist += subblock_rdc.dist;
4102
0
      }
4103
0
      best_rdc.rate = sum_subblock_rate;
4104
0
      best_rdc.rate += part_search_state.partition_cost[PARTITION_SPLIT];
4105
0
      best_rdc.dist = sum_subblock_dist;
4106
0
      best_rdc.rdcost = RDCOST(x->rdmult, best_rdc.rate, best_rdc.dist);
4107
0
      break;
4108
0
    default: assert(0 && "invalid partition type."); exit(0);
4109
0
  }
4110
  // Note: it is necessary to restore context information.
4111
0
  av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
4112
4113
0
  if (bsize != cm->seq_params->sb_size) {
4114
0
    encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
4115
0
              pc_tree, NULL);
4116
0
  }
4117
0
  x->rdmult = orig_rdmult;
4118
4119
0
  return best_rdc;
4120
0
}
4121
4122
static void prepare_sb_features_before_search(
4123
    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, int mi_row,
4124
0
    int mi_col, const BLOCK_SIZE bsize, aom_partition_features_t *features) {
4125
0
  av1_collect_motion_search_features_sb(cpi, td, tile_data, mi_row, mi_col,
4126
0
                                        bsize, features);
4127
0
  collect_tpl_stats_sb(cpi, bsize, mi_row, mi_col, features);
4128
0
}
4129
4130
static void update_partition_stats(const RD_STATS *const this_rdcost,
4131
0
                                   aom_partition_stats_t *stats) {
4132
0
  stats->rate = this_rdcost->rate;
4133
0
  stats->dist = this_rdcost->dist;
4134
0
  stats->rdcost = this_rdcost->rdcost;
4135
0
}
4136
4137
static void build_pc_tree_from_part_decision(
4138
    const aom_partition_decision_t *partition_decision,
4139
0
    const BLOCK_SIZE this_bsize, PC_TREE *pc_tree) {
4140
0
  BLOCK_SIZE bsize = this_bsize;
4141
0
  int num_nodes = partition_decision->num_nodes;
4142
0
  PC_TREE *tree_node_queue[NUM_NODES] = { NULL };
4143
0
  int last_idx = 1;
4144
0
  int q_idx = 0;
4145
0
  tree_node_queue[q_idx] = pc_tree;
4146
0
  while (num_nodes > 0) {
4147
0
    const int partitioning = partition_decision->partition_decision[q_idx];
4148
0
    assert(partitioning >= PARTITION_NONE &&
4149
0
           partitioning < EXT_PARTITION_TYPES);
4150
0
    PC_TREE *node = tree_node_queue[q_idx];
4151
0
    if (node != NULL) {
4152
0
      node->partitioning = partitioning;
4153
0
      bsize = node->block_size;
4154
0
    }
4155
0
    if (partitioning == PARTITION_SPLIT) {
4156
0
      const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
4157
0
      for (int i = 0; i < 4; ++i) {
4158
0
        if (node != NULL) {  // Suppress warning
4159
0
          node->split[i] = av1_alloc_pc_tree_node(subsize);
4160
0
          node->split[i]->index = i;
4161
0
          tree_node_queue[last_idx] = node->split[i];
4162
0
          ++last_idx;
4163
0
        }
4164
0
      }
4165
0
    }
4166
0
    --num_nodes;
4167
0
    ++q_idx;
4168
0
  }
4169
0
}
4170
4171
// The ML model needs to provide the whole decision tree for the superblock.
4172
static bool ml_partition_search_whole_tree(AV1_COMP *const cpi, ThreadData *td,
4173
                                           TileDataEnc *tile_data,
4174
                                           TokenExtra **tp,
4175
                                           SIMPLE_MOTION_DATA_TREE *sms_root,
4176
                                           int mi_row, int mi_col,
4177
0
                                           const BLOCK_SIZE bsize) {
4178
0
  AV1_COMMON *const cm = &cpi->common;
4179
0
  MACROBLOCK *const x = &td->mb;
4180
0
  ExtPartController *const ext_part_controller = &cpi->ext_part_controller;
4181
0
  aom_partition_features_t features;
4182
0
  prepare_sb_features_before_search(cpi, td, tile_data, mi_row, mi_col, bsize,
4183
0
                                    &features);
4184
0
  features.mi_row = mi_row;
4185
0
  features.mi_col = mi_col;
4186
0
  features.frame_width = cpi->frame_info.frame_width;
4187
0
  features.frame_height = cpi->frame_info.frame_height;
4188
0
  features.block_size = bsize;
4189
0
  av1_ext_part_send_features(ext_part_controller, &features);
4190
0
  PC_TREE *pc_tree;
4191
4192
  // rd mode search (dry run) for a valid partition decision from the ml model.
4193
0
  aom_partition_decision_t partition_decision;
4194
0
  do {
4195
0
    const bool valid_decision = av1_ext_part_get_partition_decision(
4196
0
        ext_part_controller, &partition_decision);
4197
0
    if (!valid_decision) return false;
4198
4199
    // First, let's take the easy approach.
4200
    // We require the ml model to provide partition decisions for the
4201
    // whole superblock.
4202
0
    pc_tree = av1_alloc_pc_tree_node(bsize);
4203
0
    build_pc_tree_from_part_decision(&partition_decision, bsize, pc_tree);
4204
4205
0
    const RD_STATS this_rdcost = rd_search_for_fixed_partition(
4206
0
        cpi, td, tile_data, tp, sms_root, mi_row, mi_col, bsize, pc_tree);
4207
0
    aom_partition_stats_t stats;
4208
0
    update_partition_stats(&this_rdcost, &stats);
4209
0
    av1_ext_part_send_partition_stats(ext_part_controller, &stats);
4210
0
    if (!partition_decision.is_final_decision) {
4211
0
      av1_free_pc_tree_recursive(pc_tree, av1_num_planes(cm), 0, 0);
4212
0
    }
4213
0
  } while (!partition_decision.is_final_decision);
4214
4215
  // Encode with the selected mode and partition.
4216
0
  set_cb_offsets(x->cb_offset, 0, 0);
4217
0
  encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
4218
0
            pc_tree, NULL);
4219
4220
0
  av1_free_pc_tree_recursive(pc_tree, av1_num_planes(cm), 0, 0);
4221
4222
0
  return true;
4223
0
}
4224
4225
// Use a bitmask to represent the valid partition types for the current
4226
// block. "1" represents the corresponding partition type is vaild.
4227
// The least significant bit represents "PARTITION_NONE", the
4228
// most significant bit represents "PARTITION_VERT_4", following
4229
// the enum order for PARTITION_TYPE in "enums.h"
4230
static int get_valid_partition_types(
4231
    const AV1_COMP *const cpi,
4232
    const PartitionSearchState *const part_search_state,
4233
0
    const BLOCK_SIZE bsize) {
4234
0
  const PartitionCfg *const part_cfg = &cpi->oxcf.part_cfg;
4235
0
  const PartitionBlkParams blk_params = part_search_state->part_blk_params;
4236
0
  int valid_types = 0;
4237
  // PARTITION_NONE
4238
0
  valid_types |= (part_search_state->partition_none_allowed << 0);
4239
  // PARTITION_HORZ
4240
0
  valid_types |= (part_search_state->partition_rect_allowed[HORZ] << 1);
4241
  // PARTITION_VERT
4242
0
  valid_types |= (part_search_state->partition_rect_allowed[VERT] << 2);
4243
  // PARTITION_SPLIT
4244
0
  valid_types |= (part_search_state->do_square_split << 3);
4245
  // PARTITION_HORZ_A
4246
0
  const int ext_partition_allowed = part_search_state->do_rectangular_split &&
4247
0
                                    av1_blk_has_rows_and_cols(&blk_params);
4248
0
  const int horzab_partition_allowed =
4249
0
      ext_partition_allowed && part_cfg->enable_ab_partitions &&
4250
0
      part_search_state->partition_rect_allowed[HORZ];
4251
0
  valid_types |= (horzab_partition_allowed << 4);
4252
  // PARTITION_HORZ_B
4253
0
  valid_types |= (horzab_partition_allowed << 5);
4254
  // PARTITION_VERT_A
4255
0
  const int vertab_partition_allowed =
4256
0
      ext_partition_allowed && part_cfg->enable_ab_partitions &&
4257
0
      part_search_state->partition_rect_allowed[VERT];
4258
0
  valid_types |= (vertab_partition_allowed << 6);
4259
  // PARTITION_VERT_B
4260
0
  valid_types |= (vertab_partition_allowed << 7);
4261
  // PARTITION_HORZ_4
4262
0
  const int partition4_allowed = part_cfg->enable_1to4_partitions &&
4263
0
                                 ext_partition_allowed &&
4264
0
                                 bsize != BLOCK_128X128;
4265
0
  const int horz4_allowed =
4266
0
      partition4_allowed && part_search_state->partition_rect_allowed[HORZ] &&
4267
0
      get_plane_block_size(get_partition_subsize(bsize, PARTITION_HORZ_4),
4268
0
                           part_search_state->ss_x,
4269
0
                           part_search_state->ss_y) != BLOCK_INVALID;
4270
0
  valid_types |= (horz4_allowed << 8);
4271
  // PARTITION_VERT_4
4272
0
  const int vert4_allowed =
4273
0
      partition4_allowed && part_search_state->partition_rect_allowed[VERT] &&
4274
0
      get_plane_block_size(get_partition_subsize(bsize, PARTITION_VERT_4),
4275
0
                           part_search_state->ss_x,
4276
0
                           part_search_state->ss_y) != BLOCK_INVALID;
4277
0
  valid_types |= (vert4_allowed << 9);
4278
4279
0
  return valid_types;
4280
0
}
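
Since the shifts above follow the PARTITION_TYPE enum order, the returned mask can be queried with a plain bit test; a small sketch:

// Bit 0 corresponds to PARTITION_NONE and bit 9 to PARTITION_VERT_4, matching
// the comment above the function.
static int partition_type_is_valid(int valid_types, int partition_type) {
  return (valid_types >> partition_type) & 1;
}
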
4281
4282
static void prepare_tpl_stats_block(const AV1_COMP *const cpi,
4283
                                    const BLOCK_SIZE bsize, const int mi_row,
4284
                                    const int mi_col, int64_t *intra_cost,
4285
0
                                    int64_t *inter_cost, int64_t *mc_dep_cost) {
4286
0
  const AV1_COMMON *const cm = &cpi->common;
4287
0
  GF_GROUP *gf_group = &cpi->ppi->gf_group;
4288
0
  if (gf_group->update_type[cpi->gf_frame_index] == INTNL_OVERLAY_UPDATE ||
4289
0
      gf_group->update_type[cpi->gf_frame_index] == OVERLAY_UPDATE) {
4290
0
    return;
4291
0
  }
4292
4293
0
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
4294
0
  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[cpi->gf_frame_index];
4295
0
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
4296
  // If tpl stats are not established, return early.
4297
0
  if (!tpl_data->ready || gf_group->max_layer_depth_allowed == 0) {
4298
0
    return;
4299
0
  }
4300
4301
0
  const int tpl_stride = tpl_frame->stride;
4302
0
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
4303
0
  const int mi_width =
4304
0
      AOMMIN(mi_size_wide[bsize], cm->mi_params.mi_cols - mi_col);
4305
0
  const int mi_height =
4306
0
      AOMMIN(mi_size_high[bsize], cm->mi_params.mi_rows - mi_row);
4307
4308
0
  int64_t sum_intra_cost = 0;
4309
0
  int64_t sum_inter_cost = 0;
4310
0
  int64_t sum_mc_dep_cost = 0;
4311
0
  for (int row = 0; row < mi_height; row += step) {
4312
0
    for (int col = 0; col < mi_width; col += step) {
4313
0
      TplDepStats *this_stats =
4314
0
          &tpl_stats[av1_tpl_ptr_pos(mi_row + row, mi_col + col, tpl_stride,
4315
0
                                     tpl_data->tpl_stats_block_mis_log2)];
4316
0
      sum_intra_cost += this_stats->intra_cost;
4317
0
      sum_inter_cost += this_stats->inter_cost;
4318
0
      const int64_t mc_dep_delta =
4319
0
          RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate,
4320
0
                 this_stats->mc_dep_dist);
4321
0
      sum_mc_dep_cost += mc_dep_delta;
4322
0
    }
4323
0
  }
4324
4325
0
  *intra_cost = sum_intra_cost;
4326
0
  *inter_cost = sum_inter_cost;
4327
0
  *mc_dep_cost = sum_mc_dep_cost;
4328
0
}
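
The loop above walks the TPL stats on a grid that is downsampled by tpl_stats_block_mis_log2 and accumulates per-unit intra cost, inter cost, and an rd-style mc_dep delta over the block. A plausible sketch of the grid indexing performed by av1_tpl_ptr_pos (the exact helper may differ in details):

// Hypothetical index into a TPL stats grid whose units cover
// (1 << block_mis_log2) mi positions in each dimension.
static int tpl_grid_index(int mi_row, int mi_col, int stride,
                          int block_mis_log2) {
  return (mi_row >> block_mis_log2) * stride + (mi_col >> block_mis_log2);
}
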
4329
4330
static bool recursive_partition(AV1_COMP *const cpi, ThreadData *td,
4331
                                TileDataEnc *tile_data, TokenExtra **tp,
4332
                                SIMPLE_MOTION_DATA_TREE *sms_root,
4333
                                PC_TREE *pc_tree, int mi_row, int mi_col,
4334
0
                                const BLOCK_SIZE bsize, RD_STATS *this_rdcost) {
4335
0
  const AV1_COMMON *const cm = &cpi->common;
4336
0
  ExtPartController *const ext_part_controller = &cpi->ext_part_controller;
4337
0
  MACROBLOCK *const x = &td->mb;
4338
0
  MACROBLOCKD *const xd = &x->e_mbd;
4339
0
  if (mi_row >= cm->mi_params.mi_rows || mi_col >= cm->mi_params.mi_cols) {
4340
0
    return false;
4341
0
  }
4342
0
  aom_partition_decision_t partition_decision;
4343
0
  do {
4344
0
    PartitionSearchState part_search_state;
4345
    // Initialization of state variables used in partition search.
4346
    // TODO(chengchen): check if there are hidden conditions that don't allow
4347
    // all possible partition types.
4348
0
    init_partition_search_state_params(x, cpi, &part_search_state, mi_row,
4349
0
                                       mi_col, bsize);
4350
    // Override partition costs at the edges of the frame in the same
4351
    // way as in read_partition (see decodeframe.c).
4352
0
    PartitionBlkParams blk_params = part_search_state.part_blk_params;
4353
0
    if (!av1_blk_has_rows_and_cols(&blk_params))
4354
0
      set_partition_cost_for_edge_blk(cm, &part_search_state);
4355
0
    const int orig_rdmult = x->rdmult;
4356
0
    setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
4357
0
    const int valid_partition_types =
4358
0
        get_valid_partition_types(cpi, &part_search_state, bsize);
4359
0
    const FRAME_UPDATE_TYPE update_type =
4360
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
4361
0
    const int qindex = av1_get_qindex(&cm->seg, xd->mi[0]->segment_id,
4362
0
                                      cm->quant_params.base_qindex);
4363
    // RD multiplier
4364
0
    const int rdmult = x->rdmult;
4365
    // pyramid level
4366
0
    const int pyramid_level =
4367
0
        cpi->ppi->gf_group.layer_depth[cpi->gf_frame_index];
4368
0
    x->rdmult = orig_rdmult;
4369
    // Neighbor information
4370
0
    const int has_above = !!xd->above_mbmi;
4371
0
    const int has_left = !!xd->left_mbmi;
4372
0
    const BLOCK_SIZE above_bsize =
4373
0
        has_above ? xd->above_mbmi->bsize : BLOCK_INVALID;
4374
0
    const BLOCK_SIZE left_bsize =
4375
0
        has_left ? xd->left_mbmi->bsize : BLOCK_INVALID;
4376
0
    const int above_block_width =
4377
0
        above_bsize == BLOCK_INVALID ? -1 : block_size_wide[above_bsize];
4378
0
    const int above_block_height =
4379
0
        above_bsize == BLOCK_INVALID ? -1 : block_size_high[above_bsize];
4380
0
    const int left_block_width =
4381
0
        left_bsize == BLOCK_INVALID ? -1 : block_size_wide[left_bsize];
4382
0
    const int left_block_height =
4383
0
        left_bsize == BLOCK_INVALID ? -1 : block_size_high[left_bsize];
4384
    // Prepare simple motion search stats as features
4385
0
    unsigned int block_sse = -1;
4386
0
    unsigned int block_var = -1;
4387
0
    unsigned int sub_block_sse[4] = { -1, -1, -1, -1 };
4388
0
    unsigned int sub_block_var[4] = { -1, -1, -1, -1 };
4389
0
    unsigned int horz_block_sse[2] = { -1, -1 };
4390
0
    unsigned int horz_block_var[2] = { -1, -1 };
4391
0
    unsigned int vert_block_sse[2] = { -1, -1 };
4392
0
    unsigned int vert_block_var[2] = { -1, -1 };
4393
0
    av1_prepare_motion_search_features_block(
4394
0
        cpi, td, tile_data, mi_row, mi_col, bsize, valid_partition_types,
4395
0
        &block_sse, &block_var, sub_block_sse, sub_block_var, horz_block_sse,
4396
0
        horz_block_var, vert_block_sse, vert_block_var);
4397
    // Prepare tpl stats for the current block as features
4398
0
    int64_t tpl_intra_cost = -1;
4399
0
    int64_t tpl_inter_cost = -1;
4400
0
    int64_t tpl_mc_dep_cost = -1;
4401
0
    prepare_tpl_stats_block(cpi, bsize, mi_row, mi_col, &tpl_intra_cost,
4402
0
                            &tpl_inter_cost, &tpl_mc_dep_cost);
4403
4404
0
    aom_partition_features_t features;
4405
0
    features.mi_row = mi_row;
4406
0
    features.mi_col = mi_col;
4407
0
    features.frame_width = cpi->frame_info.frame_width;
4408
0
    features.frame_height = cpi->frame_info.frame_height;
4409
0
    features.block_size = bsize;
4410
0
    features.valid_partition_types = valid_partition_types;
4411
0
    features.update_type = update_type;
4412
0
    features.qindex = qindex;
4413
0
    features.rdmult = rdmult;
4414
0
    features.pyramid_level = pyramid_level;
4415
0
    features.has_above_block = has_above;
4416
0
    features.above_block_width = above_block_width;
4417
0
    features.above_block_height = above_block_height;
4418
0
    features.has_left_block = has_left;
4419
0
    features.left_block_width = left_block_width;
4420
0
    features.left_block_height = left_block_height;
4421
0
    features.block_sse = block_sse;
4422
0
    features.block_var = block_var;
4423
0
    for (int i = 0; i < 4; ++i) {
4424
0
      features.sub_block_sse[i] = sub_block_sse[i];
4425
0
      features.sub_block_var[i] = sub_block_var[i];
4426
0
    }
4427
0
    for (int i = 0; i < 2; ++i) {
4428
0
      features.horz_block_sse[i] = horz_block_sse[i];
4429
0
      features.horz_block_var[i] = horz_block_var[i];
4430
0
      features.vert_block_sse[i] = vert_block_sse[i];
4431
0
      features.vert_block_var[i] = vert_block_var[i];
4432
0
    }
4433
0
    features.tpl_intra_cost = tpl_intra_cost;
4434
0
    features.tpl_inter_cost = tpl_inter_cost;
4435
0
    features.tpl_mc_dep_cost = tpl_mc_dep_cost;
4436
0
    av1_ext_part_send_features(ext_part_controller, &features);
4437
0
    const bool valid_decision = av1_ext_part_get_partition_decision(
4438
0
        ext_part_controller, &partition_decision);
4439
0
    if (!valid_decision) return false;
4440
0
    pc_tree->partitioning = partition_decision.current_decision;
4441
4442
0
    av1_init_rd_stats(this_rdcost);
4443
0
    if (partition_decision.current_decision == PARTITION_SPLIT) {
4444
0
      assert(block_size_wide[bsize] >= 8 && block_size_high[bsize] >= 8);
4445
0
      const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
4446
0
      RD_STATS split_rdc[SUB_PARTITIONS_SPLIT];
4447
0
      for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
4448
0
        av1_init_rd_stats(&split_rdc[i]);
4449
0
        if (pc_tree->split[i] == NULL)
4450
0
          pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
4451
0
        pc_tree->split[i]->index = i;
4452
0
      }
4453
0
      const int orig_rdmult_tmp = x->rdmult;
4454
0
      setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
4455
      // TODO(chengchen): check boundary conditions
4456
      // top-left
4457
0
      recursive_partition(cpi, td, tile_data, tp, sms_root, pc_tree->split[0],
4458
0
                          mi_row, mi_col, subsize, &split_rdc[0]);
4459
      // top-right
4460
0
      recursive_partition(cpi, td, tile_data, tp, sms_root, pc_tree->split[1],
4461
0
                          mi_row, mi_col + mi_size_wide[subsize], subsize,
4462
0
                          &split_rdc[1]);
4463
      // bottom-left
4464
0
      recursive_partition(cpi, td, tile_data, tp, sms_root, pc_tree->split[2],
4465
0
                          mi_row + mi_size_high[subsize], mi_col, subsize,
4466
0
                          &split_rdc[2]);
4467
      // bottom-right
4468
0
      recursive_partition(cpi, td, tile_data, tp, sms_root, pc_tree->split[3],
4469
0
                          mi_row + mi_size_high[subsize],
4470
0
                          mi_col + mi_size_wide[subsize], subsize,
4471
0
                          &split_rdc[3]);
4472
0
      this_rdcost->rate += part_search_state.partition_cost[PARTITION_SPLIT];
4473
      // Note: the rdmult used here can differ from the rdmult used in the sub-blocks.
4474
0
      for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
4475
0
        this_rdcost->rate += split_rdc[i].rate;
4476
0
        this_rdcost->dist += split_rdc[i].dist;
4477
0
        av1_rd_cost_update(x->rdmult, this_rdcost);
4478
0
      }
4479
0
      x->rdmult = orig_rdmult_tmp;
4480
0
    } else {
4481
0
      *this_rdcost = rd_search_for_fixed_partition(
4482
0
          cpi, td, tile_data, tp, sms_root, mi_row, mi_col, bsize, pc_tree);
4483
0
    }
4484
4485
0
    aom_partition_stats_t stats;
4486
0
    update_partition_stats(this_rdcost, &stats);
4487
0
    av1_ext_part_send_partition_stats(ext_part_controller, &stats);
4488
0
    if (!partition_decision.is_final_decision) {
4489
0
      if (partition_decision.current_decision == PARTITION_SPLIT) {
4490
0
        for (int i = 0; i < 4; ++i) {
4491
0
          if (pc_tree->split[i] != NULL) {
4492
0
            av1_free_pc_tree_recursive(pc_tree->split[i], av1_num_planes(cm), 0,
4493
0
                                       0);
4494
0
            pc_tree->split[i] = NULL;
4495
0
          }
4496
0
        }
4497
0
      }
4498
0
    }
4499
0
  } while (!partition_decision.is_final_decision);
4500
4501
0
  return true;
4502
0
}
4503
4504
// The ML model only needs to make decisions for the current block each time.
4505
static bool ml_partition_search_partial(AV1_COMP *const cpi, ThreadData *td,
4506
                                        TileDataEnc *tile_data, TokenExtra **tp,
4507
                                        SIMPLE_MOTION_DATA_TREE *sms_root,
4508
                                        int mi_row, int mi_col,
4509
0
                                        const BLOCK_SIZE bsize) {
4510
0
  AV1_COMMON *const cm = &cpi->common;
4511
0
  MACROBLOCK *const x = &td->mb;
4512
0
  ExtPartController *const ext_part_controller = &cpi->ext_part_controller;
4513
0
  aom_partition_features_t features;
4514
0
  prepare_sb_features_before_search(cpi, td, tile_data, mi_row, mi_col, bsize,
4515
0
                                    &features);
4516
0
  features.mi_row = mi_row;
4517
0
  features.mi_col = mi_col;
4518
0
  features.frame_width = cpi->frame_info.frame_width;
4519
0
  features.frame_height = cpi->frame_info.frame_height;
4520
0
  features.block_size = bsize;
4521
0
  av1_ext_part_send_features(ext_part_controller, &features);
4522
0
  PC_TREE *pc_tree;
4523
0
  pc_tree = av1_alloc_pc_tree_node(bsize);
4524
4525
0
  RD_STATS rdcost;
4526
0
  const bool valid_partition =
4527
0
      recursive_partition(cpi, td, tile_data, tp, sms_root, pc_tree, mi_row,
4528
0
                          mi_col, bsize, &rdcost);
4529
0
  if (!valid_partition) {
4530
0
    return false;
4531
0
  }
4532
4533
  // Encode with the selected mode and partition.
4534
0
  set_cb_offsets(x->cb_offset, 0, 0);
4535
0
  encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
4536
0
            pc_tree, NULL);
4537
4538
0
  av1_free_pc_tree_recursive(pc_tree, av1_num_planes(cm), 0, 0);
4539
4540
0
  return true;
4541
0
}
4542
4543
bool av1_rd_partition_search(AV1_COMP *const cpi, ThreadData *td,
4544
                             TileDataEnc *tile_data, TokenExtra **tp,
4545
                             SIMPLE_MOTION_DATA_TREE *sms_root, int mi_row,
4546
                             int mi_col, const BLOCK_SIZE bsize,
4547
0
                             RD_STATS *best_rd_cost) {
4548
0
  if (cpi->ext_part_controller.ready) {
4549
0
    bool valid_search = true;
4550
0
    const aom_ext_part_decision_mode_t decision_mode =
4551
0
        av1_get_ext_part_decision_mode(&cpi->ext_part_controller);
4552
0
    if (decision_mode == AOM_EXT_PART_WHOLE_TREE) {
4553
0
      valid_search = ml_partition_search_whole_tree(
4554
0
          cpi, td, tile_data, tp, sms_root, mi_row, mi_col, bsize);
4555
0
    } else if (decision_mode == AOM_EXT_PART_RECURSIVE) {
4556
0
      valid_search = ml_partition_search_partial(
4557
0
          cpi, td, tile_data, tp, sms_root, mi_row, mi_col, bsize);
4558
0
    } else {
4559
0
      assert(0 && "Unknown decision mode.");
4560
0
      return false;
4561
0
    }
4562
0
    if (!valid_search) {
4563
0
      assert(0 && "Invalid search from ML model, partition search failed.");
4564
0
      exit(0);
4565
0
    }
4566
0
    return true;
4567
0
  }
4568
4569
0
  AV1_COMMON *const cm = &cpi->common;
4570
0
  MACROBLOCK *const x = &td->mb;
4571
0
  int best_idx = 0;
4572
0
  int64_t min_rdcost = INT64_MAX;
4573
0
  int num_configs;
4574
0
  RD_STATS *rdcost = NULL;
4575
0
  int i = 0;
4576
0
  do {
4577
0
    PC_TREE *const pc_tree = av1_alloc_pc_tree_node(bsize);
4578
0
    num_configs = read_partition_tree(cpi, pc_tree, i);
4579
0
    if (i == 0) {
4580
0
      rdcost = aom_calloc(num_configs, sizeof(*rdcost));
4581
0
    }
4582
0
    if (num_configs <= 0) {
4583
0
      av1_free_pc_tree_recursive(pc_tree, av1_num_planes(cm), 0, 0);
4584
0
      if (rdcost != NULL) aom_free(rdcost);
4585
0
      exit(0);
4586
0
      return false;
4587
0
    }
4588
0
    verify_write_partition_tree(cpi, pc_tree, bsize, i, mi_row, mi_col);
4589
    // Encode the block with the given partition tree. Get rdcost and encoding
4590
    // time.
4591
0
    rdcost[i] = rd_search_for_fixed_partition(cpi, td, tile_data, tp, sms_root,
4592
0
                                              mi_row, mi_col, bsize, pc_tree);
4593
4594
0
    if (rdcost[i].rdcost < min_rdcost) {
4595
0
      min_rdcost = rdcost[i].rdcost;
4596
0
      best_idx = i;
4597
0
      *best_rd_cost = rdcost[i];
4598
0
    }
4599
0
    av1_free_pc_tree_recursive(pc_tree, av1_num_planes(cm), 0, 0);
4600
0
    ++i;
4601
0
  } while (i < num_configs);
4602
4603
  // Encode with the partition configuration with the smallest rdcost.
4604
0
  PC_TREE *const pc_tree = av1_alloc_pc_tree_node(bsize);
4605
0
  read_partition_tree(cpi, pc_tree, best_idx);
4606
0
  rd_search_for_fixed_partition(cpi, td, tile_data, tp, sms_root, mi_row,
4607
0
                                mi_col, bsize, pc_tree);
4608
0
  set_cb_offsets(x->cb_offset, 0, 0);
4609
0
  encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
4610
0
            pc_tree, NULL);
4611
4612
0
  av1_free_pc_tree_recursive(pc_tree, av1_num_planes(cm), 0, 0);
4613
0
  aom_free(rdcost);
4614
0
  ++cpi->sb_counter;
4615
4616
0
  return true;
4617
0
}
4618
4619
DECLARE_ALIGNED(16, static const uint8_t, all_zeros[MAX_SB_SIZE]) = { 0 };
4620
DECLARE_ALIGNED(16, static const uint16_t,
4621
                highbd_all_zeros[MAX_SB_SIZE]) = { 0 };
4622
static void log_sub_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
4623
45.6k
                              double *var_min, double *var_max) {
4624
  // This function returns the minimum and maximum log variances for 4x4
4625
  // sub blocks in the current block.
4626
4627
45.6k
  MACROBLOCKD *xd = &x->e_mbd;
4628
45.6k
  double var;
4629
45.6k
  unsigned int sse;
4630
45.6k
  int i, j;
4631
4632
45.6k
  int right_overflow =
4633
45.6k
      (xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0;
4634
45.6k
  int bottom_overflow =
4635
45.6k
      (xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0;
4636
4637
45.6k
  const int bw = MI_SIZE * mi_size_wide[bs] - right_overflow;
4638
45.6k
  const int bh = MI_SIZE * mi_size_high[bs] - bottom_overflow;
4639
4640
  // Initialize minimum variance to a large value and maximum variance to 0.
4641
45.6k
  double min_var_4x4 = (double)INT_MAX;
4642
45.6k
  double max_var_4x4 = 0.0;
4643
4644
367k
  for (i = 0; i < bh; i += 4) {
4645
3.21M
    for (j = 0; j < bw; j += 4) {
4646
2.89M
      if (is_cur_buf_hbd(xd)) {
4647
0
        var = cpi->ppi->fn_ptr[BLOCK_4X4].vf(
4648
0
            x->plane[0].src.buf + i * x->plane[0].src.stride + j,
4649
0
            x->plane[0].src.stride, CONVERT_TO_BYTEPTR(highbd_all_zeros), 0,
4650
0
            &sse);
4651
2.89M
      } else {
4652
2.89M
        var = cpi->ppi->fn_ptr[BLOCK_4X4].vf(
4653
2.89M
            x->plane[0].src.buf + i * x->plane[0].src.stride + j,
4654
2.89M
            x->plane[0].src.stride, all_zeros, 0, &sse);
4655
2.89M
      }
4656
2.89M
      min_var_4x4 = AOMMIN(min_var_4x4, var);
4657
2.89M
      max_var_4x4 = AOMMAX(max_var_4x4, var);
4658
2.89M
    }
4659
321k
  }
4660
45.6k
  *var_min = log(1.0 + min_var_4x4 / 16.0);
4661
45.6k
  *var_max = log(1.0 + max_var_4x4 / 16.0);
4662
45.6k
}
4663
4664
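log_sub_block_var() above measures each 4x4 sub-block against the all_zeros buffer, so the 4x4 variance kernel effectively returns the raw block variance (assuming the usual sum-of-squares definition, sse minus squared-sum/16), and log(1.0 + var/16.0) converts that to a per-pixel log variance. A self-contained restatement of that per-4x4 statistic, with illustrative pixel values:

/*
 * Standalone sketch of the per-4x4 statistic tracked in var_min/var_max.
 * With an all-zeros "prediction", the variance reduces to
 * sum(x^2) - sum(x)^2 / 16, i.e. 16x the per-pixel variance, hence the /16
 * before the log.  Sample blocks are illustrative only.
 */
#include <math.h>
#include <stdint.h>
#include <stdio.h>

static double log_var_4x4(const uint8_t *src, int stride) {
  int64_t sum = 0, sse = 0;
  for (int r = 0; r < 4; ++r) {
    for (int c = 0; c < 4; ++c) {
      const int p = src[r * stride + c];
      sum += p;
      sse += p * p;
    }
  }
  const double var = (double)sse - (double)(sum * sum) / 16.0;  // 16x per-pixel var
  return log(1.0 + var / 16.0);
}

int main(void) {
  const uint8_t flat[16] = { 128, 128, 128, 128, 128, 128, 128, 128,
                             128, 128, 128, 128, 128, 128, 128, 128 };
  const uint8_t edge[16] = { 16, 16, 240, 240, 16, 16, 240, 240,
                             16, 16, 240, 240, 16, 16, 240, 240 };
  // A flat block gives 0; a block with a strong edge gives a large value.
  printf("flat: %.3f  edge: %.3f\n", log_var_4x4(flat, 4), log_var_4x4(edge, 4));
  return 0;
}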
/*!\brief AV1 block partition search (full search).
4665
*
4666
* \ingroup partition_search
4667
* \callgraph
4668
* Searches for the best partition pattern for a block based on the
4669
* rate-distortion cost, and returns a bool value to indicate whether a valid
4670
* partition pattern is found. The partition can recursively go down to the
4671
* smallest block size.
4672
*
4673
* \param[in]    cpi                Top-level encoder structure
4674
* \param[in]    td                 Pointer to thread data
4675
* \param[in]    tile_data          Pointer to struct holding adaptive
4676
data/contexts/models for the tile during
4677
encoding
4678
* \param[in]    tp                 Pointer to the starting token
4679
* \param[in]    mi_row             Row coordinate of the block in a step size
4680
of MI_SIZE
4681
* \param[in]    mi_col             Column coordinate of the block in a step
4682
size of MI_SIZE
4683
* \param[in]    bsize              Current block size
4684
* \param[in]    rd_cost            Pointer to the final rd cost of the block
4685
* \param[in]    best_rdc           Upper bound of rd cost of a valid partition
4686
* \param[in]    pc_tree            Pointer to the PC_TREE node storing the
4687
picked partitions and mode info for the
4688
current block
4689
* \param[in]    sms_tree           Pointer to struct holding simple motion
4690
search data for the current block
4691
* \param[in]    none_rd            Pointer to the rd cost in the case of not
4692
splitting the current block
4693
* \param[in]    multi_pass_mode    SB_SINGLE_PASS/SB_DRY_PASS/SB_WET_PASS
4694
* \param[in]    rect_part_win_info Pointer to struct storing whether horz/vert
4695
partition outperforms previously tested
4696
partitions
4697
*
4698
* \return A bool value is returned indicating if a valid partition is found.
4699
* The pc_tree struct is modified to store the picked partition and modes.
4700
* The rd_cost struct is also updated with the RD stats corresponding to the
4701
* best partition found.
4702
*/
4703
bool av1_rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
4704
                           TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
4705
                           int mi_col, BLOCK_SIZE bsize, RD_STATS *rd_cost,
4706
                           RD_STATS best_rdc, PC_TREE *pc_tree,
4707
                           SIMPLE_MOTION_DATA_TREE *sms_tree, int64_t *none_rd,
4708
                           SB_MULTI_PASS_MODE multi_pass_mode,
4709
88.0k
                           RD_RECT_PART_WIN_INFO *rect_part_win_info) {
4710
88.0k
  const AV1_COMMON *const cm = &cpi->common;
4711
88.0k
  const int num_planes = av1_num_planes(cm);
4712
88.0k
  TileInfo *const tile_info = &tile_data->tile_info;
4713
88.0k
  MACROBLOCK *const x = &td->mb;
4714
88.0k
  MACROBLOCKD *const xd = &x->e_mbd;
4715
88.0k
  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
4716
88.0k
  const TokenExtra *const tp_orig = *tp;
4717
88.0k
  PartitionSearchState part_search_state;
4718
4719
  // Initialization of state variables used in partition search.
4720
88.0k
  init_partition_search_state_params(x, cpi, &part_search_state, mi_row, mi_col,
4721
88.0k
                                     bsize);
4722
88.0k
  PartitionBlkParams blk_params = part_search_state.part_blk_params;
4723
4724
88.0k
  sms_tree->partitioning = PARTITION_NONE;
4725
88.0k
  if (best_rdc.rdcost < 0) {
4726
0
    av1_invalid_rd_stats(rd_cost);
4727
0
    return part_search_state.found_best_partition;
4728
0
  }
4729
88.0k
  if (bsize == cm->seq_params->sb_size) x->must_find_valid_partition = 0;
4730
4731
  // Override skipping rectangular partition operations for edge blocks.
4732
88.0k
  if (none_rd) *none_rd = 0;
4733
88.0k
  (void)*tp_orig;
4734
4735
#if CONFIG_COLLECT_PARTITION_STATS
4736
  // Stats at the current quad tree
4737
  PartitionTimingStats *part_timing_stats =
4738
      &part_search_state.part_timing_stats;
4739
  // Stats aggregated at frame level
4740
  FramePartitionTimingStats *fr_part_timing_stats = &cpi->partition_stats;
4741
#endif  // CONFIG_COLLECT_PARTITION_STATS
4742
4743
  // Override partition costs at the edges of the frame in the same
4744
  // way as in read_partition (see decodeframe.c).
4745
88.0k
  if (!av1_blk_has_rows_and_cols(&blk_params))
4746
19.4k
    set_partition_cost_for_edge_blk(cm, &part_search_state);
4747
4748
  // Disable rectangular partitions for inner blocks when the current block is
4749
  // forced to only use square partitions.
4750
88.0k
  if (bsize > cpi->sf.part_sf.use_square_partition_only_threshold) {
4751
11.7k
    part_search_state.partition_rect_allowed[HORZ] &= !blk_params.has_rows;
4752
11.7k
    part_search_state.partition_rect_allowed[VERT] &= !blk_params.has_cols;
4753
11.7k
  }
4754
4755
#ifndef NDEBUG
4756
  // Nothing should rely on the default value of this array (which is just
4757
  // leftover from encoding the previous block). Set it to a fixed pattern
4758
  // when debugging.
4759
  // bit 0, 1, 2 are blk_skip of each plane
4760
  // bit 4, 5, 6 are initialization checking of each plane
4761
  memset(x->txfm_search_info.blk_skip, 0x77,
4762
         sizeof(x->txfm_search_info.blk_skip));
4763
#endif  // NDEBUG
4764
4765
88.0k
  assert(mi_size_wide[bsize] == mi_size_high[bsize]);
4766
4767
  // Set buffers and offsets.
4768
88.0k
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
4769
4770
  // Save rdmult before it might be changed, so it can be restored later.
4771
88.0k
  const int orig_rdmult = x->rdmult;
4772
88.0k
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
4773
4774
  // Apply simple motion search for the entire super block with fixed block
4775
  // size, e.g., 16x16, to collect features and write to files for the
4776
  // external ML model.
4777
  // TODO(chengchen): reduce motion search. This function is similar to
4778
  // av1_get_max_min_partition_features().
4779
88.0k
  if (COLLECT_MOTION_SEARCH_FEATURE_SB && !frame_is_intra_only(cm) &&
4780
88.0k
      bsize == cm->seq_params->sb_size) {
4781
0
    av1_collect_motion_search_features_sb(cpi, td, tile_data, mi_row, mi_col,
4782
0
                                          bsize, /*features=*/NULL);
4783
0
    collect_tpl_stats_sb(cpi, bsize, mi_row, mi_col, /*features=*/NULL);
4784
0
  }
4785
4786
  // Update rd cost of the bound using the current multiplier.
4787
88.0k
  av1_rd_cost_update(x->rdmult, &best_rdc);
4788
4789
88.0k
  if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
4790
0
    x->mb_energy = av1_log_block_var(cpi, x, bsize);
4791
4792
  // Set the context.
4793
88.0k
  xd->above_txfm_context =
4794
88.0k
      cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
4795
88.0k
  xd->left_txfm_context =
4796
88.0k
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
4797
88.0k
  av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
4798
4799
#if CONFIG_COLLECT_COMPONENT_TIMING
4800
  start_timing(cpi, av1_prune_partitions_time);
4801
#endif
4802
  // Pruning: before searching any partition type, use source and simple
4803
  // motion search results to prune out unlikely partitions.
4804
88.0k
  av1_prune_partitions_before_search(cpi, x, sms_tree, &part_search_state);
4805
4806
  // Pruning: eliminate partition types that lead to coding block sizes outside
4807
  // the min and max bsize limitations set by the encoder.
4808
88.0k
  av1_prune_partitions_by_max_min_bsize(&x->sb_enc, &part_search_state);
4809
#if CONFIG_COLLECT_COMPONENT_TIMING
4810
  end_timing(cpi, av1_prune_partitions_time);
4811
#endif
4812
4813
  // Partition search
4814
88.0k
BEGIN_PARTITION_SEARCH:
4815
  // If a valid partition is required, usually when the first round cannot find
4816
  // a valid one under the cost limit after pruning, reset the limitations on
4817
  // partition types and intra cnn output.
4818
88.0k
  if (x->must_find_valid_partition) {
4819
0
    reset_part_limitations(cpi, &part_search_state);
4820
    // Invalidate intra cnn output for key frames.
4821
0
    if (frame_is_intra_only(cm) && bsize == BLOCK_64X64) {
4822
0
      part_search_state.intra_part_info->quad_tree_idx = 0;
4823
0
      part_search_state.intra_part_info->cnn_output_valid = 0;
4824
0
    }
4825
0
  }
4826
  // Partition block source pixel variance.
4827
88.0k
  unsigned int pb_source_variance = UINT_MAX;
4828
4829
#if CONFIG_COLLECT_COMPONENT_TIMING
4830
  start_timing(cpi, none_partition_search_time);
4831
#endif
4832
4833
  // Further pruning, or in some cases reverse pruning, when allintra is set.
4834
  // This code helps visual quality, and in some cases metric quality, where
4835
  // the current block comprises at least one very low variance sub-block and
4836
  // at least one where the variance is much higher.
4837
  //
4838
  // The idea is that in such cases there is a danger of ringing and other
4839
  // visual artifacts propagating from a high variance feature, such as an
4840
  // edge, into a very low variance region.
4841
  //
4842
  // The approach taken is to force a break down / split to a smaller block
4843
  // size, to try to separate out the low variance, well predicted blocks from
4844
  // the more complex ones, and to prevent ringing from propagating over a
4845
  // large region.
4846
88.0k
  if ((cpi->oxcf.mode == ALLINTRA) && (bsize >= BLOCK_16X16)) {
4847
45.6k
    double var_min, var_max;
4848
45.6k
    log_sub_block_var(cpi, x, bsize, &var_min, &var_max);
4849
4850
45.6k
    if ((var_min < 0.272) && ((var_max - var_min) > 3.0)) {
4851
0
      part_search_state.partition_none_allowed = 0;
4852
0
      part_search_state.terminate_partition_search = 0;
4853
0
      part_search_state.do_square_split = 1;
4854
0
    }
4855
45.6k
  }
4856
4857
  // PARTITION_NONE search stage.
4858
88.0k
  int64_t part_none_rd = INT64_MAX;
4859
88.0k
  none_partition_search(cpi, td, tile_data, x, pc_tree, sms_tree, &x_ctx,
4860
88.0k
                        &part_search_state, &best_rdc, &pb_source_variance,
4861
88.0k
                        none_rd, &part_none_rd);
4862
4863
#if CONFIG_COLLECT_COMPONENT_TIMING
4864
  end_timing(cpi, none_partition_search_time);
4865
#endif
4866
#if CONFIG_COLLECT_COMPONENT_TIMING
4867
  start_timing(cpi, split_partition_search_time);
4868
#endif
4869
  // PARTITION_SPLIT search stage.
4870
88.0k
  int64_t part_split_rd = INT64_MAX;
4871
88.0k
  split_partition_search(cpi, td, tile_data, tp, x, pc_tree, sms_tree, &x_ctx,
4872
88.0k
                         &part_search_state, &best_rdc, multi_pass_mode,
4873
88.0k
                         &part_split_rd);
4874
#if CONFIG_COLLECT_COMPONENT_TIMING
4875
  end_timing(cpi, split_partition_search_time);
4876
#endif
4877
  // Terminate partition search for the child partitions when both the NONE
4878
  // and SPLIT partition rd_costs are INT64_MAX.
4879
88.0k
  if (cpi->sf.part_sf.early_term_after_none_split &&
4880
88.0k
      part_none_rd == INT64_MAX && part_split_rd == INT64_MAX &&
4881
88.0k
      !x->must_find_valid_partition && (bsize != cm->seq_params->sb_size)) {
4882
21.4k
    part_search_state.terminate_partition_search = 1;
4883
21.4k
  }
4884
4885
  // Do not evaluate non-square partitions if NONE partition did not choose a
4886
  // newmv mode and is skippable.
4887
88.0k
  if ((cpi->sf.part_sf.skip_non_sq_part_based_on_none >= 2) &&
4888
88.0k
      (pc_tree->none != NULL)) {
4889
0
    if (x->qindex <= 200 && is_inter_mode(pc_tree->none->mic.mode) &&
4890
0
        !have_newmv_in_inter_mode(pc_tree->none->mic.mode) &&
4891
0
        pc_tree->none->skippable && !x->must_find_valid_partition &&
4892
0
        bsize >= BLOCK_16X16)
4893
0
      part_search_state.do_rectangular_split = 0;
4894
0
  }
4895
4896
  // Prune partitions based on PARTITION_NONE and PARTITION_SPLIT.
4897
88.0k
  prune_partitions_after_split(cpi, x, sms_tree, &part_search_state, &best_rdc,
4898
88.0k
                               part_none_rd, part_split_rd);
4899
#if CONFIG_COLLECT_COMPONENT_TIMING
4900
  start_timing(cpi, rectangular_partition_search_time);
4901
#endif
4902
  // Rectangular partitions search stage.
4903
88.0k
  rectangular_partition_search(cpi, td, tile_data, tp, x, pc_tree, &x_ctx,
4904
88.0k
                               &part_search_state, &best_rdc,
4905
88.0k
                               rect_part_win_info, HORZ, VERT);
4906
#if CONFIG_COLLECT_COMPONENT_TIMING
4907
  end_timing(cpi, rectangular_partition_search_time);
4908
#endif
4909
4910
88.0k
  if (pb_source_variance == UINT_MAX) {
4911
19.4k
    av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
4912
19.4k
    if (is_cur_buf_hbd(xd)) {
4913
0
      pb_source_variance = av1_high_get_sby_perpixel_variance(
4914
0
          cpi, &x->plane[0].src, bsize, xd->bd);
4915
19.4k
    } else {
4916
19.4k
      pb_source_variance =
4917
19.4k
          av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
4918
19.4k
    }
4919
19.4k
  }
4920
4921
88.0k
  assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions,
4922
88.0k
                 !part_search_state.do_rectangular_split));
4923
4924
88.0k
  int ext_partition_allowed =
4925
88.0k
      part_search_state.do_rectangular_split &&
4926
88.0k
      bsize > cpi->sf.part_sf.ext_partition_eval_thresh &&
4927
88.0k
      av1_blk_has_rows_and_cols(&blk_params);
4928
4929
  // Do not evaluate extended partitions if NONE partition is skippable.
4930
88.0k
  if ((cpi->sf.part_sf.skip_non_sq_part_based_on_none >= 1) &&
4931
88.0k
      (pc_tree->none != NULL)) {
4932
0
    if (pc_tree->none->skippable && !x->must_find_valid_partition &&
4933
0
        bsize >= BLOCK_16X16)
4934
0
      ext_partition_allowed = 0;
4935
0
  }
4936
#if CONFIG_COLLECT_COMPONENT_TIMING
4937
  start_timing(cpi, ab_partitions_search_time);
4938
#endif
4939
  // AB partitions search stage.
4940
88.0k
  ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
4941
88.0k
                       &part_search_state, &best_rdc, rect_part_win_info,
4942
88.0k
                       pb_source_variance, ext_partition_allowed, HORZ_A,
4943
88.0k
                       VERT_B);
4944
#if CONFIG_COLLECT_COMPONENT_TIMING
4945
  end_timing(cpi, ab_partitions_search_time);
4946
#endif
4947
4948
  // 4-way partitions search stage.
4949
88.0k
  int part4_search_allowed[NUM_PART4_TYPES] = { 1, 1 };
4950
  // Prune 4-way partition search.
4951
88.0k
  prune_4_way_partition_search(cpi, x, pc_tree, &part_search_state, &best_rdc,
4952
88.0k
                               pb_source_variance, ext_partition_allowed,
4953
88.0k
                               part4_search_allowed);
4954
4955
#if CONFIG_COLLECT_COMPONENT_TIMING
4956
  start_timing(cpi, rd_pick_4partition_time);
4957
#endif
4958
  // PARTITION_HORZ_4
4959
88.0k
  assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions,
4960
88.0k
                 !part4_search_allowed[HORZ4]));
4961
88.0k
  if (!part_search_state.terminate_partition_search &&
4962
88.0k
      part4_search_allowed[HORZ4]) {
4963
0
    const int inc_step[NUM_PART4_TYPES] = { mi_size_high[blk_params.bsize] / 4,
4964
0
                                            0 };
4965
    // Evaluation of Horz4 partition type.
4966
0
    rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
4967
0
                       pc_tree->horizontal4, &part_search_state, &best_rdc,
4968
0
                       inc_step, PARTITION_HORZ_4);
4969
0
  }
4970
4971
  // PARTITION_VERT_4
4972
88.0k
  assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions,
4973
88.0k
                 !part4_search_allowed[VERT4]));
4974
88.0k
  if (!part_search_state.terminate_partition_search &&
4975
88.0k
      part4_search_allowed[VERT4] && blk_params.has_cols) {
4976
38
    const int inc_step[NUM_PART4_TYPES] = { 0, mi_size_wide[blk_params.bsize] /
4977
38
                                                   4 };
4978
    // Evaluation of Vert4 partition type.
4979
38
    rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
4980
38
                       pc_tree->vertical4, &part_search_state, &best_rdc,
4981
38
                       inc_step, PARTITION_VERT_4);
4982
38
  }
4983
#if CONFIG_COLLECT_COMPONENT_TIMING
4984
  end_timing(cpi, rd_pick_4partition_time);
4985
#endif
4986
4987
88.0k
  if (bsize == cm->seq_params->sb_size &&
4988
88.0k
      !part_search_state.found_best_partition) {
4989
    // Did not find a valid partition, go back and search again, with less
4990
    // constraint on which partition types to search.
4991
0
    x->must_find_valid_partition = 1;
4992
#if CONFIG_COLLECT_PARTITION_STATS
4993
    fr_part_timing_stats->partition_redo += 1;
4994
#endif  // CONFIG_COLLECT_PARTITION_STATS
4995
0
    goto BEGIN_PARTITION_SEARCH;
4996
0
  }
4997
4998
  // Store the final rd cost
4999
88.0k
  *rd_cost = best_rdc;
5000
5001
  // Also record the best partition in simple motion data tree because it is
5002
  // necessary for the related speed features.
5003
88.0k
  sms_tree->partitioning = pc_tree->partitioning;
5004
5005
#if CONFIG_COLLECT_PARTITION_STATS
5006
  if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX) {
5007
    part_timing_stats->partition_decisions[pc_tree->partitioning] += 1;
5008
  }
5009
5010
  // If CONFIG_COLLECT_PARTITION_STATS is 1, then print out the stats for each
5011
  // prediction block.
5012
  print_partition_timing_stats_with_rdcost(
5013
      part_timing_stats, mi_row, mi_col, bsize,
5014
      cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
5015
      cm->current_frame.frame_number, &best_rdc, "part_timing.csv");
5016
  /*
5017
  print_partition_timing_stats(part_timing_stats, cm->show_frame,
5018
                               frame_is_intra_only(cm), bsize,
5019
                               "part_timing_data.csv");
5020
  */
5021
  // If CONFIG_COLLECT_PARTITION_STATS is 2, then we print out the stats for
5022
  // the whole clip. So we need to pass the information upstream to the encoder.
5023
  accumulate_partition_timing_stats(fr_part_timing_stats, part_timing_stats,
5024
                                    bsize);
5025
#endif  // CONFIG_COLLECT_PARTITION_STATS
5026
5027
  // Reset the PC_TREE deallocation flag.
5028
88.0k
  int pc_tree_dealloc = 0;
5029
5030
#if CONFIG_COLLECT_COMPONENT_TIMING
5031
  start_timing(cpi, encode_sb_time);
5032
#endif
5033
  // If a valid partition is found and reconstruction is required for future
5034
  // sub-blocks in the same group, encode with the selected partition.
5035
88.0k
  if (part_search_state.found_best_partition && pc_tree->index != 3) {
5036
55.2k
    if (bsize == cm->seq_params->sb_size) {
5037
      // Encode the superblock.
5038
11.7k
      const int emit_output = multi_pass_mode != SB_DRY_PASS;
5039
11.7k
      const RUN_TYPE run_type = emit_output ? OUTPUT_ENABLED : DRY_RUN_NORMAL;
5040
5041
      // Write partition tree to file. Not used by default.
5042
11.7k
      if (COLLECT_MOTION_SEARCH_FEATURE_SB) {
5043
0
        write_partition_tree(cpi, pc_tree, bsize, mi_row, mi_col);
5044
0
        ++cpi->sb_counter;
5045
0
      }
5046
5047
11.7k
      set_cb_offsets(x->cb_offset, 0, 0);
5048
11.7k
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, run_type, bsize,
5049
11.7k
                pc_tree, NULL);
5050
      // Dealloc the whole PC_TREE after a superblock is done.
5051
11.7k
      av1_free_pc_tree_recursive(pc_tree, num_planes, 0, 0);
5052
11.7k
      pc_tree_dealloc = 1;
5053
43.5k
    } else {
5054
      // Encode the smaller blocks in DRY_RUN mode.
5055
43.5k
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
5056
43.5k
                pc_tree, NULL);
5057
43.5k
    }
5058
55.2k
  }
5059
#if CONFIG_COLLECT_COMPONENT_TIMING
5060
  end_timing(cpi, encode_sb_time);
5061
#endif
5062
5063
  // If the tree still exists (non-superblock), dealloc most nodes, only keep
5064
  // nodes for the best partition and PARTITION_NONE.
5065
88.0k
  if (pc_tree_dealloc == 0)
5066
76.2k
    av1_free_pc_tree_recursive(pc_tree, num_planes, 1, 1);
5067
5068
88.0k
  if (bsize == cm->seq_params->sb_size) {
5069
11.7k
    assert(best_rdc.rate < INT_MAX);
5070
11.7k
    assert(best_rdc.dist < INT64_MAX);
5071
76.2k
  } else {
5072
76.2k
    assert(tp_orig == *tp);
5073
76.2k
  }
5074
5075
  // Restore the rd multiplier.
5076
88.0k
  x->rdmult = orig_rdmult;
5077
88.0k
  return part_search_state.found_best_partition;
5078
88.0k
}
5079
#endif  // !CONFIG_REALTIME_ONLY
5080
5081
#undef COLLECT_MOTION_SEARCH_FEATURE_SB
5082
5083
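av1_rd_pick_partition() above walks a fixed sequence of stages (NONE, SPLIT, rectangular, AB, and 4-way partitions), each guarded by pruning flags in PartitionSearchState, and each allowed to tighten best_rdc so that later stages search under a stricter RD bound. The fragment below is a skeletal restatement of that control flow only; the stage functions and their costs are hypothetical stand-ins, not the libaom implementations.

/*
 * Skeletal sketch of the stage ordering in the full partition search.  Dummy
 * stages with made-up costs stand in for none/split/rect/AB/4-way; each stage
 * may tighten the shared best RD bound, and pruning can stop the sequence.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
  int64_t best_rdcost;
  bool terminate;   // e.g. set by early-termination heuristics
} SearchState;

static void stage_none(SearchState *s)  { if (9000 < s->best_rdcost) s->best_rdcost = 9000; }
static void stage_split(SearchState *s) { if (8200 < s->best_rdcost) s->best_rdcost = 8200; }
static void stage_rect(SearchState *s)  { if (8600 < s->best_rdcost) s->best_rdcost = 8600; }

int main(void) {
  SearchState s = { INT64_MAX, false };
  void (*stages[])(SearchState *) = { stage_none, stage_split, stage_rect };
  for (unsigned i = 0; i < sizeof(stages) / sizeof(stages[0]); ++i) {
    if (s.terminate) break;   // pruning can skip all remaining stages
    stages[i](&s);            // each stage may lower the best RD cost
  }
  printf("best rdcost after all stages: %lld\n", (long long)s.best_rdcost);
  return 0;
}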
#if CONFIG_RT_ML_PARTITIONING
5084
#define FEATURES 6
5085
#define LABELS 2
5086
static int ml_predict_var_partitioning(AV1_COMP *cpi, MACROBLOCK *x,
5087
                                       BLOCK_SIZE bsize, int mi_row,
5088
                                       int mi_col) {
5089
  AV1_COMMON *const cm = &cpi->common;
5090
  const NN_CONFIG *nn_config = NULL;
5091
  const float *means = NULL;
5092
  const float *vars = NULL;
5093
  switch (bsize) {
5094
    case BLOCK_64X64:
5095
      nn_config = &av1_var_part_nnconfig_64;
5096
      means = av1_var_part_means_64;
5097
      vars = av1_var_part_vars_64;
5098
      break;
5099
    case BLOCK_32X32:
5100
      nn_config = &av1_var_part_nnconfig_32;
5101
      means = av1_var_part_means_32;
5102
      vars = av1_var_part_vars_32;
5103
      break;
5104
    case BLOCK_16X16:
5105
      nn_config = &av1_var_part_nnconfig_16;
5106
      means = av1_var_part_means_16;
5107
      vars = av1_var_part_vars_16;
5108
      break;
5109
    case BLOCK_8X8:
5110
    default: assert(0 && "Unexpected block size."); return -1;
5111
  }
5112
5113
  if (!nn_config) return -1;
5114
5115
  {
5116
    const float thresh = cpi->oxcf.speed <= 5 ? 1.25f : 0.0f;
5117
    float features[FEATURES] = { 0.0f };
5118
    const int dc_q = av1_dc_quant_QTX(cm->quant_params.base_qindex, 0,
5119
                                      cm->seq_params->bit_depth);
5120
    int feature_idx = 0;
5121
    float score[LABELS];
5122
5123
    features[feature_idx] =
5124
        (logf((float)(dc_q * dc_q) / 256.0f + 1.0f) - means[feature_idx]) /
5125
        sqrtf(vars[feature_idx]);
5126
    feature_idx++;
5127
    av1_setup_src_planes(x, cpi->source, mi_row, mi_col, 1, bsize);
5128
    {
5129
      const int bs = block_size_wide[bsize];
5130
      const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
5131
      const int sb_offset_row = 4 * (mi_row & 15);
5132
      const int sb_offset_col = 4 * (mi_col & 15);
5133
      const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col;
5134
      const uint8_t *src = x->plane[0].src.buf;
5135
      const int src_stride = x->plane[0].src.stride;
5136
      const int pred_stride = 64;
5137
      unsigned int sse;
5138
      int i;
5139
      // Variance of whole block.
5140
      const unsigned int var =
5141
          cpi->ppi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
5142
      const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
5143
5144
      features[feature_idx] = (logf((float)var + 1.0f) - means[feature_idx]) /
5145
                              sqrtf(vars[feature_idx]);
5146
      feature_idx++;
5147
      for (i = 0; i < 4; ++i) {
5148
        const int x_idx = (i & 1) * bs / 2;
5149
        const int y_idx = (i >> 1) * bs / 2;
5150
        const int src_offset = y_idx * src_stride + x_idx;
5151
        const int pred_offset = y_idx * pred_stride + x_idx;
5152
        // Variance of quarter block.
5153
        const unsigned int sub_var =
5154
            cpi->ppi->fn_ptr[subsize].vf(src + src_offset, src_stride,
5155
                                         pred + pred_offset, pred_stride, &sse);
5156
        const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
5157
        features[feature_idx] =
5158
            (var_ratio - means[feature_idx]) / sqrtf(vars[feature_idx]);
5159
        feature_idx++;
5160
      }
5161
    }
5162
    //    for (int i = 0; i<FEATURES; i++)
5163
    //      printf("F_%d, %f; ", i, features[i]);
5164
    assert(feature_idx == FEATURES);
5165
    av1_nn_predict(features, nn_config, 1, score);
5166
    //    printf("Score %f, thr %f ", (float)score[0], thresh);
5167
    if (score[0] > thresh) return PARTITION_SPLIT;
5168
    if (score[0] < -thresh) return PARTITION_NONE;
5169
    return -1;
5170
  }
5171
}
5172
#undef FEATURES
5173
#undef LABELS
5174
5175
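ml_predict_var_partitioning() above z-normalizes six features (a quantizer-derived term, the whole-block variance of the source against the estimated prediction, and four quarter-block variance ratios), runs them through a small neural net, and maps the score to one of three outcomes: force SPLIT, force NONE, or leave the decision to the regular search. The sketch below shows only the normalization plus the symmetric-threshold decision; the feature statistics are placeholders and dummy_nn_score() is a stand-in for av1_nn_predict().

/*
 * Sketch of the feature normalization and +/- threshold decision.  The model
 * and all numbers are illustrative placeholders.
 */
#include <math.h>
#include <stdio.h>

#define FEATURES 6
enum { DECIDE_NONE = 0, DECIDE_SPLIT = 1, DECIDE_UNSURE = -1 };

// Placeholder "model": a weighted sum, just to produce a score to threshold.
static float dummy_nn_score(const float *f) {
  float s = 0.0f;
  for (int i = 0; i < FEATURES; ++i) s += 0.5f * f[i];
  return s;
}

static int predict_partition(const float *raw, const float *means,
                             const float *vars, float thresh) {
  float f[FEATURES];
  for (int i = 0; i < FEATURES; ++i)
    f[i] = (raw[i] - means[i]) / sqrtf(vars[i]);  // z-normalize each feature
  const float score = dummy_nn_score(f);
  if (score > thresh) return DECIDE_SPLIT;   // confident: go straight to split
  if (score < -thresh) return DECIDE_NONE;   // confident: keep the block whole
  return DECIDE_UNSURE;                      // let the full search decide
}

int main(void) {
  const float raw[FEATURES]   = { 3.1f, 7.9f, 1.1f, 0.9f, 1.3f, 0.8f };
  const float means[FEATURES] = { 3.0f, 7.0f, 1.0f, 1.0f, 1.0f, 1.0f };
  const float vars[FEATURES]  = { 1.0f, 4.0f, 0.2f, 0.2f, 0.2f, 0.2f };
  printf("decision: %d\n", predict_partition(raw, means, vars, 1.25f));
  return 0;
}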
// Uncomment for collecting data for ML-based partitioning
5176
// #define _COLLECT_GROUND_TRUTH_
5177
5178
#ifdef _COLLECT_GROUND_TRUTH_
5179
static int store_partition_data(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
5180
                                int mi_row, int mi_col, PARTITION_TYPE part) {
5181
  AV1_COMMON *const cm = &cpi->common;
5182
  char fname[128];
5183
  switch (bsize) {
5184
    case BLOCK_64X64: sprintf(fname, "data_64x64.txt"); break;
5185
    case BLOCK_32X32: sprintf(fname, "data_32x32.txt"); break;
5186
    case BLOCK_16X16: sprintf(fname, "data_16x16.txt"); break;
5187
    case BLOCK_8X8: sprintf(fname, "data_8x8.txt"); break;
5188
    default: assert(0 && "Unexpected block size."); return -1;
5189
  }
5190
5191
  float features[6];  // DC_Q, VAR, VAR_RATIO-0..3
5192
5193
  FILE *f = fopen(fname, "a");
5194
5195
  {
5196
    const int dc_q = av1_dc_quant_QTX(cm->quant_params.base_qindex, 0,
5197
                                      cm->seq_params->bit_depth);
5198
    int feature_idx = 0;
5199
5200
    features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
5201
    av1_setup_src_planes(x, cpi->source, mi_row, mi_col, 1, bsize);
5202
    {
5203
      const int bs = block_size_wide[bsize];
5204
      const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
5205
      const int sb_offset_row = 4 * (mi_row & 15);
5206
      const int sb_offset_col = 4 * (mi_col & 15);
5207
      const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col;
5208
      const uint8_t *src = x->plane[0].src.buf;
5209
      const int src_stride = x->plane[0].src.stride;
5210
      const int pred_stride = 64;
5211
      unsigned int sse;
5212
      int i;
5213
      // Variance of whole block.
5214
      /*
5215
                if (bs == 8)
5216
                {
5217
                  int r, c;
5218
                  printf("%d %d\n", mi_row, mi_col);
5219
                  for (r = 0; r < bs; ++r) {
5220
                    for (c = 0; c < bs; ++c) {
5221
                      printf("%3d ",
5222
                             src[r * src_stride + c] - pred[64 * r + c]);
5223
                    }
5224
                    printf("\n");
5225
                  }
5226
                  printf("\n");
5227
                }
5228
      */
5229
      const unsigned int var =
5230
          cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
5231
      const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
5232
5233
      features[feature_idx++] = logf((float)var + 1.0f);
5234
5235
      fprintf(f, "%f,%f,", features[0], features[1]);
5236
      for (i = 0; i < 4; ++i) {
5237
        const int x_idx = (i & 1) * bs / 2;
5238
        const int y_idx = (i >> 1) * bs / 2;
5239
        const int src_offset = y_idx * src_stride + x_idx;
5240
        const int pred_offset = y_idx * pred_stride + x_idx;
5241
        // Variance of quarter block.
5242
        const unsigned int sub_var =
5243
            cpi->fn_ptr[subsize].vf(src + src_offset, src_stride,
5244
                                    pred + pred_offset, pred_stride, &sse);
5245
        const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
5246
        features[feature_idx++] = var_ratio;
5247
        fprintf(f, "%f,", var_ratio);
5248
      }
5249
5250
      fprintf(f, "%d\n", part == PARTITION_NONE ? 0 : 1);
5251
    }
5252
5253
    fclose(f);
5254
    return -1;
5255
  }
5256
}
5257
#endif
5258
5259
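When _COLLECT_GROUND_TRUTH_ is defined, store_partition_data() above appends one comma-separated row per block: the dc_q feature, the whole-block log variance, four quarter-block variance ratios, and a 0/1 label (0 for PARTITION_NONE, 1 otherwise). The sketch below reads such rows back, assuming exactly the layout written by the fprintf calls above; the file name is just one of the per-block-size examples.

/*
 * Sketch of reading back rows written by store_partition_data().
 * Row layout assumed: f0,f1,r0,r1,r2,r3,label
 */
#include <stdio.h>

int main(void) {
  FILE *f = fopen("data_16x16.txt", "r");
  if (!f) return 1;
  float feat[6];
  int label;
  while (fscanf(f, "%f,%f,%f,%f,%f,%f,%d", &feat[0], &feat[1], &feat[2],
                &feat[3], &feat[4], &feat[5], &label) == 7) {
    printf("label %d, dc_q feature %.3f, var feature %.3f\n", label, feat[0],
           feat[1]);
  }
  fclose(f);
  return 0;
}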
static void duplicate_mode_info_in_sb(AV1_COMMON *cm, MACROBLOCKD *xd,
5260
                                      int mi_row, int mi_col,
5261
                                      BLOCK_SIZE bsize) {
5262
  const int block_width =
5263
      AOMMIN(mi_size_wide[bsize], cm->mi_params.mi_cols - mi_col);
5264
  const int block_height =
5265
      AOMMIN(mi_size_high[bsize], cm->mi_params.mi_rows - mi_row);
5266
  const int mi_stride = xd->mi_stride;
5267
  MB_MODE_INFO *const src_mi = xd->mi[0];
5268
  int i, j;
5269
5270
  for (j = 0; j < block_height; ++j)
5271
    for (i = 0; i < block_width; ++i) xd->mi[j * mi_stride + i] = src_mi;
5272
}
5273
5274
static INLINE void copy_mbmi_ext_frame_to_mbmi_ext(
5275
    MB_MODE_INFO_EXT *const mbmi_ext,
5276
    const MB_MODE_INFO_EXT_FRAME *mbmi_ext_best, uint8_t ref_frame_type) {
5277
  memcpy(mbmi_ext->ref_mv_stack[ref_frame_type], mbmi_ext_best->ref_mv_stack,
5278
         sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
5279
  memcpy(mbmi_ext->weight[ref_frame_type], mbmi_ext_best->weight,
5280
         sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
5281
  mbmi_ext->mode_context[ref_frame_type] = mbmi_ext_best->mode_context;
5282
  mbmi_ext->ref_mv_count[ref_frame_type] = mbmi_ext_best->ref_mv_count;
5283
  memcpy(mbmi_ext->global_mvs, mbmi_ext_best->global_mvs,
5284
         sizeof(mbmi_ext->global_mvs));
5285
}
5286
5287
static void fill_mode_info_sb(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
5288
                              int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
5289
  AV1_COMMON *const cm = &cpi->common;
5290
  MACROBLOCKD *xd = &x->e_mbd;
5291
  int hbs = mi_size_wide[bsize] >> 1;
5292
  PARTITION_TYPE partition = pc_tree->partitioning;
5293
  BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
5294
5295
  assert(bsize >= BLOCK_8X8);
5296
5297
  if (mi_row >= cm->mi_params.mi_rows || mi_col >= cm->mi_params.mi_cols)
5298
    return;
5299
5300
  switch (partition) {
5301
    case PARTITION_NONE:
5302
      set_mode_info_offsets(&cm->mi_params, &cpi->mbmi_ext_info, x, xd, mi_row,
5303
                            mi_col);
5304
      *(xd->mi[0]) = pc_tree->none->mic;
5305
      copy_mbmi_ext_frame_to_mbmi_ext(
5306
          &x->mbmi_ext, &pc_tree->none->mbmi_ext_best, LAST_FRAME);
5307
      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
5308
      break;
5309
    case PARTITION_SPLIT: {
5310
      fill_mode_info_sb(cpi, x, mi_row, mi_col, subsize, pc_tree->split[0]);
5311
      fill_mode_info_sb(cpi, x, mi_row, mi_col + hbs, subsize,
5312
                        pc_tree->split[1]);
5313
      fill_mode_info_sb(cpi, x, mi_row + hbs, mi_col, subsize,
5314
                        pc_tree->split[2]);
5315
      fill_mode_info_sb(cpi, x, mi_row + hbs, mi_col + hbs, subsize,
5316
                        pc_tree->split[3]);
5317
      break;
5318
    }
5319
    default: break;
5320
  }
5321
}
5322
5323
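fill_mode_info_sb() and duplicate_mode_info_in_sb() above fan the chosen block's mode info out over every mi-unit the block covers, clamped to the frame boundary, so that later context derivation sees the selected mode. A minimal sketch of that clamp-and-duplicate pattern, with a simplified grid and ModeInfo type in place of xd->mi:

/*
 * Minimal sketch of the clamp-and-duplicate pattern: one mode-info pointer is
 * copied into every grid cell the block covers, clipped to the grid edge.
 * Types and dimensions are simplified placeholders.
 */
#include <stdio.h>

typedef struct { int mode; } ModeInfo;

static void duplicate_mi(ModeInfo **grid, int stride, int grid_rows,
                         int grid_cols, int row, int col, int blk_h, int blk_w,
                         ModeInfo *src) {
  const int h = (row + blk_h > grid_rows) ? grid_rows - row : blk_h;  // clamp bottom
  const int w = (col + blk_w > grid_cols) ? grid_cols - col : blk_w;  // clamp right
  for (int j = 0; j < h; ++j)
    for (int i = 0; i < w; ++i) grid[(row + j) * stride + (col + i)] = src;
}

int main(void) {
  ModeInfo mi = { 7 };
  ModeInfo *grid[4 * 4] = { 0 };
  duplicate_mi(grid, 4, 4, 4, 2, 2, 4, 4, &mi);  // a 4x4 block partly off-grid
  printf("bottom-right cell mode: %d\n", grid[3 * 4 + 3]->mode);
  return 0;
}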
void av1_nonrd_pick_partition(AV1_COMP *cpi, ThreadData *td,
5324
                              TileDataEnc *tile_data, TokenExtra **tp,
5325
                              int mi_row, int mi_col, BLOCK_SIZE bsize,
5326
                              RD_STATS *rd_cost, int do_recon, int64_t best_rd,
5327
                              PC_TREE *pc_tree) {
5328
  AV1_COMMON *const cm = &cpi->common;
5329
  TileInfo *const tile_info = &tile_data->tile_info;
5330
  MACROBLOCK *const x = &td->mb;
5331
  MACROBLOCKD *const xd = &x->e_mbd;
5332
  const int hbs = mi_size_wide[bsize] >> 1;
5333
  TokenExtra *tp_orig = *tp;
5334
  const ModeCosts *mode_costs = &x->mode_costs;
5335
  RD_STATS this_rdc, best_rdc;
5336
  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
5337
  int do_split = bsize > BLOCK_8X8;
5338
  // Override skipping rectangular partition operations for edge blocks
5339
  const int force_horz_split = (mi_row + 2 * hbs > cm->mi_params.mi_rows);
5340
  const int force_vert_split = (mi_col + 2 * hbs > cm->mi_params.mi_cols);
5341
5342
  int partition_none_allowed = !force_horz_split && !force_vert_split;
5343
5344
  assert(mi_size_wide[bsize] == mi_size_high[bsize]);  // Square partition only
5345
  assert(cm->seq_params->sb_size == BLOCK_64X64);      // Small SB so far
5346
5347
  (void)*tp_orig;
5348
5349
  av1_invalid_rd_stats(&best_rdc);
5350
  best_rdc.rdcost = best_rd;
5351
#ifndef _COLLECT_GROUND_TRUTH_
5352
  if (partition_none_allowed && do_split) {
5353
    const int ml_predicted_partition =
5354
        ml_predict_var_partitioning(cpi, x, bsize, mi_row, mi_col);
5355
    if (ml_predicted_partition == PARTITION_NONE) do_split = 0;
5356
    if (ml_predicted_partition == PARTITION_SPLIT) partition_none_allowed = 0;
5357
  }
5358
#endif
5359
5360
  xd->above_txfm_context =
5361
      cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
5362
  xd->left_txfm_context =
5363
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
5364
  av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
5365
5366
  // PARTITION_NONE
5367
  if (partition_none_allowed) {
5368
    pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf);
5369
    PICK_MODE_CONTEXT *ctx = pc_tree->none;
5370
5371
// Flip for RDO based pick mode
5372
#if 0
5373
    RD_STATS dummy;
5374
    av1_invalid_rd_stats(&dummy);
5375
    pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
5376
                  PARTITION_NONE, bsize, ctx, dummy);
5377
#else
5378
    pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize,
5379
                        ctx);
5380
#endif
5381
    if (this_rdc.rate != INT_MAX) {
5382
      const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
5383
5384
      this_rdc.rate += mode_costs->partition_cost[pl][PARTITION_NONE];
5385
      this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
5386
      if (this_rdc.rdcost < best_rdc.rdcost) {
5387
        best_rdc = this_rdc;
5388
        if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
5389
      }
5390
    }
5391
  }
5392
5393
  // PARTITION_SPLIT
5394
  if (do_split) {
5395
    RD_STATS sum_rdc;
5396
    const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
5397
5398
    av1_init_rd_stats(&sum_rdc);
5399
5400
    for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
5401
      pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
5402
      pc_tree->split[i]->index = i;
5403
    }
5404
5405
    int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
5406
    sum_rdc.rate += mode_costs->partition_cost[pl][PARTITION_SPLIT];
5407
    sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
5408
    for (int i = 0;
5409
         i < SUB_PARTITIONS_SPLIT && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
5410
      const int x_idx = (i & 1) * hbs;
5411
      const int y_idx = (i >> 1) * hbs;
5412
5413
      if (mi_row + y_idx >= cm->mi_params.mi_rows ||
5414
          mi_col + x_idx >= cm->mi_params.mi_cols)
5415
        continue;
5416
      av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
5417
                               mi_col + x_idx, subsize, &this_rdc, i < 3,
5418
                               best_rdc.rdcost - sum_rdc.rdcost,
5419
                               pc_tree->split[i]);
5420
5421
      if (this_rdc.rate == INT_MAX) {
5422
        av1_invalid_rd_stats(&sum_rdc);
5423
      } else {
5424
        sum_rdc.rate += this_rdc.rate;
5425
        sum_rdc.dist += this_rdc.dist;
5426
        sum_rdc.rdcost += this_rdc.rdcost;
5427
      }
5428
    }
5429
    if (sum_rdc.rdcost < best_rdc.rdcost) {
5430
      best_rdc = sum_rdc;
5431
      pc_tree->partitioning = PARTITION_SPLIT;
5432
    }
5433
  }
5434
5435
#ifdef _COLLECT_GROUND_TRUTH_
5436
  store_partition_data(cpi, x, bsize, mi_row, mi_col, pc_tree->partitioning);
5437
#endif
5438
5439
  *rd_cost = best_rdc;
5440
5441
  av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
5442
5443
  if (best_rdc.rate == INT_MAX) {
5444
    av1_invalid_rd_stats(rd_cost);
5445
    return;
5446
  }
5447
5448
  // update mode info array
5449
  fill_mode_info_sb(cpi, x, mi_row, mi_col, bsize, pc_tree);
5450
5451
  if (do_recon) {
5452
    if (bsize == cm->seq_params->sb_size) {
5453
      // NOTE: To get an estimate of the rate due to the tokens, use:
5454
      // int rate_coeffs = 0;
5455
      // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
5456
      //           bsize, pc_tree, &rate_coeffs);
5457
      set_cb_offsets(x->cb_offset, 0, 0);
5458
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
5459
                pc_tree, NULL);
5460
    } else {
5461
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
5462
                pc_tree, NULL);
5463
    }
5464
  }
5465
5466
  if (bsize == BLOCK_64X64 && do_recon) {
5467
    assert(best_rdc.rate < INT_MAX);
5468
    assert(best_rdc.dist < INT64_MAX);
5469
  } else {
5470
    assert(tp_orig == *tp);
5471
  }
5472
}
5473
#endif  // CONFIG_RT_ML_PARTITIONING
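In the SPLIT branch of av1_nonrd_pick_partition() above, the partition signaling cost is added first, then rate, distortion, and RD cost are accumulated over up to four sub-blocks with an early exit as soon as the running cost exceeds the best so far; the split is kept only if the summed cost still wins. The sketch below mirrors that accumulate-and-compare shape with a simplified floating-point rd = rate * lambda + dist cost in place of libaom's fixed-point RDCOST() macro; all numbers are placeholders.

/*
 * Simplified accumulate-and-compare logic for a 4-way split, using a
 * floating-point cost model as a stand-in for RDCOST().
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { int64_t rate, dist; double rdcost; } Rd;

static double rd_of(int64_t rate, int64_t dist, double lambda) {
  return (double)rate * lambda + (double)dist;
}

static bool try_split(const Rd sub[4], int64_t split_signal_rate, double lambda,
                      Rd *best) {
  Rd sum = { split_signal_rate, 0, 0.0 };
  sum.rdcost = rd_of(sum.rate, 0, lambda);   // cost of signaling the split
  for (int i = 0; i < 4 && sum.rdcost < best->rdcost; ++i) {
    sum.rate += sub[i].rate;      // accumulate sub-block rate
    sum.dist += sub[i].dist;      // accumulate sub-block distortion
    sum.rdcost += sub[i].rdcost;  // stop early once the bound is exceeded
  }
  if (sum.rdcost < best->rdcost) { *best = sum; return true; }  // split wins
  return false;
}

int main(void) {
  const double lambda = 85.0;                            // placeholder multiplier
  Rd best = { 400, 52000, rd_of(400, 52000, lambda) };   // cost of PARTITION_NONE
  Rd sub[4];
  for (int i = 0; i < 4; ++i) {
    sub[i].rate = 70;
    sub[i].dist = 9000;
    sub[i].rdcost = rd_of(sub[i].rate, sub[i].dist, lambda);
  }
  printf("split chosen: %d  best rdcost: %.1f\n",
         try_split(sub, 20, lambda, &best), best.rdcost);
  return 0;
}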