Coverage Report

Created: 2025-10-10 07:09

/src/libavif/ext/aom/av1/encoder/partition_search.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2020, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <float.h>
13
14
#include "config/aom_config.h"
15
16
#include "aom_dsp/txfm_common.h"
17
18
#include "av1/common/av1_common_int.h"
19
#include "av1/common/blockd.h"
20
#include "av1/common/enums.h"
21
#include "av1/common/reconintra.h"
22
23
#include "av1/encoder/aq_complexity.h"
24
#include "av1/encoder/aq_variance.h"
25
#include "av1/encoder/context_tree.h"
26
#include "av1/encoder/encoder.h"
27
#include "av1/encoder/encodeframe.h"
28
#include "av1/encoder/encodeframe_utils.h"
29
#include "av1/encoder/encodemv.h"
30
#include "av1/encoder/intra_mode_search_utils.h"
31
#include "av1/encoder/motion_search_facade.h"
32
#include "av1/encoder/nonrd_opt.h"
33
#include "av1/encoder/partition_search.h"
34
#include "av1/encoder/partition_strategy.h"
35
#include "av1/encoder/reconinter_enc.h"
36
#include "av1/encoder/tokenize.h"
37
#include "av1/encoder/var_based_part.h"
38
#include "av1/encoder/av1_ml_partition_models.h"
39
40
#if CONFIG_TUNE_VMAF
41
#include "av1/encoder/tune_vmaf.h"
42
#endif
43
44
17.5M
#define COLLECT_MOTION_SEARCH_FEATURE_SB 0
45
46
#if CONFIG_PARTITION_SEARCH_ORDER
47
void av1_reset_part_sf(PARTITION_SPEED_FEATURES *part_sf) {
48
  part_sf->partition_search_type = SEARCH_PARTITION;
49
  part_sf->less_rectangular_check_level = 0;
50
  part_sf->use_square_partition_only_threshold = BLOCK_128X128;
51
  part_sf->auto_max_partition_based_on_simple_motion = NOT_IN_USE;
52
  part_sf->default_max_partition_size = BLOCK_LARGEST;
53
  part_sf->default_min_partition_size = BLOCK_4X4;
54
  part_sf->adjust_var_based_rd_partitioning = 0;
55
  part_sf->max_intra_bsize = BLOCK_LARGEST;
56
  // This setting only takes effect when partition_search_type is set
57
  // to FIXED_PARTITION.
58
  part_sf->fixed_partition_size = BLOCK_16X16;
59
  // Recode loop tolerance %.
60
  part_sf->partition_search_breakout_dist_thr = 0;
61
  part_sf->partition_search_breakout_rate_thr = 0;
62
  part_sf->prune_ext_partition_types_search_level = 0;
63
  part_sf->prune_part4_search = 0;
64
  part_sf->ml_prune_partition = 0;
65
  part_sf->ml_early_term_after_part_split_level = 0;
66
  for (int i = 0; i < PARTITION_BLOCK_SIZES; ++i) {
67
    part_sf->ml_partition_search_breakout_thresh[i] =
68
        -1;  // -1 means not enabled.
69
  }
70
  part_sf->simple_motion_search_prune_agg = SIMPLE_AGG_LVL0;
71
  part_sf->simple_motion_search_split = 0;
72
  part_sf->simple_motion_search_prune_rect = 0;
73
  part_sf->simple_motion_search_early_term_none = 0;
74
  part_sf->simple_motion_search_reduce_search_steps = 0;
75
  part_sf->intra_cnn_based_part_prune_level = 0;
76
  part_sf->ext_partition_eval_thresh = BLOCK_8X8;
77
  part_sf->rect_partition_eval_thresh = BLOCK_128X128;
78
  part_sf->ext_part_eval_based_on_cur_best = 0;
79
  part_sf->prune_ext_part_using_split_info = 0;
80
  part_sf->prune_rectangular_split_based_on_qidx = 0;
81
  part_sf->early_term_after_none_split = 0;
82
  part_sf->ml_predict_breakout_level = 0;
83
  part_sf->prune_sub_8x8_partition_level = 0;
84
  part_sf->simple_motion_search_rect_split = 0;
85
  part_sf->reuse_prev_rd_results_for_part_ab = 0;
86
  part_sf->reuse_best_prediction_for_part_ab = 0;
87
  part_sf->use_best_rd_for_pruning = 0;
88
  part_sf->skip_non_sq_part_based_on_none = 0;
89
}
90
91
// Reset speed features that work for the baseline encoding, but
92
// block the external partition search.
93
void av1_reset_sf_for_ext_part(AV1_COMP *const cpi) {
94
  cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions = 0;
95
}
96
#endif  // CONFIG_PARTITION_SEARCH_ORDER
97
98
#if !CONFIG_REALTIME_ONLY
99
// If input |features| is NULL, write tpl stats to file for each super block.
100
// Otherwise, store tpl stats to |features|.
101
// The tpl stats are computed in units of tpl_bsize_1d (16x16).
102
// When writing to a text file:
103
// The first row contains super block position, super block size,
104
// tpl unit length, number of units in the super block.
105
// The second row contains the intra prediction cost for each unit.
106
// The third row contains the inter prediction cost for each unit.
107
// The fourth row contains the motion compensated dependency cost for each unit.
108
static void collect_tpl_stats_sb(const AV1_COMP *const cpi,
109
                                 const BLOCK_SIZE bsize, const int mi_row,
110
                                 const int mi_col,
111
0
                                 aom_partition_features_t *features) {
112
0
  const AV1_COMMON *const cm = &cpi->common;
113
0
  GF_GROUP *gf_group = &cpi->ppi->gf_group;
114
0
  if (gf_group->update_type[cpi->gf_frame_index] == INTNL_OVERLAY_UPDATE ||
115
0
      gf_group->update_type[cpi->gf_frame_index] == OVERLAY_UPDATE) {
116
0
    return;
117
0
  }
118
0
119
0
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
120
0
  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[cpi->gf_frame_index];
121
0
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
122
0
  // If tpl stats are not established, return early
123
0
  if (!tpl_data->ready || gf_group->max_layer_depth_allowed == 0) {
124
0
    if (features != NULL) features->sb_features.tpl_features.available = 0;
125
0
    return;
126
0
  }
127
0
128
0
  const int tpl_stride = tpl_frame->stride;
129
0
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
130
0
  const int mi_width =
131
0
      AOMMIN(mi_size_wide[bsize], cm->mi_params.mi_cols - mi_col);
132
0
  const int mi_height =
133
0
      AOMMIN(mi_size_high[bsize], cm->mi_params.mi_rows - mi_row);
134
0
  const int col_steps = (mi_width / step) + ((mi_width % step) > 0);
135
0
  const int row_steps = (mi_height / step) + ((mi_height % step) > 0);
136
0
  const int num_blocks = col_steps * row_steps;
137
0
138
0
  if (features == NULL) {
139
0
    char filename[256];
140
0
    snprintf(filename, sizeof(filename), "%s/tpl_feature_sb%d",
141
0
             cpi->oxcf.partition_info_path, cpi->sb_counter);
142
0
    FILE *pfile = fopen(filename, "w");
143
0
    fprintf(pfile, "%d,%d,%d,%d,%d\n", mi_row, mi_col, bsize,
144
0
            tpl_data->tpl_bsize_1d, num_blocks);
145
0
    int count = 0;
146
0
    for (int row = 0; row < mi_height; row += step) {
147
0
      for (int col = 0; col < mi_width; col += step) {
148
0
        TplDepStats *this_stats =
149
0
            &tpl_stats[av1_tpl_ptr_pos(mi_row + row, mi_col + col, tpl_stride,
150
0
                                       tpl_data->tpl_stats_block_mis_log2)];
151
0
        fprintf(pfile, "%.0f", (double)this_stats->intra_cost);
152
0
        if (count < num_blocks - 1) fprintf(pfile, ",");
153
0
        ++count;
154
0
      }
155
0
    }
156
0
    fprintf(pfile, "\n");
157
0
    count = 0;
158
0
    for (int row = 0; row < mi_height; row += step) {
159
0
      for (int col = 0; col < mi_width; col += step) {
160
0
        TplDepStats *this_stats =
161
0
            &tpl_stats[av1_tpl_ptr_pos(mi_row + row, mi_col + col, tpl_stride,
162
0
                                       tpl_data->tpl_stats_block_mis_log2)];
163
0
        fprintf(pfile, "%.0f", (double)this_stats->inter_cost);
164
0
        if (count < num_blocks - 1) fprintf(pfile, ",");
165
0
        ++count;
166
0
      }
167
0
    }
168
0
    fprintf(pfile, "\n");
169
0
    count = 0;
170
0
    for (int row = 0; row < mi_height; row += step) {
171
0
      for (int col = 0; col < mi_width; col += step) {
172
0
        TplDepStats *this_stats =
173
0
            &tpl_stats[av1_tpl_ptr_pos(mi_row + row, mi_col + col, tpl_stride,
174
0
                                       tpl_data->tpl_stats_block_mis_log2)];
175
0
        const int64_t mc_dep_delta =
176
0
            RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate,
177
0
                   this_stats->mc_dep_dist);
178
0
        fprintf(pfile, "%.0f", (double)mc_dep_delta);
179
0
        if (count < num_blocks - 1) fprintf(pfile, ",");
180
0
        ++count;
181
0
      }
182
0
    }
183
0
    fclose(pfile);
184
0
  } else {
185
0
    features->sb_features.tpl_features.available = 1;
186
0
    features->sb_features.tpl_features.tpl_unit_length = tpl_data->tpl_bsize_1d;
187
0
    features->sb_features.tpl_features.num_units = num_blocks;
188
0
    int count = 0;
189
0
    for (int row = 0; row < mi_height; row += step) {
190
0
      for (int col = 0; col < mi_width; col += step) {
191
0
        TplDepStats *this_stats =
192
0
            &tpl_stats[av1_tpl_ptr_pos(mi_row + row, mi_col + col, tpl_stride,
193
0
                                       tpl_data->tpl_stats_block_mis_log2)];
194
0
        const int64_t mc_dep_delta =
195
0
            RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate,
196
0
                   this_stats->mc_dep_dist);
197
0
        features->sb_features.tpl_features.intra_cost[count] =
198
0
            this_stats->intra_cost;
199
0
        features->sb_features.tpl_features.inter_cost[count] =
200
0
            this_stats->inter_cost;
201
0
        features->sb_features.tpl_features.mc_dep_cost[count] = mc_dep_delta;
202
0
        ++count;
203
0
      }
204
0
    }
205
0
  }
206
0
}
207
#endif  // !CONFIG_REALTIME_ONLY
208
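The comment above collect_tpl_stats_sb() describes the per-superblock TPL stats layout; the number of TPL units covering a (possibly frame-edge-clipped) superblock comes from the col_steps/row_steps arithmetic. Below is a minimal standalone C sketch of that unit-count calculation, not part of the instrumented source; it assumes a 64x64 superblock (16 mi units per side) and tpl_stats_block_mis_log2 = 2 (one unit per 16x16 block), and the example values are illustrative only.

#include <stdio.h>

/* Mirrors the col_steps/row_steps/num_blocks arithmetic in
 * collect_tpl_stats_sb(), under the assumptions stated above. */
static int count_units(int mi_width, int mi_height, int step) {
  const int col_steps = (mi_width / step) + ((mi_width % step) > 0);
  const int row_steps = (mi_height / step) + ((mi_height % step) > 0);
  return col_steps * row_steps;
}

int main(void) {
  const int step = 1 << 2;                   /* assumed tpl_stats_block_mis_log2 = 2 */
  printf("%d\n", count_units(16, 16, step)); /* full 64x64 SB -> 16 TPL units */
  printf("%d\n", count_units(10, 16, step)); /* SB clipped to 40x64 at frame edge -> 12 */
  return 0;
}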
209
static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
210
                              FRAME_COUNTS *counts, TX_SIZE tx_size, int depth,
211
                              int blk_row, int blk_col,
212
535k
                              uint8_t allow_update_cdf) {
213
535k
  MB_MODE_INFO *mbmi = xd->mi[0];
214
535k
  const BLOCK_SIZE bsize = mbmi->bsize;
215
535k
  const int max_blocks_high = max_block_high(xd, bsize, 0);
216
535k
  const int max_blocks_wide = max_block_wide(xd, bsize, 0);
217
535k
  int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
218
535k
                                   xd->left_txfm_context + blk_row, mbmi->bsize,
219
535k
                                   tx_size);
220
535k
  const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
221
535k
  const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index];
222
223
535k
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
224
535k
  assert(tx_size > TX_4X4);
225
226
533k
  if (depth == MAX_VARTX_DEPTH) {
227
    // Don't add to counts in this case
228
14.6k
    mbmi->tx_size = tx_size;
229
14.6k
    txfm_partition_update(xd->above_txfm_context + blk_col,
230
14.6k
                          xd->left_txfm_context + blk_row, tx_size, tx_size);
231
14.6k
    return;
232
14.6k
  }
233
234
518k
  if (tx_size == plane_tx_size) {
235
#if CONFIG_ENTROPY_STATS
236
    ++counts->txfm_partition[ctx][0];
237
#endif
238
462k
    if (allow_update_cdf)
239
462k
      update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 0, 2);
240
462k
    mbmi->tx_size = tx_size;
241
462k
    txfm_partition_update(xd->above_txfm_context + blk_col,
242
462k
                          xd->left_txfm_context + blk_row, tx_size, tx_size);
243
462k
  } else {
244
56.4k
    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
245
56.4k
    const int bsw = tx_size_wide_unit[sub_txs];
246
56.4k
    const int bsh = tx_size_high_unit[sub_txs];
247
248
#if CONFIG_ENTROPY_STATS
249
    ++counts->txfm_partition[ctx][1];
250
#endif
251
56.4k
    if (allow_update_cdf)
252
56.5k
      update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 1, 2);
253
56.4k
    ++x->txfm_search_info.txb_split_count;
254
255
56.4k
    if (sub_txs == TX_4X4) {
256
13.7k
      mbmi->inter_tx_size[txb_size_index] = TX_4X4;
257
13.7k
      mbmi->tx_size = TX_4X4;
258
13.7k
      txfm_partition_update(xd->above_txfm_context + blk_col,
259
13.7k
                            xd->left_txfm_context + blk_row, TX_4X4, tx_size);
260
13.7k
      return;
261
13.7k
    }
262
263
120k
    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
264
222k
      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
265
144k
        int offsetr = row;
266
144k
        int offsetc = col;
267
268
144k
        update_txfm_count(x, xd, counts, sub_txs, depth + 1, blk_row + offsetr,
269
144k
                          blk_col + offsetc, allow_update_cdf);
270
144k
      }
271
78.2k
    }
272
42.6k
  }
273
518k
}
274
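update_txfm_count() above recurses over sub-transforms laid out on a grid of tx-size units, stepping by bsw and bsh inside the parent transform. The following standalone sketch, not part of the instrumented source, walks that grid assuming a 32x32 parent transform (8x8 units of 4x4 pixels) whose sub-transform is 16x16 (4x4 units); the real unit sizes come from libaom's tx_size_wide_unit/tx_size_high_unit tables.

#include <stdio.h>

int main(void) {
  /* Assumed sizes: 32x32 parent = 8x8 units; 16x16 sub-tx = 4x4 units. */
  const int parent_w = 8, parent_h = 8;
  const int bsw = 4, bsh = 4;
  int sub_transforms = 0;
  for (int row = 0; row < parent_h; row += bsh) {
    for (int col = 0; col < parent_w; col += bsw) {
      ++sub_transforms; /* recursion site in update_txfm_count() */
    }
  }
  printf("%d\n", sub_transforms); /* 4 sub-transforms visited */
  return 0;
}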
275
static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x,
276
                                      BLOCK_SIZE plane_bsize,
277
                                      FRAME_COUNTS *td_counts,
278
391k
                                      uint8_t allow_update_cdf) {
279
391k
  MACROBLOCKD *xd = &x->e_mbd;
280
391k
  const int mi_width = mi_size_wide[plane_bsize];
281
391k
  const int mi_height = mi_size_high[plane_bsize];
282
391k
  const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
283
391k
  const int bh = tx_size_high_unit[max_tx_size];
284
391k
  const int bw = tx_size_wide_unit[max_tx_size];
285
286
391k
  xd->above_txfm_context =
287
391k
      cm->above_contexts.txfm[xd->tile.tile_row] + xd->mi_col;
288
391k
  xd->left_txfm_context =
289
391k
      xd->left_txfm_context_buffer + (xd->mi_row & MAX_MIB_MASK);
290
291
782k
  for (int idy = 0; idy < mi_height; idy += bh) {
292
782k
    for (int idx = 0; idx < mi_width; idx += bw) {
293
391k
      update_txfm_count(x, xd, td_counts, max_tx_size, 0, idy, idx,
294
391k
                        allow_update_cdf);
295
391k
    }
296
391k
  }
297
391k
}
298
299
static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
300
637k
                             int blk_col) {
301
637k
  MB_MODE_INFO *mbmi = xd->mi[0];
302
637k
  const BLOCK_SIZE bsize = mbmi->bsize;
303
637k
  const int max_blocks_high = max_block_high(xd, bsize, 0);
304
637k
  const int max_blocks_wide = max_block_wide(xd, bsize, 0);
305
637k
  const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
306
637k
  const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index];
307
308
637k
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
309
310
637k
  if (tx_size == plane_tx_size) {
311
595k
    mbmi->tx_size = tx_size;
312
595k
    txfm_partition_update(xd->above_txfm_context + blk_col,
313
595k
                          xd->left_txfm_context + blk_row, tx_size, tx_size);
314
315
595k
  } else {
316
41.5k
    if (tx_size == TX_8X8) {
317
36.2k
      mbmi->inter_tx_size[txb_size_index] = TX_4X4;
318
36.2k
      mbmi->tx_size = TX_4X4;
319
36.2k
      txfm_partition_update(xd->above_txfm_context + blk_col,
320
36.2k
                            xd->left_txfm_context + blk_row, TX_4X4, tx_size);
321
36.2k
      return;
322
36.2k
    }
323
5.32k
    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
324
5.32k
    const int bsw = tx_size_wide_unit[sub_txs];
325
5.32k
    const int bsh = tx_size_high_unit[sub_txs];
326
5.32k
    const int row_end =
327
5.32k
        AOMMIN(tx_size_high_unit[tx_size], max_blocks_high - blk_row);
328
5.32k
    const int col_end =
329
5.32k
        AOMMIN(tx_size_wide_unit[tx_size], max_blocks_wide - blk_col);
330
16.1k
    for (int row = 0; row < row_end; row += bsh) {
331
10.7k
      const int offsetr = blk_row + row;
332
32.0k
      for (int col = 0; col < col_end; col += bsw) {
333
21.3k
        const int offsetc = blk_col + col;
334
21.3k
        set_txfm_context(xd, sub_txs, offsetr, offsetc);
335
21.3k
      }
336
10.7k
    }
337
5.32k
  }
338
637k
}
339
340
static void tx_partition_set_contexts(const AV1_COMMON *const cm,
341
616k
                                      MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
342
616k
  const int mi_width = mi_size_wide[plane_bsize];
343
616k
  const int mi_height = mi_size_high[plane_bsize];
344
616k
  const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
345
616k
  const int bh = tx_size_high_unit[max_tx_size];
346
616k
  const int bw = tx_size_wide_unit[max_tx_size];
347
348
616k
  xd->above_txfm_context =
349
616k
      cm->above_contexts.txfm[xd->tile.tile_row] + xd->mi_col;
350
616k
  xd->left_txfm_context =
351
616k
      xd->left_txfm_context_buffer + (xd->mi_row & MAX_MIB_MASK);
352
353
1.23M
  for (int idy = 0; idy < mi_height; idy += bh) {
354
1.23M
    for (int idx = 0; idx < mi_width; idx += bw) {
355
616k
      set_txfm_context(xd, max_tx_size, idy, idx);
356
616k
    }
357
616k
  }
358
616k
}
359
360
static void update_zeromv_cnt(const AV1_COMP *const cpi,
361
                              const MB_MODE_INFO *const mi, int mi_row,
362
0
                              int mi_col, BLOCK_SIZE bsize) {
363
0
  if (mi->ref_frame[0] != LAST_FRAME || !is_inter_block(mi) ||
364
0
      mi->segment_id > CR_SEGMENT_ID_BOOST2) {
365
0
    return;
366
0
  }
367
0
  const AV1_COMMON *const cm = &cpi->common;
368
0
  const MV mv = mi->mv[0].as_mv;
369
0
  const int bw = mi_size_wide[bsize] >> 1;
370
0
  const int bh = mi_size_high[bsize] >> 1;
371
0
  const int xmis = AOMMIN((cm->mi_params.mi_cols - mi_col) >> 1, bw);
372
0
  const int ymis = AOMMIN((cm->mi_params.mi_rows - mi_row) >> 1, bh);
373
0
  const int block_index =
374
0
      (mi_row >> 1) * (cm->mi_params.mi_cols >> 1) + (mi_col >> 1);
375
0
  for (int y = 0; y < ymis; y++) {
376
0
    for (int x = 0; x < xmis; x++) {
377
      // consec_zero_mv is kept at the scale of 8x8 blocks
378
0
      const int map_offset = block_index + y * (cm->mi_params.mi_cols >> 1) + x;
379
0
      if (abs(mv.row) < 10 && abs(mv.col) < 10) {
380
0
        if (cpi->consec_zero_mv[map_offset] < 255)
381
0
          cpi->consec_zero_mv[map_offset]++;
382
0
      } else {
383
0
        cpi->consec_zero_mv[map_offset] = 0;
384
0
      }
385
0
    }
386
0
  }
387
0
}
388
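update_zeromv_cnt() above maintains consec_zero_mv at 8x8-pixel (2 mi) granularity, so mi coordinates are halved before indexing into a map whose stride is mi_cols >> 1. A minimal standalone sketch of that index arithmetic follows; it is not part of the instrumented source, and the frame width (mi_cols = 320, i.e. a 1280-pixel-wide frame) and block position are assumed example values.

#include <stdio.h>

int main(void) {
  const int mi_cols = 320;             /* assumed: 1280-pixel-wide frame in 4x4 mi units */
  const int mi_row = 24, mi_col = 36;  /* example block position in mi units */
  const int map_stride = mi_cols >> 1; /* consec_zero_mv entries per row (8x8 scale) */
  const int block_index = (mi_row >> 1) * map_stride + (mi_col >> 1);
  printf("%d\n", block_index);         /* 12 * 160 + 18 = 1938 */
  return 0;
}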
389
static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
390
                              ThreadData *td, TokenExtra **t, RUN_TYPE dry_run,
391
20.2M
                              BLOCK_SIZE bsize, int *rate) {
392
20.2M
  const AV1_COMMON *const cm = &cpi->common;
393
20.2M
  const int num_planes = av1_num_planes(cm);
394
20.2M
  MACROBLOCK *const x = &td->mb;
395
20.2M
  MACROBLOCKD *const xd = &x->e_mbd;
396
20.2M
  MB_MODE_INFO **mi_4x4 = xd->mi;
397
20.2M
  MB_MODE_INFO *mbmi = mi_4x4[0];
398
20.2M
  const int seg_skip =
399
20.2M
      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
400
20.2M
  const int mis = cm->mi_params.mi_stride;
401
20.2M
  const int mi_width = mi_size_wide[bsize];
402
20.2M
  const int mi_height = mi_size_high[bsize];
403
20.2M
  const int is_inter = is_inter_block(mbmi);
404
405
  // Initialize tx_mode and tx_size_search_method
406
20.2M
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
407
20.2M
  set_tx_size_search_method(
408
20.2M
      cm, &cpi->winner_mode_params, txfm_params,
409
20.2M
      cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch, 1);
410
411
20.2M
  const int mi_row = xd->mi_row;
412
20.2M
  const int mi_col = xd->mi_col;
413
20.2M
  if (!is_inter) {
414
18.8M
    xd->cfl.store_y = store_cfl_required(cm, xd);
415
18.8M
    mbmi->skip_txfm = 1;
416
52.8M
    for (int plane = 0; plane < num_planes; ++plane) {
417
34.0M
      av1_encode_intra_block_plane(cpi, x, bsize, plane, dry_run,
418
34.0M
                                   cpi->optimize_seg_arr[mbmi->segment_id]);
419
34.0M
    }
420
421
    // If there is at least one lossless segment, force the skip for intra
422
    // block to be 0, in order to avoid the segment_id being changed in
423
    // write_segment_id().
424
18.8M
    if (!cpi->common.seg.segid_preskip && cpi->common.seg.update_map &&
425
0
        cpi->enc_seg.has_lossless_segment)
426
0
      mbmi->skip_txfm = 0;
427
428
18.8M
    xd->cfl.store_y = 0;
429
18.8M
    if (av1_allow_palette(cm->features.allow_screen_content_tools, bsize)) {
430
0
      for (int plane = 0; plane < AOMMIN(2, num_planes); ++plane) {
431
0
        if (mbmi->palette_mode_info.palette_size[plane] > 0) {
432
0
          if (!dry_run) {
433
0
            av1_tokenize_color_map(x, plane, t, bsize, mbmi->tx_size,
434
0
                                   PALETTE_MAP, tile_data->allow_update_cdf,
435
0
                                   td->counts);
436
0
          } else if (dry_run == DRY_RUN_COSTCOEFFS) {
437
0
            *rate +=
438
0
                av1_cost_color_map(x, plane, bsize, mbmi->tx_size, PALETTE_MAP);
439
0
          }
440
0
        }
441
0
      }
442
0
    }
443
444
18.8M
    av1_update_intra_mb_txb_context(cpi, td, dry_run, bsize,
445
18.8M
                                    tile_data->allow_update_cdf);
446
18.8M
  } else {
447
1.41M
    int ref;
448
1.41M
    const int is_compound = has_second_ref(mbmi);
449
450
1.41M
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
451
2.80M
    for (ref = 0; ref < 1 + is_compound; ++ref) {
452
1.38M
      const YV12_BUFFER_CONFIG *cfg =
453
1.38M
          get_ref_frame_yv12_buf(cm, mbmi->ref_frame[ref]);
454
1.38M
      assert(IMPLIES(!is_intrabc_block(mbmi), cfg));
455
1.38M
      av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
456
1.38M
                           xd->block_ref_scale_factors[ref], num_planes);
457
1.38M
    }
458
    // Predicted sample of inter mode (for Luma plane) cannot be reused if
459
    // nonrd_check_partition_split speed feature is enabled, since in such cases
460
    // the buffer may not contain the predicted sample of best mode.
461
1.41M
    const int start_plane =
462
1.41M
        (x->reuse_inter_pred && (!cpi->sf.rt_sf.nonrd_check_partition_split) &&
463
0
         cm->seq_params->bit_depth == AOM_BITS_8)
464
1.41M
            ? 1
465
1.41M
            : 0;
466
1.41M
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
467
1.41M
                                  start_plane, av1_num_planes(cm) - 1);
468
1.41M
    if (mbmi->motion_mode == OBMC_CAUSAL) {
469
0
      assert(cpi->oxcf.motion_mode_cfg.enable_obmc);
470
0
      av1_build_obmc_inter_predictors_sb(cm, xd);
471
0
    }
472
473
#if CONFIG_MISMATCH_DEBUG
474
    if (dry_run == OUTPUT_ENABLED) {
475
      for (int plane = 0; plane < num_planes; ++plane) {
476
        const struct macroblockd_plane *pd = &xd->plane[plane];
477
        int pixel_c, pixel_r;
478
        mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0,
479
                        pd->subsampling_x, pd->subsampling_y);
480
        if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
481
                                 pd->subsampling_y))
482
          continue;
483
        mismatch_record_block_pre(pd->dst.buf, pd->dst.stride,
484
                                  cm->current_frame.order_hint, plane, pixel_c,
485
                                  pixel_r, pd->width, pd->height,
486
                                  xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
487
      }
488
    }
489
#else
490
1.41M
    (void)num_planes;
491
1.41M
#endif
492
493
1.41M
    av1_encode_sb(cpi, x, bsize, dry_run);
494
1.41M
    av1_tokenize_sb_vartx(cpi, td, dry_run, bsize, rate,
495
1.41M
                          tile_data->allow_update_cdf);
496
1.41M
  }
497
498
20.2M
  if (!dry_run) {
499
6.46M
    if (av1_allow_intrabc(cm) && is_intrabc_block(mbmi)) td->intrabc_used = 1;
500
6.46M
    if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
501
5.66M
        !xd->lossless[mbmi->segment_id] && mbmi->bsize > BLOCK_4X4 &&
502
5.15M
        !(is_inter && (mbmi->skip_txfm || seg_skip))) {
503
5.05M
      if (is_inter) {
504
391k
        tx_partition_count_update(cm, x, bsize, td->counts,
505
391k
                                  tile_data->allow_update_cdf);
506
4.66M
      } else {
507
4.66M
        if (mbmi->tx_size != max_txsize_rect_lookup[bsize])
508
715k
          ++x->txfm_search_info.txb_split_count;
509
4.66M
        if (block_signals_txsize(bsize)) {
510
4.66M
          const int tx_size_ctx = get_tx_size_context(xd);
511
4.66M
          const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize);
512
4.66M
          const int depth = tx_size_to_depth(mbmi->tx_size, bsize);
513
4.66M
          const int max_depths = bsize_to_max_depth(bsize);
514
515
4.66M
          if (tile_data->allow_update_cdf)
516
3.83M
            update_cdf(xd->tile_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx],
517
3.83M
                       depth, max_depths + 1);
518
#if CONFIG_ENTROPY_STATS
519
          ++td->counts->intra_tx_size[tx_size_cat][tx_size_ctx][depth];
520
#endif
521
4.66M
        }
522
4.66M
      }
523
5.05M
      assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(xd, mbmi)));
524
5.05M
    } else {
525
1.40M
      int i, j;
526
1.40M
      TX_SIZE intra_tx_size;
527
      // The new intra coding scheme requires no change of transform size
528
1.40M
      if (is_inter) {
529
110k
        if (xd->lossless[mbmi->segment_id]) {
530
15.6k
          intra_tx_size = TX_4X4;
531
95.1k
        } else {
532
95.1k
          intra_tx_size =
533
95.1k
              tx_size_from_tx_mode(bsize, txfm_params->tx_mode_search_type);
534
95.1k
        }
535
1.29M
      } else {
536
1.29M
        intra_tx_size = mbmi->tx_size;
537
1.29M
      }
538
539
1.40M
      const int cols = AOMMIN(cm->mi_params.mi_cols - mi_col, mi_width);
540
1.40M
      const int rows = AOMMIN(cm->mi_params.mi_rows - mi_row, mi_height);
541
3.32M
      for (j = 0; j < rows; j++) {
542
5.20M
        for (i = 0; i < cols; i++) mi_4x4[mis * j + i]->tx_size = intra_tx_size;
543
1.91M
      }
544
545
1.40M
      if (intra_tx_size != max_txsize_rect_lookup[bsize])
546
344k
        ++x->txfm_search_info.txb_split_count;
547
1.40M
    }
548
6.46M
  }
549
550
20.2M
  if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
551
17.0M
      block_signals_txsize(mbmi->bsize) && is_inter &&
552
1.35M
      !(mbmi->skip_txfm || seg_skip) && !xd->lossless[mbmi->segment_id]) {
553
1.00M
    if (dry_run) tx_partition_set_contexts(cm, xd, bsize);
554
19.2M
  } else {
555
19.2M
    TX_SIZE tx_size = mbmi->tx_size;
556
    // The new intra coding scheme requires no change of transform size
557
19.2M
    if (is_inter) {
558
381k
      if (xd->lossless[mbmi->segment_id]) {
559
36.9k
        tx_size = TX_4X4;
560
344k
      } else {
561
344k
        tx_size = tx_size_from_tx_mode(bsize, txfm_params->tx_mode_search_type);
562
344k
      }
563
18.8M
    } else {
564
18.8M
      tx_size = (bsize > BLOCK_4X4) ? tx_size : TX_4X4;
565
18.8M
    }
566
19.2M
    mbmi->tx_size = tx_size;
567
19.2M
    set_txfm_ctxs(tx_size, xd->width, xd->height,
568
19.2M
                  (mbmi->skip_txfm || seg_skip) && is_inter_block(mbmi), xd);
569
19.2M
  }
570
571
20.2M
#if !CONFIG_REALTIME_ONLY
572
20.2M
  if (is_inter_block(mbmi) && !xd->is_chroma_ref && is_cfl_allowed(xd)) {
573
0
    cfl_store_block(xd, mbmi->bsize, mbmi->tx_size);
574
0
  }
575
20.2M
#endif
576
20.2M
  if (!dry_run) {
577
6.46M
    if (cpi->oxcf.pass == AOM_RC_ONE_PASS && cpi->svc.temporal_layer_id == 0 &&
578
6.46M
        cpi->sf.rt_sf.use_temporal_noise_estimate &&
579
0
        (!cpi->ppi->use_svc ||
580
0
         (cpi->ppi->use_svc &&
581
0
          !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
582
0
          cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)))
583
0
      update_zeromv_cnt(cpi, mbmi, mi_row, mi_col, bsize);
584
6.46M
  }
585
20.2M
}
586
587
static void setup_block_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
588
                               int mi_row, int mi_col, BLOCK_SIZE bsize,
589
41.9M
                               AQ_MODE aq_mode, MB_MODE_INFO *mbmi) {
590
41.9M
  x->rdmult = cpi->rd.RDMULT;
591
592
41.9M
  if (aq_mode != NO_AQ) {
593
0
    assert(mbmi != NULL);
594
0
    if (aq_mode == VARIANCE_AQ) {
595
0
      if (cpi->vaq_refresh) {
596
0
        const int energy = bsize <= BLOCK_16X16
597
0
                               ? x->mb_energy
598
0
                               : av1_log_block_var(cpi, x, bsize);
599
0
        mbmi->segment_id = energy;
600
0
      }
601
0
      x->rdmult = set_rdmult(cpi, x, mbmi->segment_id);
602
0
    } else if (aq_mode == COMPLEXITY_AQ) {
603
0
      x->rdmult = set_rdmult(cpi, x, mbmi->segment_id);
604
0
    } else if (aq_mode == CYCLIC_REFRESH_AQ) {
605
      // If segment is boosted, use rdmult for that segment.
606
0
      if (cyclic_refresh_segment_id_boosted(mbmi->segment_id))
607
0
        x->rdmult = av1_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
608
0
    }
609
0
  }
610
611
41.9M
#if !CONFIG_REALTIME_ONLY
612
41.9M
  if (cpi->common.delta_q_info.delta_q_present_flag &&
613
147k
      !cpi->sf.rt_sf.use_nonrd_pick_mode) {
614
147k
    x->rdmult = av1_get_cb_rdmult(cpi, x, bsize, mi_row, mi_col);
615
147k
  }
616
41.9M
#endif  // !CONFIG_REALTIME_ONLY
617
618
41.9M
  if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIM ||
619
0
      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
620
41.9M
      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) {
621
41.9M
    av1_set_ssim_rdmult(cpi, &x->errorperbit, bsize, mi_row, mi_col,
622
41.9M
                        &x->rdmult);
623
41.9M
  }
624
#if CONFIG_SALIENCY_MAP
625
  else if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_SALIENCY_MAP) {
626
    av1_set_saliency_map_vmaf_rdmult(cpi, &x->errorperbit,
627
                                     cpi->common.seq_params->sb_size, mi_row,
628
                                     mi_col, &x->rdmult);
629
  }
630
#endif
631
#if CONFIG_TUNE_VMAF
632
  else if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
633
           cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN ||
634
           cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) {
635
    av1_set_vmaf_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
636
  }
637
#endif
638
#if CONFIG_TUNE_BUTTERAUGLI
639
  else if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_BUTTERAUGLI) {
640
    av1_set_butteraugli_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
641
  }
642
#endif
643
41.9M
  if (cpi->oxcf.mode == ALLINTRA) {
644
29.9M
    x->rdmult = (int)(((int64_t)x->rdmult * x->intra_sb_rdmult_modifier) >> 7);
645
29.9M
  }
646
647
  // Check to make sure that the adjustments above have not caused the
648
  // rd multiplier to be truncated to 0.
649
41.9M
  x->rdmult = (x->rdmult > 0) ? x->rdmult : 1;
650
41.9M
}
651
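The ALLINTRA branch of setup_block_rdmult() above applies x->intra_sb_rdmult_modifier as 7-bit fixed point (>> 7), so a modifier of 128 leaves rdmult unchanged, and the final check keeps rdmult at least 1. Below is a minimal standalone sketch of that scaling, not part of the instrumented source; the rdmult and modifier values are assumed examples.

#include <stdint.h>
#include <stdio.h>

static int scale_rdmult(int rdmult, int modifier) {
  int out = (int)(((int64_t)rdmult * modifier) >> 7); /* 7-bit fixed point */
  return out > 0 ? out : 1;                           /* same floor as the function above */
}

int main(void) {
  printf("%d\n", scale_rdmult(1000, 128)); /* 1000: modifier 128 == 1.0 */
  printf("%d\n", scale_rdmult(1000, 96));  /* 750:  modifier 96  == 0.75 */
  printf("%d\n", scale_rdmult(0, 96));     /* 1:    clamped so rdmult stays positive */
  return 0;
}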
652
void av1_set_offsets_without_segment_id(const AV1_COMP *const cpi,
653
                                        const TileInfo *const tile,
654
                                        MACROBLOCK *const x, int mi_row,
655
40.9M
                                        int mi_col, BLOCK_SIZE bsize) {
656
40.9M
  const AV1_COMMON *const cm = &cpi->common;
657
40.9M
  const int num_planes = av1_num_planes(cm);
658
40.9M
  MACROBLOCKD *const xd = &x->e_mbd;
659
40.9M
  assert(bsize < BLOCK_SIZES_ALL);
660
40.9M
  const int mi_width = mi_size_wide[bsize];
661
40.9M
  const int mi_height = mi_size_high[bsize];
662
663
40.9M
  set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd,
664
40.9M
                        mi_row, mi_col);
665
666
40.9M
  set_entropy_context(xd, mi_row, mi_col, num_planes);
667
40.9M
  xd->above_txfm_context = cm->above_contexts.txfm[tile->tile_row] + mi_col;
668
40.9M
  xd->left_txfm_context =
669
40.9M
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
670
671
  // Set up destination pointers.
672
40.9M
  av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col, 0,
673
40.9M
                       num_planes);
674
675
  // Set up limit values for MV components.
676
  // Mv beyond the range do not produce new/different prediction block.
677
40.9M
  av1_set_mv_limits(&cm->mi_params, &x->mv_limits, mi_row, mi_col, mi_height,
678
40.9M
                    mi_width, cpi->oxcf.border_in_pixels);
679
680
40.9M
  set_plane_n4(xd, mi_width, mi_height, num_planes);
681
682
  // Set up distance of MB to edge of frame in 1/8th pel units.
683
40.9M
  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
684
40.9M
  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width,
685
40.9M
                 cm->mi_params.mi_rows, cm->mi_params.mi_cols);
686
687
  // Set up source buffers.
688
40.9M
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
689
690
  // required by av1_append_sub8x8_mvs_for_idx() and av1_find_best_ref_mvs()
691
40.9M
  xd->tile = *tile;
692
40.9M
}
693
694
void av1_set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile,
695
                     MACROBLOCK *const x, int mi_row, int mi_col,
696
20.8M
                     BLOCK_SIZE bsize) {
697
20.8M
  const AV1_COMMON *const cm = &cpi->common;
698
20.8M
  const struct segmentation *const seg = &cm->seg;
699
20.8M
  MACROBLOCKD *const xd = &x->e_mbd;
700
20.8M
  MB_MODE_INFO *mbmi;
701
702
20.8M
  av1_set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);
703
704
  // Setup segment ID.
705
20.8M
  mbmi = xd->mi[0];
706
20.8M
  mbmi->segment_id = 0;
707
20.8M
  if (seg->enabled) {
708
0
    if (seg->enabled && !cpi->vaq_refresh) {
709
0
      const uint8_t *const map =
710
0
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
711
0
      mbmi->segment_id =
712
0
          map ? get_segment_id(&cm->mi_params, map, bsize, mi_row, mi_col) : 0;
713
0
    }
714
0
    av1_init_plane_quantizers(cpi, x, mbmi->segment_id, 0);
715
0
  }
716
#ifndef NDEBUG
717
  x->last_set_offsets_loc.mi_row = mi_row;
718
  x->last_set_offsets_loc.mi_col = mi_col;
719
  x->last_set_offsets_loc.bsize = bsize;
720
#endif  // NDEBUG
721
20.8M
}
722
723
/*!\brief Hybrid intra mode search.
724
 *
725
 * \ingroup intra_mode_search
726
 * \callgraph
727
 * \callergraph
728
 * This is the top-level function for mode search for intra frames in the non-RD
729
 * optimized case. Depending on the speed feature and block size, it calls
730
 * either the non-RD or the RD optimized intra mode search.
731
 *
732
 * \param[in]    cpi            Top-level encoder structure
733
 * \param[in]    x              Pointer to structure holding all the data for
734
                                the current macroblock
735
 * \param[in]    rd_cost        Struct to keep track of the RD information
736
 * \param[in]    bsize          Current block size
737
 * \param[in]    ctx            Structure to hold snapshot of coding context
738
                                during the mode picking process
739
 *
740
 * \remark Nothing is returned. Instead, the MB_MODE_INFO struct inside x
741
 * is modified to store information about the best mode computed
742
 * in this function. The rd_cost struct is also updated with the RD stats
743
 * corresponding to the best mode found.
744
 */
745
746
static inline void hybrid_intra_mode_search(AV1_COMP *cpi, MACROBLOCK *const x,
747
                                            RD_STATS *rd_cost, BLOCK_SIZE bsize,
748
2.36M
                                            PICK_MODE_CONTEXT *ctx) {
749
2.36M
  int use_rdopt = 0;
750
2.36M
  const int hybrid_intra_pickmode = cpi->sf.rt_sf.hybrid_intra_pickmode;
751
  // Use rd pick for intra mode search based on block size and variance.
752
2.36M
  if (hybrid_intra_pickmode && bsize < BLOCK_16X16) {
753
1.40M
    unsigned int var_thresh[3] = { 0, 101, 201 };
754
1.40M
    assert(hybrid_intra_pickmode <= 3);
755
1.40M
    if (x->source_variance >= var_thresh[hybrid_intra_pickmode - 1])
756
1.40M
      use_rdopt = 1;
757
1.40M
  }
758
759
2.36M
  if (use_rdopt)
760
1.40M
    av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
761
961k
  else
762
961k
    av1_nonrd_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
763
2.36M
}
764
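hybrid_intra_mode_search() above switches to the RD intra search only for blocks smaller than 16x16 whose source variance reaches the threshold selected by hybrid_intra_pickmode (var_thresh = {0, 101, 201}); otherwise it falls back to the non-RD pick. A minimal standalone sketch of that decision, not part of the instrumented source, assuming hybrid_intra_pickmode = 2 and example variance values:

#include <stdio.h>

static int use_rd_search(unsigned int source_variance, int pickmode_level,
                         int bsize_lt_16x16) {
  const unsigned int var_thresh[3] = { 0, 101, 201 };
  return pickmode_level && bsize_lt_16x16 &&
         source_variance >= var_thresh[pickmode_level - 1];
}

int main(void) {
  printf("%d\n", use_rd_search(150, 2, 1)); /* 1: RD intra search */
  printf("%d\n", use_rd_search(50, 2, 1));  /* 0: non-RD pick instead */
  return 0;
}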
765
// For real time/allintra row-mt enabled multi-threaded encoding with cost
766
// update frequency set to COST_UPD_TILE/COST_UPD_OFF, tile ctxt is not updated
767
// at superblock level. Thus, the encoding of the top-right superblock need not
768
// be complete for updating tile ctxt. However, when encoding a block
769
// whose right edge is also the superblock edge, intra and inter mode evaluation
770
// (ref mv list population) require the encoding of the top-right superblock to
771
// be complete. So, here, we delay making threads wait until the data from the
772
// top-right superblock region is actually needed.
773
static inline void wait_for_top_right_sb(AV1EncRowMultiThreadInfo *enc_row_mt,
774
                                         AV1EncRowMultiThreadSync *row_mt_sync,
775
                                         TileInfo *tile_info,
776
                                         BLOCK_SIZE sb_size,
777
                                         int sb_mi_size_log2, BLOCK_SIZE bsize,
778
12.0M
                                         int mi_row, int mi_col) {
779
12.0M
  const int sb_size_in_mi = mi_size_wide[sb_size];
780
12.0M
  const int bw_in_mi = mi_size_wide[bsize];
781
12.0M
  const int blk_row_in_sb = mi_row & (sb_size_in_mi - 1);
782
12.0M
  const int blk_col_in_sb = mi_col & (sb_size_in_mi - 1);
783
12.0M
  const int top_right_block_in_sb =
784
12.0M
      (blk_row_in_sb == 0) && (blk_col_in_sb + bw_in_mi >= sb_size_in_mi);
785
786
  // Don't wait if the block is not the top-right block in the superblock.
787
12.0M
  if (!top_right_block_in_sb) return;
788
789
  // Wait for the top-right superblock to finish encoding.
790
346k
  const int sb_row_in_tile =
791
346k
      (mi_row - tile_info->mi_row_start) >> sb_mi_size_log2;
792
346k
  const int sb_col_in_tile =
793
346k
      (mi_col - tile_info->mi_col_start) >> sb_mi_size_log2;
794
795
346k
  enc_row_mt->sync_read_ptr(row_mt_sync, sb_row_in_tile, sb_col_in_tile);
796
346k
}
797
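wait_for_top_right_sb() above only blocks when the current block is the top-right block of its superblock, i.e. its row offset inside the superblock is 0 and its right edge reaches the superblock edge. A minimal standalone sketch of that test follows; it is not part of the instrumented source and assumes a 64x64 superblock (sb_size_in_mi = 16) with example block positions.

#include <stdio.h>

static int touches_top_right(int mi_row, int mi_col, int bw_in_mi,
                             int sb_size_in_mi) {
  const int blk_row_in_sb = mi_row & (sb_size_in_mi - 1);
  const int blk_col_in_sb = mi_col & (sb_size_in_mi - 1);
  return (blk_row_in_sb == 0) && (blk_col_in_sb + bw_in_mi >= sb_size_in_mi);
}

int main(void) {
  /* 32x32 block (8 mi wide) in the top-right quarter of its SB: must wait. */
  printf("%d\n", touches_top_right(16, 40, 8, 16)); /* 1 */
  /* Same-sized block one 32x32 row lower: no wait needed. */
  printf("%d\n", touches_top_right(24, 40, 8, 16)); /* 0 */
  return 0;
}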
798
/*!\brief Interface for AV1 mode search for an individual coding block
799
 *
800
 * \ingroup partition_search
801
 * \callgraph
802
 * \callergraph
803
 * Searches prediction modes, transform, and coefficient coding modes for an
804
 * individual coding block. This function is the top-level interface that
805
 * directs the encoder to the proper mode search function, among these
806
 * implemented for inter/intra + rd/non-rd + non-skip segment/skip segment.
807
 *
808
 * \param[in]    cpi            Top-level encoder structure
809
 * \param[in]    tile_data      Pointer to struct holding adaptive
810
 *                              data/contexts/models for the tile during
811
 *                              encoding
812
 * \param[in]    x              Pointer to structure holding all the data for
813
 *                              the current macroblock
814
 * \param[in]    mi_row         Row coordinate of the block in a step size of
815
 *                              MI_SIZE
816
 * \param[in]    mi_col         Column coordinate of the block in a step size of
817
 *                              MI_SIZE
818
 * \param[in]    rd_cost        Pointer to structure holding rate and distortion
819
 *                              stats for the current block
820
 * \param[in]    partition      Partition mode of the parent block
821
 * \param[in]    bsize          Current block size
822
 * \param[in]    ctx            Pointer to structure holding coding contexts and
823
 *                              chosen modes for the current block
824
 * \param[in]    best_rd        Upper bound of rd cost of a valid partition
825
 *
826
 * \remark Nothing is returned. Instead, the chosen modes and contexts necessary
827
 * for reconstruction are stored in ctx, and the rate-distortion stats are stored in
828
 * rd_cost. If no valid mode leading to rd_cost <= best_rd is found, the status is
829
 * signalled by an INT64_MAX rd_cost->rdcost.
830
 */
831
static void pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
832
                          MACROBLOCK *const x, int mi_row, int mi_col,
833
                          RD_STATS *rd_cost, PARTITION_TYPE partition,
834
                          BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
835
9.25M
                          RD_STATS best_rd) {
836
9.25M
  if (cpi->sf.part_sf.use_best_rd_for_pruning && best_rd.rdcost < 0) {
837
73
    ctx->rd_stats.rdcost = INT64_MAX;
838
73
    ctx->rd_stats.skip_txfm = 0;
839
73
    av1_invalid_rd_stats(rd_cost);
840
73
    return;
841
73
  }
842
843
9.25M
  av1_set_offsets(cpi, &tile_data->tile_info, x, mi_row, mi_col, bsize);
844
845
9.25M
  if (cpi->sf.part_sf.reuse_prev_rd_results_for_part_ab &&
846
9.25M
      ctx->rd_mode_is_ready) {
847
0
    assert(ctx->mic.bsize == bsize);
848
0
    assert(ctx->mic.partition == partition);
849
0
    rd_cost->rate = ctx->rd_stats.rate;
850
0
    rd_cost->dist = ctx->rd_stats.dist;
851
0
    rd_cost->rdcost = ctx->rd_stats.rdcost;
852
0
    return;
853
0
  }
854
855
9.25M
  AV1_COMMON *const cm = &cpi->common;
856
9.25M
  const int num_planes = av1_num_planes(cm);
857
9.25M
  MACROBLOCKD *const xd = &x->e_mbd;
858
9.25M
  MB_MODE_INFO *mbmi;
859
9.25M
  struct macroblock_plane *const p = x->plane;
860
9.25M
  struct macroblockd_plane *const pd = xd->plane;
861
9.25M
  const AQ_MODE aq_mode = cpi->oxcf.q_cfg.aq_mode;
862
9.25M
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
863
864
9.25M
  int i;
865
866
  // This is only needed for real time/allintra row-mt enabled multi-threaded
867
  // encoding with cost update frequency set to COST_UPD_TILE/COST_UPD_OFF.
868
9.25M
  wait_for_top_right_sb(&cpi->mt_info.enc_row_mt, &tile_data->row_mt_sync,
869
9.25M
                        &tile_data->tile_info, cm->seq_params->sb_size,
870
9.25M
                        cm->seq_params->mib_size_log2, bsize, mi_row, mi_col);
871
872
#if CONFIG_COLLECT_COMPONENT_TIMING
873
  start_timing(cpi, rd_pick_sb_modes_time);
874
#endif
875
876
9.25M
  mbmi = xd->mi[0];
877
9.25M
  mbmi->bsize = bsize;
878
9.25M
  mbmi->partition = partition;
879
880
#if CONFIG_RD_DEBUG
881
  mbmi->mi_row = mi_row;
882
  mbmi->mi_col = mi_col;
883
#endif
884
885
  // Sets up the tx_type_map buffer in MACROBLOCKD.
886
9.25M
  xd->tx_type_map = txfm_info->tx_type_map_;
887
9.25M
  xd->tx_type_map_stride = mi_size_wide[bsize];
888
889
26.0M
  for (i = 0; i < num_planes; ++i) {
890
16.8M
    p[i].coeff = ctx->coeff[i];
891
16.8M
    p[i].qcoeff = ctx->qcoeff[i];
892
16.8M
    p[i].dqcoeff = ctx->dqcoeff[i];
893
16.8M
    p[i].eobs = ctx->eobs[i];
894
16.8M
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
895
16.8M
  }
896
897
27.7M
  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
898
899
9.25M
  ctx->skippable = 0;
900
  // Set to zero to make sure we do not use the previous encoded frame stats
901
9.25M
  mbmi->skip_txfm = 0;
902
  // Reset skip mode flag.
903
9.25M
  mbmi->skip_mode = 0;
904
905
9.25M
  x->source_variance = av1_get_perpixel_variance_facade(
906
9.25M
      cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y);
907
908
  // Initialize default mode evaluation params
909
9.25M
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);
910
911
  // Save rdmult before it might be changed, so it can be restored later.
912
9.25M
  const int orig_rdmult = x->rdmult;
913
9.25M
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode, mbmi);
914
  // Set error per bit for current rdmult
915
9.25M
  av1_set_error_per_bit(&x->errorperbit, x->rdmult);
916
9.25M
  av1_rd_cost_update(x->rdmult, &best_rd);
917
918
  // If best_rd.rdcost is set to INT64_MAX, the encoder will not use any previous
919
  // rdcost information for the following mode search.
920
  // Disabling the feature could yield some coding gain, with an encoder slowdown.
921
9.25M
  if (!cpi->sf.part_sf.use_best_rd_for_pruning) {
922
0
    av1_invalid_rd_stats(&best_rd);
923
0
  }
924
925
  // Find best coding mode & reconstruct the MB so it is available
926
  // as a predictor for MBs that follow in the SB
927
9.25M
  if (frame_is_intra_only(cm)) {
928
#if CONFIG_COLLECT_COMPONENT_TIMING
929
    start_timing(cpi, av1_rd_pick_intra_mode_sb_time);
930
#endif
931
8.30M
    av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd.rdcost);
932
#if CONFIG_COLLECT_COMPONENT_TIMING
933
    end_timing(cpi, av1_rd_pick_intra_mode_sb_time);
934
#endif
935
8.30M
  } else {
936
#if CONFIG_COLLECT_COMPONENT_TIMING
937
    start_timing(cpi, av1_rd_pick_inter_mode_sb_time);
938
#endif
939
945k
    if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
940
0
      av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
941
0
                                         rd_cost, bsize, ctx, best_rd.rdcost);
942
945k
    } else {
943
945k
      av1_rd_pick_inter_mode(cpi, tile_data, x, rd_cost, bsize, ctx,
944
945k
                             best_rd.rdcost);
945
945k
    }
946
#if CONFIG_COLLECT_COMPONENT_TIMING
947
    end_timing(cpi, av1_rd_pick_inter_mode_sb_time);
948
#endif
949
945k
  }
950
951
  // Examine the resulting rate and for AQ mode 2 make a segment choice.
952
9.25M
  if (rd_cost->rate != INT_MAX && aq_mode == COMPLEXITY_AQ &&
953
0
      bsize >= BLOCK_16X16) {
954
0
    av1_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
955
0
  }
956
957
9.25M
  x->rdmult = orig_rdmult;
958
959
  // TODO(jingning) The rate-distortion optimization flow needs to be
960
  // refactored to provide proper exit/return handle.
961
9.25M
  if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;
962
963
9.25M
  ctx->rd_stats.rate = rd_cost->rate;
964
9.25M
  ctx->rd_stats.dist = rd_cost->dist;
965
9.25M
  ctx->rd_stats.rdcost = rd_cost->rdcost;
966
967
#if CONFIG_COLLECT_COMPONENT_TIMING
968
  end_timing(cpi, rd_pick_sb_modes_time);
969
#endif
970
9.25M
}
971
972
5.55M
static void update_stats(const AV1_COMMON *const cm, ThreadData *td) {
973
5.55M
  MACROBLOCK *x = &td->mb;
974
5.55M
  MACROBLOCKD *const xd = &x->e_mbd;
975
5.55M
  const MB_MODE_INFO *const mbmi = xd->mi[0];
976
5.55M
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
977
5.55M
  const CurrentFrame *const current_frame = &cm->current_frame;
978
5.55M
  const BLOCK_SIZE bsize = mbmi->bsize;
979
5.55M
  FRAME_CONTEXT *fc = xd->tile_ctx;
980
5.55M
  const int seg_ref_active =
981
5.55M
      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
982
983
5.55M
  if (current_frame->skip_mode_info.skip_mode_flag && !seg_ref_active &&
984
272k
      is_comp_ref_allowed(bsize)) {
985
272k
    const int skip_mode_ctx = av1_get_skip_mode_context(xd);
986
#if CONFIG_ENTROPY_STATS
987
    td->counts->skip_mode[skip_mode_ctx][mbmi->skip_mode]++;
988
#endif
989
272k
    update_cdf(fc->skip_mode_cdfs[skip_mode_ctx], mbmi->skip_mode, 2);
990
272k
  }
991
992
5.55M
  if (!mbmi->skip_mode && !seg_ref_active) {
993
5.55M
    const int skip_ctx = av1_get_skip_txfm_context(xd);
994
#if CONFIG_ENTROPY_STATS
995
    td->counts->skip_txfm[skip_ctx][mbmi->skip_txfm]++;
996
#endif
997
5.55M
    update_cdf(fc->skip_txfm_cdfs[skip_ctx], mbmi->skip_txfm, 2);
998
5.55M
  }
999
1000
#if CONFIG_ENTROPY_STATS
1001
  // delta quant applies to both intra and inter
1002
  const int super_block_upper_left =
1003
      ((xd->mi_row & (cm->seq_params->mib_size - 1)) == 0) &&
1004
      ((xd->mi_col & (cm->seq_params->mib_size - 1)) == 0);
1005
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
1006
  if (delta_q_info->delta_q_present_flag &&
1007
      (bsize != cm->seq_params->sb_size || !mbmi->skip_txfm) &&
1008
      super_block_upper_left) {
1009
    const int dq = (mbmi->current_qindex - xd->current_base_qindex) /
1010
                   delta_q_info->delta_q_res;
1011
    const int absdq = abs(dq);
1012
    for (int i = 0; i < AOMMIN(absdq, DELTA_Q_SMALL); ++i) {
1013
      td->counts->delta_q[i][1]++;
1014
    }
1015
    if (absdq < DELTA_Q_SMALL) td->counts->delta_q[absdq][0]++;
1016
    if (delta_q_info->delta_lf_present_flag) {
1017
      if (delta_q_info->delta_lf_multi) {
1018
        const int frame_lf_count =
1019
            av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
1020
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
1021
          const int delta_lf = (mbmi->delta_lf[lf_id] - xd->delta_lf[lf_id]) /
1022
                               delta_q_info->delta_lf_res;
1023
          const int abs_delta_lf = abs(delta_lf);
1024
          for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
1025
            td->counts->delta_lf_multi[lf_id][i][1]++;
1026
          }
1027
          if (abs_delta_lf < DELTA_LF_SMALL)
1028
            td->counts->delta_lf_multi[lf_id][abs_delta_lf][0]++;
1029
        }
1030
      } else {
1031
        const int delta_lf =
1032
            (mbmi->delta_lf_from_base - xd->delta_lf_from_base) /
1033
            delta_q_info->delta_lf_res;
1034
        const int abs_delta_lf = abs(delta_lf);
1035
        for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
1036
          td->counts->delta_lf[i][1]++;
1037
        }
1038
        if (abs_delta_lf < DELTA_LF_SMALL)
1039
          td->counts->delta_lf[abs_delta_lf][0]++;
1040
      }
1041
    }
1042
  }
1043
#endif
1044
1045
5.55M
  if (!is_inter_block(mbmi)) {
1046
5.05M
    av1_sum_intra_stats(cm, td->counts, xd, mbmi, xd->above_mbmi, xd->left_mbmi,
1047
5.05M
                        frame_is_intra_only(cm));
1048
5.05M
  }
1049
1050
5.55M
  if (av1_allow_intrabc(cm)) {
1051
0
    const int is_intrabc = is_intrabc_block(mbmi);
1052
0
    update_cdf(fc->intrabc_cdf, is_intrabc, 2);
1053
#if CONFIG_ENTROPY_STATS
1054
    ++td->counts->intrabc[is_intrabc];
1055
#endif  // CONFIG_ENTROPY_STATS
1056
0
    if (is_intrabc) {
1057
0
      const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1058
0
      const int_mv dv_ref = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
1059
0
      av1_update_mv_stats(&mbmi->mv[0].as_mv, &dv_ref.as_mv, &fc->ndvc,
1060
0
                          MV_SUBPEL_NONE);
1061
0
    }
1062
0
  }
1063
1064
5.55M
  if (frame_is_intra_only(cm) || mbmi->skip_mode) return;
1065
1066
1.22M
  FRAME_COUNTS *const counts = td->counts;
1067
1.22M
  const int inter_block = is_inter_block(mbmi);
1068
1069
1.22M
  if (!seg_ref_active) {
1070
#if CONFIG_ENTROPY_STATS
1071
    counts->intra_inter[av1_get_intra_inter_context(xd)][inter_block]++;
1072
#endif
1073
1.22M
    update_cdf(fc->intra_inter_cdf[av1_get_intra_inter_context(xd)],
1074
1.22M
               inter_block, 2);
1075
    // If the segment reference feature is enabled we have only a single
1076
    // reference frame allowed for the segment so exclude it from
1077
    // the reference frame counts used to work out probabilities.
1078
1.22M
    if (inter_block) {
1079
501k
      const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
1080
501k
      const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1];
1081
501k
      if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
1082
478k
        if (is_comp_ref_allowed(bsize)) {
1083
#if CONFIG_ENTROPY_STATS
1084
          counts->comp_inter[av1_get_reference_mode_context(xd)]
1085
                            [has_second_ref(mbmi)]++;
1086
#endif  // CONFIG_ENTROPY_STATS
1087
478k
          update_cdf(av1_get_reference_mode_cdf(xd), has_second_ref(mbmi), 2);
1088
478k
        }
1089
478k
      }
1090
1091
501k
      if (has_second_ref(mbmi)) {
1092
3
        const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi)
1093
3
                                                      ? UNIDIR_COMP_REFERENCE
1094
3
                                                      : BIDIR_COMP_REFERENCE;
1095
3
        update_cdf(av1_get_comp_reference_type_cdf(xd), comp_ref_type,
1096
3
                   COMP_REFERENCE_TYPES);
1097
#if CONFIG_ENTROPY_STATS
1098
        counts->comp_ref_type[av1_get_comp_reference_type_context(xd)]
1099
                             [comp_ref_type]++;
1100
#endif  // CONFIG_ENTROPY_STATS
1101
1102
3
        if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
1103
0
          const int bit = (ref0 == BWDREF_FRAME);
1104
0
          update_cdf(av1_get_pred_cdf_uni_comp_ref_p(xd), bit, 2);
1105
#if CONFIG_ENTROPY_STATS
1106
          counts
1107
              ->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p(xd)][0][bit]++;
1108
#endif  // CONFIG_ENTROPY_STATS
1109
0
          if (!bit) {
1110
0
            const int bit1 = (ref1 == LAST3_FRAME || ref1 == GOLDEN_FRAME);
1111
0
            update_cdf(av1_get_pred_cdf_uni_comp_ref_p1(xd), bit1, 2);
1112
#if CONFIG_ENTROPY_STATS
1113
            counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p1(xd)][1]
1114
                                [bit1]++;
1115
#endif  // CONFIG_ENTROPY_STATS
1116
0
            if (bit1) {
1117
0
              update_cdf(av1_get_pred_cdf_uni_comp_ref_p2(xd),
1118
0
                         ref1 == GOLDEN_FRAME, 2);
1119
#if CONFIG_ENTROPY_STATS
1120
              counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p2(xd)][2]
1121
                                  [ref1 == GOLDEN_FRAME]++;
1122
#endif  // CONFIG_ENTROPY_STATS
1123
0
            }
1124
0
          }
1125
3
        } else {
1126
3
          const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME);
1127
3
          update_cdf(av1_get_pred_cdf_comp_ref_p(xd), bit, 2);
1128
#if CONFIG_ENTROPY_STATS
1129
          counts->comp_ref[av1_get_pred_context_comp_ref_p(xd)][0][bit]++;
1130
#endif  // CONFIG_ENTROPY_STATS
1131
3
          if (!bit) {
1132
3
            update_cdf(av1_get_pred_cdf_comp_ref_p1(xd), ref0 == LAST2_FRAME,
1133
3
                       2);
1134
#if CONFIG_ENTROPY_STATS
1135
            counts->comp_ref[av1_get_pred_context_comp_ref_p1(xd)][1]
1136
                            [ref0 == LAST2_FRAME]++;
1137
#endif  // CONFIG_ENTROPY_STATS
1138
3
          } else {
1139
0
            update_cdf(av1_get_pred_cdf_comp_ref_p2(xd), ref0 == GOLDEN_FRAME,
1140
0
                       2);
1141
#if CONFIG_ENTROPY_STATS
1142
            counts->comp_ref[av1_get_pred_context_comp_ref_p2(xd)][2]
1143
                            [ref0 == GOLDEN_FRAME]++;
1144
#endif  // CONFIG_ENTROPY_STATS
1145
0
          }
1146
3
          update_cdf(av1_get_pred_cdf_comp_bwdref_p(xd), ref1 == ALTREF_FRAME,
1147
3
                     2);
1148
#if CONFIG_ENTROPY_STATS
1149
          counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(xd)][0]
1150
                             [ref1 == ALTREF_FRAME]++;
1151
#endif  // CONFIG_ENTROPY_STATS
1152
3
          if (ref1 != ALTREF_FRAME) {
1153
0
            update_cdf(av1_get_pred_cdf_comp_bwdref_p1(xd),
1154
0
                       ref1 == ALTREF2_FRAME, 2);
1155
#if CONFIG_ENTROPY_STATS
1156
            counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p1(xd)][1]
1157
                               [ref1 == ALTREF2_FRAME]++;
1158
#endif  // CONFIG_ENTROPY_STATS
1159
0
          }
1160
3
        }
1161
501k
      } else {
1162
501k
        const int bit = (ref0 >= BWDREF_FRAME);
1163
501k
        update_cdf(av1_get_pred_cdf_single_ref_p1(xd), bit, 2);
1164
#if CONFIG_ENTROPY_STATS
1165
        counts->single_ref[av1_get_pred_context_single_ref_p1(xd)][0][bit]++;
1166
#endif  // CONFIG_ENTROPY_STATS
1167
501k
        if (bit) {
1168
24.2k
          assert(ref0 <= ALTREF_FRAME);
1169
24.2k
          update_cdf(av1_get_pred_cdf_single_ref_p2(xd), ref0 == ALTREF_FRAME,
1170
24.2k
                     2);
1171
#if CONFIG_ENTROPY_STATS
1172
          counts->single_ref[av1_get_pred_context_single_ref_p2(xd)][1]
1173
                            [ref0 == ALTREF_FRAME]++;
1174
#endif  // CONFIG_ENTROPY_STATS
1175
24.2k
          if (ref0 != ALTREF_FRAME) {
1176
0
            update_cdf(av1_get_pred_cdf_single_ref_p6(xd),
1177
0
                       ref0 == ALTREF2_FRAME, 2);
1178
#if CONFIG_ENTROPY_STATS
1179
            counts->single_ref[av1_get_pred_context_single_ref_p6(xd)][5]
1180
                              [ref0 == ALTREF2_FRAME]++;
1181
#endif  // CONFIG_ENTROPY_STATS
1182
0
          }
1183
477k
        } else {
1184
477k
          const int bit1 = !(ref0 == LAST2_FRAME || ref0 == LAST_FRAME);
1185
477k
          update_cdf(av1_get_pred_cdf_single_ref_p3(xd), bit1, 2);
1186
#if CONFIG_ENTROPY_STATS
1187
          counts->single_ref[av1_get_pred_context_single_ref_p3(xd)][2][bit1]++;
1188
#endif  // CONFIG_ENTROPY_STATS
1189
477k
          if (!bit1) {
1190
437k
            update_cdf(av1_get_pred_cdf_single_ref_p4(xd), ref0 != LAST_FRAME,
1191
437k
                       2);
1192
#if CONFIG_ENTROPY_STATS
1193
            counts->single_ref[av1_get_pred_context_single_ref_p4(xd)][3]
1194
                              [ref0 != LAST_FRAME]++;
1195
#endif  // CONFIG_ENTROPY_STATS
1196
437k
          } else {
1197
39.6k
            update_cdf(av1_get_pred_cdf_single_ref_p5(xd), ref0 != LAST3_FRAME,
1198
39.6k
                       2);
1199
#if CONFIG_ENTROPY_STATS
1200
            counts->single_ref[av1_get_pred_context_single_ref_p5(xd)][4]
1201
                              [ref0 != LAST3_FRAME]++;
1202
#endif  // CONFIG_ENTROPY_STATS
1203
39.6k
          }
1204
477k
        }
1205
501k
      }
1206
1207
501k
      if (cm->seq_params->enable_interintra_compound &&
1208
0
          is_interintra_allowed(mbmi)) {
1209
0
        const int bsize_group = size_group_lookup[bsize];
1210
0
        if (mbmi->ref_frame[1] == INTRA_FRAME) {
1211
#if CONFIG_ENTROPY_STATS
1212
          counts->interintra[bsize_group][1]++;
1213
#endif
1214
0
          update_cdf(fc->interintra_cdf[bsize_group], 1, 2);
1215
#if CONFIG_ENTROPY_STATS
1216
          counts->interintra_mode[bsize_group][mbmi->interintra_mode]++;
1217
#endif
1218
0
          update_cdf(fc->interintra_mode_cdf[bsize_group],
1219
0
                     mbmi->interintra_mode, INTERINTRA_MODES);
1220
0
          if (av1_is_wedge_used(bsize)) {
1221
#if CONFIG_ENTROPY_STATS
1222
            counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
1223
#endif
1224
0
            update_cdf(fc->wedge_interintra_cdf[bsize],
1225
0
                       mbmi->use_wedge_interintra, 2);
1226
0
            if (mbmi->use_wedge_interintra) {
1227
#if CONFIG_ENTROPY_STATS
1228
              counts->wedge_idx[bsize][mbmi->interintra_wedge_index]++;
1229
#endif
1230
0
              update_cdf(fc->wedge_idx_cdf[bsize], mbmi->interintra_wedge_index,
1231
0
                         16);
1232
0
            }
1233
0
          }
1234
0
        } else {
1235
#if CONFIG_ENTROPY_STATS
1236
          counts->interintra[bsize_group][0]++;
1237
#endif
1238
0
          update_cdf(fc->interintra_cdf[bsize_group], 0, 2);
1239
0
        }
1240
0
      }
1241
1242
501k
      const MOTION_MODE motion_allowed =
1243
501k
          cm->features.switchable_motion_mode
1244
501k
              ? motion_mode_allowed(xd->global_motion, xd, mbmi,
1245
501k
                                    cm->features.allow_warped_motion)
1246
18.4E
              : SIMPLE_TRANSLATION;
1247
501k
      if (mbmi->ref_frame[1] != INTRA_FRAME) {
1248
501k
        if (motion_allowed == WARPED_CAUSAL) {
1249
#if CONFIG_ENTROPY_STATS
1250
          counts->motion_mode[bsize][mbmi->motion_mode]++;
1251
#endif
1252
337k
          update_cdf(fc->motion_mode_cdf[bsize], mbmi->motion_mode,
1253
337k
                     MOTION_MODES);
1254
337k
        } else if (motion_allowed == OBMC_CAUSAL) {
1255
#if CONFIG_ENTROPY_STATS
1256
          counts->obmc[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
1257
#endif
1258
24.2k
          update_cdf(fc->obmc_cdf[bsize], mbmi->motion_mode == OBMC_CAUSAL, 2);
1259
24.2k
        }
1260
501k
      }
1261
1262
501k
      if (has_second_ref(mbmi)) {
1263
3
        assert(current_frame->reference_mode != SINGLE_REFERENCE &&
1264
3
               is_inter_compound_mode(mbmi->mode) &&
1265
3
               mbmi->motion_mode == SIMPLE_TRANSLATION);
1266
1267
3
        const int masked_compound_used = is_any_masked_compound_used(bsize) &&
1268
3
                                         cm->seq_params->enable_masked_compound;
1269
3
        if (masked_compound_used) {
1270
3
          const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
1271
#if CONFIG_ENTROPY_STATS
1272
          ++counts->comp_group_idx[comp_group_idx_ctx][mbmi->comp_group_idx];
1273
#endif
1274
3
          update_cdf(fc->comp_group_idx_cdf[comp_group_idx_ctx],
1275
3
                     mbmi->comp_group_idx, 2);
1276
3
        }
1277
1278
3
        if (mbmi->comp_group_idx == 0) {
1279
3
          const int comp_index_ctx = get_comp_index_context(cm, xd);
1280
#if CONFIG_ENTROPY_STATS
1281
          ++counts->compound_index[comp_index_ctx][mbmi->compound_idx];
1282
#endif
1283
3
          update_cdf(fc->compound_index_cdf[comp_index_ctx], mbmi->compound_idx,
1284
3
                     2);
1285
3
        } else {
1286
0
          assert(masked_compound_used);
1287
0
          if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
1288
#if CONFIG_ENTROPY_STATS
1289
            ++counts->compound_type[bsize][mbmi->interinter_comp.type -
1290
                                           COMPOUND_WEDGE];
1291
#endif
1292
0
            update_cdf(fc->compound_type_cdf[bsize],
1293
0
                       mbmi->interinter_comp.type - COMPOUND_WEDGE,
1294
0
                       MASKED_COMPOUND_TYPES);
1295
0
          }
1296
0
        }
1297
3
      }
1298
501k
      if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
1299
0
        if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
1300
#if CONFIG_ENTROPY_STATS
1301
          counts->wedge_idx[bsize][mbmi->interinter_comp.wedge_index]++;
1302
#endif
1303
0
          update_cdf(fc->wedge_idx_cdf[bsize],
1304
0
                     mbmi->interinter_comp.wedge_index, 16);
1305
0
        }
1306
0
      }
1307
501k
    }
1308
1.22M
  }
1309
1310
1.22M
  if (inter_block && cm->features.interp_filter == SWITCHABLE &&
1311
501k
      av1_is_interp_needed(xd)) {
1312
411k
    update_filter_type_cdf(xd, mbmi, cm->seq_params->enable_dual_filter);
1313
411k
  }
1314
1.22M
  if (inter_block &&
1315
501k
      !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
1316
501k
    const PREDICTION_MODE mode = mbmi->mode;
1317
501k
    const int16_t mode_ctx =
1318
501k
        av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
1319
501k
    if (has_second_ref(mbmi)) {
1320
#if CONFIG_ENTROPY_STATS
1321
      ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
1322
#endif
1323
3
      update_cdf(fc->inter_compound_mode_cdf[mode_ctx],
1324
3
                 INTER_COMPOUND_OFFSET(mode), INTER_COMPOUND_MODES);
1325
501k
    } else {
1326
501k
      av1_update_inter_mode_stats(fc, counts, mode, mode_ctx);
1327
501k
    }
1328
1329
501k
    const int new_mv = mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV;
1330
501k
    if (new_mv) {
1331
281k
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1332
595k
      for (int idx = 0; idx < 2; ++idx) {
1333
442k
        if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1334
154k
          const uint8_t drl_ctx =
1335
154k
              av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1336
154k
          update_cdf(fc->drl_cdf[drl_ctx], mbmi->ref_mv_idx != idx, 2);
1337
#if CONFIG_ENTROPY_STATS
1338
          ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx];
1339
#endif
1340
154k
          if (mbmi->ref_mv_idx == idx) break;
1341
154k
        }
1342
442k
      }
1343
281k
    }
1344
1345
501k
    if (have_nearmv_in_inter_mode(mbmi->mode)) {
1346
60.2k
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1347
128k
      for (int idx = 1; idx < 3; ++idx) {
1348
95.6k
        if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1349
36.7k
          const uint8_t drl_ctx =
1350
36.7k
              av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1351
36.7k
          update_cdf(fc->drl_cdf[drl_ctx], mbmi->ref_mv_idx != idx - 1, 2);
1352
#if CONFIG_ENTROPY_STATS
1353
          ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1];
1354
#endif
1355
36.7k
          if (mbmi->ref_mv_idx == idx - 1) break;
1356
36.7k
        }
1357
95.6k
      }
1358
60.2k
    }
1359
501k
    if (have_newmv_in_inter_mode(mbmi->mode)) {
1360
281k
      const int allow_hp = cm->features.cur_frame_force_integer_mv
1361
281k
                               ? MV_SUBPEL_NONE
1362
281k
                               : cm->features.allow_high_precision_mv;
1363
281k
      if (new_mv) {
1364
562k
        for (int ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
1365
281k
          const int_mv ref_mv = av1_get_ref_mv(x, ref);
1366
281k
          av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
1367
281k
                              allow_hp);
1368
281k
        }
1369
18.4E
      } else if (mbmi->mode == NEAREST_NEWMV || mbmi->mode == NEAR_NEWMV) {
1370
0
        const int ref = 1;
1371
0
        const int_mv ref_mv = av1_get_ref_mv(x, ref);
1372
0
        av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
1373
0
                            allow_hp);
1374
18.4E
      } else if (mbmi->mode == NEW_NEARESTMV || mbmi->mode == NEW_NEARMV) {
1375
0
        const int ref = 0;
1376
0
        const int_mv ref_mv = av1_get_ref_mv(x, ref);
1377
0
        av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
1378
0
                            allow_hp);
1379
0
      }
1380
281k
    }
1381
501k
  }
1382
1.22M
}
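Each update_cdf() call in the block above nudges one adaptive model toward the outcome that was just coded (a reference-frame bit, an OBMC/warped flag, a DRL index). A minimal, self-contained sketch of that kind of adaptive binary update, assuming an illustrative 15-bit probability scale and a shift-based adaptation rate rather than libaom's exact update_cdf() arithmetic:

#include <stdint.h>

#define SKETCH_PROB_MAX (1 << 15) /* assumed 15-bit probability scale */

/* prob_zero models the probability that the next bit is 0. */
static void sketch_update_binary_prob(uint16_t *prob_zero, int bit, int rate) {
  if (bit) {
    *prob_zero -= *prob_zero >> rate;                                  /* saw a 1 */
  } else {
    *prob_zero += (uint16_t)((SKETCH_PROB_MAX - *prob_zero) >> rate);  /* saw a 0 */
  }
}

Frequently observed outcomes thus become cheaper to signal over time, which is also why these updates run only on the final, non-dry-run pass over the superblock.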
1383
1384
/*!\brief Reconstructs an individual coding block
1385
 *
1386
 * \ingroup partition_search
1387
 * Reconstructs an individual coding block by applying the chosen modes stored
1388
 * in ctx, and also updates mode counts and entropy models.
1389
 *
1390
 * \param[in]    cpi       Top-level encoder structure
1391
 * \param[in]    tile_data Pointer to struct holding adaptive
1392
 *                         data/contexts/models for the tile during encoding
1393
 * \param[in]    td        Pointer to thread data
1394
 * \param[in]    tp        Pointer to the starting token
1395
 * \param[in]    mi_row    Row coordinate of the block in a step size of MI_SIZE
1396
 * \param[in]    mi_col    Column coordinate of the block in a step size of
1397
 *                         MI_SIZE
1398
 * \param[in]    dry_run   A code indicating whether it is part of the final
1399
 *                         pass for reconstructing the superblock
1400
 * \param[in]    bsize     Current block size
1401
 * \param[in]    partition Partition mode of the parent block
1402
 * \param[in]    ctx       Pointer to structure holding coding contexts and the
1403
 *                         chosen modes for the current block
1404
 * \param[in]    rate      Pointer to the total rate for the current block
1405
 *
1406
 * \remark Nothing is returned. Instead, reconstructions (w/o in-loop filters)
1407
 * will be updated in the pixel buffers in td->mb.e_mbd. Also, the chosen modes
1408
 * will be stored in the MB_MODE_INFO buffer td->mb.e_mbd.mi[0].
1409
 */
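A minimal sketch of the dry_run convention described above, with placeholder types rather than the real encoder structures: reconstruction happens on every call so that later blocks can predict from this one, the adaptive models are refreshed only on the final pass, and the block-level rdmult change is always undone before returning.

typedef struct {
  int rdmult;        /* rate-distortion multiplier, saved and restored per block */
  int cdfs_updated;  /* stands in for the CDF/counter refresh in update_stats() */
} sketch_block_state;

static void sketch_encode_b(sketch_block_state *s, int dry_run,
                            int block_rdmult) {
  const int saved_rdmult = s->rdmult;
  s->rdmult = block_rdmult;          /* per-block tuning, as setup_block_rdmult() */
  /* ... reconstruct the block here on every call ... */
  if (!dry_run) s->cdfs_updated = 1; /* stats updated only on the final pass */
  s->rdmult = saved_rdmult;          /* mirrors x->rdmult = origin_mult below */
}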
1410
static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data,
1411
                     ThreadData *td, TokenExtra **tp, int mi_row, int mi_col,
1412
                     RUN_TYPE dry_run, BLOCK_SIZE bsize,
1413
                     PARTITION_TYPE partition, PICK_MODE_CONTEXT *const ctx,
1414
17.1M
                     int *rate) {
1415
17.1M
  const AV1_COMMON *const cm = &cpi->common;
1416
17.1M
  TileInfo *const tile = &tile_data->tile_info;
1417
17.1M
  MACROBLOCK *const x = &td->mb;
1418
17.1M
  MACROBLOCKD *xd = &x->e_mbd;
1419
17.1M
  const int subsampling_x = cm->seq_params->subsampling_x;
1420
17.1M
  const int subsampling_y = cm->seq_params->subsampling_y;
1421
1422
17.1M
  av1_set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);
1423
17.1M
  const int origin_mult = x->rdmult;
1424
17.1M
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
1425
17.1M
  MB_MODE_INFO *mbmi = xd->mi[0];
1426
17.1M
  mbmi->partition = partition;
1427
17.1M
  av1_update_state(cpi, td, ctx, mi_row, mi_col, bsize, dry_run);
1428
1429
17.1M
  if (!dry_run) {
1430
3.76M
    set_cb_offsets(x->mbmi_ext_frame->cb_offset, x->cb_offset[PLANE_TYPE_Y],
1431
3.76M
                   x->cb_offset[PLANE_TYPE_UV]);
1432
3.76M
    assert(x->cb_offset[PLANE_TYPE_Y] <
1433
3.76M
           (1 << num_pels_log2_lookup[cpi->common.seq_params->sb_size]));
1434
3.76M
    assert(x->cb_offset[PLANE_TYPE_UV] <
1435
3.76M
           ((1 << num_pels_log2_lookup[cpi->common.seq_params->sb_size]) >>
1436
3.76M
            (subsampling_x + subsampling_y)));
1437
3.76M
  }
1438
1439
17.1M
  encode_superblock(cpi, tile_data, td, tp, dry_run, bsize, rate);
1440
1441
17.1M
  if (!dry_run) {
1442
3.76M
    update_cb_offsets(x, bsize, subsampling_x, subsampling_y);
1443
3.76M
    if (bsize == cpi->common.seq_params->sb_size && mbmi->skip_txfm == 1 &&
1444
59
        cm->delta_q_info.delta_lf_present_flag) {
1445
0
      const int frame_lf_count =
1446
0
          av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
1447
0
      for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id)
1448
0
        mbmi->delta_lf[lf_id] = xd->delta_lf[lf_id];
1449
0
      mbmi->delta_lf_from_base = xd->delta_lf_from_base;
1450
0
    }
1451
3.76M
    if (has_second_ref(mbmi)) {
1452
0
      if (mbmi->compound_idx == 0 ||
1453
0
          mbmi->interinter_comp.type == COMPOUND_AVERAGE)
1454
0
        mbmi->comp_group_idx = 0;
1455
0
      else
1456
0
        mbmi->comp_group_idx = 1;
1457
0
    }
1458
1459
    // delta quant applies to both intra and inter
1460
3.76M
    const int super_block_upper_left =
1461
3.76M
        ((mi_row & (cm->seq_params->mib_size - 1)) == 0) &&
1462
853k
        ((mi_col & (cm->seq_params->mib_size - 1)) == 0);
1463
3.76M
    const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
1464
3.76M
    if (delta_q_info->delta_q_present_flag &&
1465
12.4k
        (bsize != cm->seq_params->sb_size || !mbmi->skip_txfm) &&
1466
12.4k
        super_block_upper_left) {
1467
2.28k
      xd->current_base_qindex = mbmi->current_qindex;
1468
2.28k
      if (delta_q_info->delta_lf_present_flag) {
1469
0
        if (delta_q_info->delta_lf_multi) {
1470
0
          const int frame_lf_count =
1471
0
              av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
1472
0
          for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
1473
0
            xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id];
1474
0
          }
1475
0
        } else {
1476
0
          xd->delta_lf_from_base = mbmi->delta_lf_from_base;
1477
0
        }
1478
0
      }
1479
2.28k
    }
1480
1481
3.76M
    RD_COUNTS *rdc = &td->rd_counts;
1482
3.76M
    if (mbmi->skip_mode) {
1483
0
      assert(!frame_is_intra_only(cm));
1484
0
      rdc->skip_mode_used_flag = 1;
1485
0
      if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) {
1486
0
        assert(has_second_ref(mbmi));
1487
0
        rdc->compound_ref_used_flag = 1;
1488
0
      }
1489
0
      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
1490
3.76M
    } else {
1491
3.76M
      const int seg_ref_active =
1492
3.76M
          segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
1493
3.76M
      if (!seg_ref_active) {
1494
        // If the segment reference feature is enabled, we have only a single
1495
        // reference frame allowed for the segment, so exclude it from
1496
        // the reference frame counts used to work out probabilities.
1497
3.76M
        if (is_inter_block(mbmi)) {
1498
293k
          av1_collect_neighbors_ref_counts(xd);
1499
293k
          if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) {
1500
293k
            if (has_second_ref(mbmi)) {
1501
              // This flag is also updated for 4x4 blocks
1502
0
              rdc->compound_ref_used_flag = 1;
1503
0
            }
1504
293k
          }
1505
293k
          set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
1506
293k
        }
1507
3.76M
      }
1508
3.76M
    }
1509
1510
3.76M
    if (tile_data->allow_update_cdf) update_stats(&cpi->common, td);
1511
1512
    // Gather obmc and warped motion count to update the probability.
1513
3.76M
    if ((cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
1514
1.27M
         cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) ||
1515
3.76M
        (cm->features.allow_warped_motion &&
1516
883k
         cpi->sf.inter_sf.prune_warped_prob_thresh > 0)) {
1517
0
      const int inter_block = is_inter_block(mbmi);
1518
0
      const int seg_ref_active =
1519
0
          segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
1520
0
      if (!seg_ref_active && inter_block) {
1521
0
        const MOTION_MODE motion_allowed =
1522
0
            cm->features.switchable_motion_mode
1523
0
                ? motion_mode_allowed(xd->global_motion, xd, mbmi,
1524
0
                                      cm->features.allow_warped_motion)
1525
0
                : SIMPLE_TRANSLATION;
1526
1527
0
        if (mbmi->ref_frame[1] != INTRA_FRAME) {
1528
0
          if (motion_allowed >= OBMC_CAUSAL) {
1529
0
            td->rd_counts.obmc_used[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
1530
0
          }
1531
0
          if (motion_allowed == WARPED_CAUSAL) {
1532
0
            td->rd_counts.warped_used[mbmi->motion_mode == WARPED_CAUSAL]++;
1533
0
          }
1534
0
        }
1535
0
      }
1536
0
    }
1537
3.76M
  }
1538
  // TODO(Ravi/Remya): Move this copy function to a better logical place
1539
  // This function will copy the best mode information from block
1540
  // level (x->mbmi_ext) to frame level (cpi->mbmi_ext_info.frame_base). This
1541
  // frame level buffer (cpi->mbmi_ext_info.frame_base) will be used during
1542
  // bitstream preparation.
1543
17.1M
  av1_copy_mbmi_ext_to_mbmi_ext_frame(x->mbmi_ext_frame, &x->mbmi_ext,
1544
17.1M
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
1545
17.1M
  x->rdmult = origin_mult;
1546
17.1M
}
1547
1548
/*!\brief Reconstructs a partition (may contain multiple coding blocks)
1549
 *
1550
 * \ingroup partition_search
1551
 * Reconstructs a sub-partition of the superblock by applying the chosen modes
1552
 * and partition trees stored in pc_tree.
1553
 *
1554
 * \param[in]    cpi       Top-level encoder structure
1555
 * \param[in]    td        Pointer to thread data
1556
 * \param[in]    tile_data Pointer to struct holding adaptive
1557
 *                         data/contexts/models for the tile during encoding
1558
 * \param[in]    tp        Pointer to the starting token
1559
 * \param[in]    mi_row    Row coordinate of the block in a step size of MI_SIZE
1560
 * \param[in]    mi_col    Column coordinate of the block in a step size of
1561
 *                         MI_SIZE
1562
 * \param[in]    dry_run   A code indicating whether it is part of the final
1563
 *                         pass for reconstructing the superblock
1564
 * \param[in]    bsize     Current block size
1565
 * \param[in]    pc_tree   Pointer to the PC_TREE node storing the picked
1566
 *                         partitions and mode info for the current block
1567
 * \param[in]    rate      Pointer to the total rate for the current block
1568
 *
1569
 * \remark Nothing is returned. Instead, reconstructions (w/o in-loop filters)
1570
 * will be updated in the pixel buffers in td->mb.e_mbd.
1571
 */
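A minimal sketch of the recursion described above, assuming simplified placeholder types rather than libaom's PC_TREE, and tracking only block positions (widths and heights of the rectangular sub-blocks are omitted for brevity): each leaf of the partition tree yields one block-level encode call.

enum sketch_partition { SK_NONE, SK_HORZ, SK_VERT, SK_SPLIT };

typedef struct sketch_pc_node {
  enum sketch_partition partitioning;
  struct sketch_pc_node *split[4];  /* children, used only for SK_SPLIT */
} sketch_pc_node;

static void sketch_encode_sb(const sketch_pc_node *node, int mi_row, int mi_col,
                             int mi_size,
                             void (*encode_block)(int mi_row, int mi_col)) {
  const int hbs = mi_size / 2;  /* half block size, in mi units */
  switch (node->partitioning) {
    case SK_NONE:
      encode_block(mi_row, mi_col);
      break;
    case SK_HORZ:  /* top and bottom halves */
      encode_block(mi_row, mi_col);
      encode_block(mi_row + hbs, mi_col);
      break;
    case SK_VERT:  /* left and right halves */
      encode_block(mi_row, mi_col);
      encode_block(mi_row, mi_col + hbs);
      break;
    case SK_SPLIT: /* recurse into the four quadrants */
      sketch_encode_sb(node->split[0], mi_row, mi_col, hbs, encode_block);
      sketch_encode_sb(node->split[1], mi_row, mi_col + hbs, hbs, encode_block);
      sketch_encode_sb(node->split[2], mi_row + hbs, mi_col, hbs, encode_block);
      sketch_encode_sb(node->split[3], mi_row + hbs, mi_col + hbs, hbs,
                       encode_block);
      break;
  }
}

The real routine below additionally handles the extended (AB and 4-way) partitions and skips sub-blocks that fall outside the frame boundary.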
1572
static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
1573
                      TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
1574
                      int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize,
1575
24.7M
                      PC_TREE *pc_tree, int *rate) {
1576
24.7M
  assert(bsize < BLOCK_SIZES_ALL);
1577
24.7M
  const AV1_COMMON *const cm = &cpi->common;
1578
24.7M
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
1579
24.7M
  MACROBLOCK *const x = &td->mb;
1580
24.7M
  MACROBLOCKD *const xd = &x->e_mbd;
1581
24.7M
  assert(bsize < BLOCK_SIZES_ALL);
1582
24.7M
  const int hbs = mi_size_wide[bsize] / 2;
1583
24.7M
  const int is_partition_root = bsize >= BLOCK_8X8;
1584
24.7M
  const int ctx = is_partition_root
1585
24.7M
                      ? partition_plane_context(xd, mi_row, mi_col, bsize)
1586
24.7M
                      : -1;
1587
24.7M
  const PARTITION_TYPE partition = pc_tree->partitioning;
1588
24.7M
  const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
1589
24.7M
#if !CONFIG_REALTIME_ONLY
1590
24.7M
  int quarter_step = mi_size_wide[bsize] / 4;
1591
24.7M
  int i;
1592
24.7M
  BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
1593
24.7M
#endif
1594
1595
24.7M
  if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
1596
21.2M
  if (subsize == BLOCK_INVALID) return;
1597
1598
21.2M
  if (!dry_run && ctx >= 0) {
1599
4.45M
    const int has_rows = (mi_row + hbs) < mi_params->mi_rows;
1600
4.45M
    const int has_cols = (mi_col + hbs) < mi_params->mi_cols;
1601
1602
4.45M
    if (has_rows && has_cols) {
1603
#if CONFIG_ENTROPY_STATS
1604
      td->counts->partition[ctx][partition]++;
1605
#endif
1606
1607
3.60M
      if (tile_data->allow_update_cdf) {
1608
3.60M
        FRAME_CONTEXT *fc = xd->tile_ctx;
1609
3.60M
        update_cdf(fc->partition_cdf[ctx], partition,
1610
3.60M
                   partition_cdf_length(bsize));
1611
3.60M
      }
1612
3.60M
    }
1613
4.45M
  }
1614
1615
21.2M
  switch (partition) {
1616
16.5M
    case PARTITION_NONE:
1617
16.5M
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1618
16.5M
               partition, pc_tree->none, rate);
1619
16.5M
      break;
1620
207k
    case PARTITION_VERT:
1621
207k
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1622
207k
               partition, pc_tree->vertical[0], rate);
1623
207k
      if (mi_col + hbs < mi_params->mi_cols) {
1624
48.8k
        encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
1625
48.8k
                 partition, pc_tree->vertical[1], rate);
1626
48.8k
      }
1627
207k
      break;
1628
248k
    case PARTITION_HORZ:
1629
248k
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1630
248k
               partition, pc_tree->horizontal[0], rate);
1631
248k
      if (mi_row + hbs < mi_params->mi_rows) {
1632
73.2k
        encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
1633
73.2k
                 partition, pc_tree->horizontal[1], rate);
1634
73.2k
      }
1635
248k
      break;
1636
4.22M
    case PARTITION_SPLIT:
1637
4.22M
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize,
1638
4.22M
                pc_tree->split[0], rate);
1639
4.22M
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + hbs, dry_run, subsize,
1640
4.22M
                pc_tree->split[1], rate);
1641
4.22M
      encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col, dry_run, subsize,
1642
4.22M
                pc_tree->split[2], rate);
1643
4.22M
      encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col + hbs, dry_run,
1644
4.22M
                subsize, pc_tree->split[3], rate);
1645
4.22M
      break;
1646
1647
0
#if !CONFIG_REALTIME_ONLY
1648
0
    case PARTITION_HORZ_A:
1649
0
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
1650
0
               partition, pc_tree->horizontala[0], rate);
1651
0
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2,
1652
0
               partition, pc_tree->horizontala[1], rate);
1653
0
      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
1654
0
               partition, pc_tree->horizontala[2], rate);
1655
0
      break;
1656
0
    case PARTITION_HORZ_B:
1657
0
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1658
0
               partition, pc_tree->horizontalb[0], rate);
1659
0
      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2,
1660
0
               partition, pc_tree->horizontalb[1], rate);
1661
0
      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run,
1662
0
               bsize2, partition, pc_tree->horizontalb[2], rate);
1663
0
      break;
1664
0
    case PARTITION_VERT_A:
1665
0
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
1666
0
               partition, pc_tree->verticala[0], rate);
1667
0
      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2,
1668
0
               partition, pc_tree->verticala[1], rate);
1669
0
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
1670
0
               partition, pc_tree->verticala[2], rate);
1671
1672
0
      break;
1673
0
    case PARTITION_VERT_B:
1674
0
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1675
0
               partition, pc_tree->verticalb[0], rate);
1676
0
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2,
1677
0
               partition, pc_tree->verticalb[1], rate);
1678
0
      encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run,
1679
0
               bsize2, partition, pc_tree->verticalb[2], rate);
1680
0
      break;
1681
0
    case PARTITION_HORZ_4:
1682
0
      for (i = 0; i < SUB_PARTITIONS_PART4; ++i) {
1683
0
        int this_mi_row = mi_row + i * quarter_step;
1684
0
        if (i > 0 && this_mi_row >= mi_params->mi_rows) break;
1685
1686
0
        encode_b(cpi, tile_data, td, tp, this_mi_row, mi_col, dry_run, subsize,
1687
0
                 partition, pc_tree->horizontal4[i], rate);
1688
0
      }
1689
0
      break;
1690
0
    case PARTITION_VERT_4:
1691
0
      for (i = 0; i < SUB_PARTITIONS_PART4; ++i) {
1692
0
        int this_mi_col = mi_col + i * quarter_step;
1693
0
        if (i > 0 && this_mi_col >= mi_params->mi_cols) break;
1694
0
        encode_b(cpi, tile_data, td, tp, mi_row, this_mi_col, dry_run, subsize,
1695
0
                 partition, pc_tree->vertical4[i], rate);
1696
0
      }
1697
0
      break;
1698
0
#endif
1699
0
    default: assert(0 && "Invalid partition type."); break;
1700
21.2M
  }
1701
1702
21.2M
  update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
1703
21.2M
}
1704
1705
static inline int is_adjust_var_based_part_enabled(
1706
    AV1_COMMON *const cm, const PARTITION_SPEED_FEATURES *const part_sf,
1707
347k
    BLOCK_SIZE bsize) {
1708
347k
  if (part_sf->partition_search_type != VAR_BASED_PARTITION) return 0;
1709
347k
  if (part_sf->adjust_var_based_rd_partitioning == 0 ||
1710
0
      part_sf->adjust_var_based_rd_partitioning > 2)
1711
347k
    return 0;
1712
1713
5
  if (bsize <= BLOCK_32X32) return 1;
1714
5
  if (part_sf->adjust_var_based_rd_partitioning == 2) {
1715
0
    const int is_larger_qindex = cm->quant_params.base_qindex > 190;
1716
0
    const int is_360p_or_larger = AOMMIN(cm->width, cm->height) >= 360;
1717
0
    return is_360p_or_larger && is_larger_qindex && bsize == BLOCK_64X64;
1718
0
  }
1719
5
  return 0;
1720
5
}
1721
1722
/*!\brief AV1 block partition search (partition estimation and partial search).
1723
*
1724
* \ingroup partition_search
1725
* Encode the block by applying pre-calculated partition patterns that are
1726
* represented by coding block sizes stored in the mbmi array. Minor partition
1727
* adjustments are tested and applied if they lead to lower rd costs. The
1728
* partition types are limited to a basic set: none, horz, vert, and split.
1729
*
1730
* \param[in]    cpi       Top-level encoder structure
1731
* \param[in]    td        Pointer to thread data
1732
* \param[in]    tile_data Pointer to struct holding adaptive
1733
*                         data/contexts/models for the tile during encoding
1734
* \param[in]    mib       Array representing MB_MODE_INFO pointers for mi
1735
*                         blocks starting from the first pixel of the current
1736
*                         block
1737
* \param[in]    tp        Pointer to the starting token
1738
* \param[in]    mi_row    Row coordinate of the block in a step size of MI_SIZE
1739
* \param[in]    mi_col    Column coordinate of the block in a step size of
1740
*                         MI_SIZE
1741
* \param[in]    bsize     Current block size
1742
* \param[in]    rate      Pointer to the final rate for encoding the current
1743
*                         block
1744
* \param[in]    dist      Pointer to the final distortion of the current block
1745
* \param[in]    do_recon  Whether the reconstruction function needs to be run,
1746
*                         either for finalizing a superblock or providing
1747
*                         reference for future sub-partitions
1748
* \param[in]    pc_tree   Pointer to the PC_TREE node holding the picked
1749
*                         partitions and mode info for the current block
1750
*
1751
* \remark Nothing is returned. The pc_tree struct is modified to store the
1752
* picked partition and modes. The rate and dist are also updated with those
1753
* corresponding to the best partition found.
1754
*/
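A minimal sketch of the rate-distortion bookkeeping this function uses when comparing the pre-calculated partitioning against its tested alternatives. The linear cost rate * rdmult + dist is a simplified stand-in for the RDCOST() macro, and the names are placeholders rather than the real structures.

#include <stdint.h>

typedef struct {
  int rate;
  int64_t dist;
  int64_t rdcost;
} sketch_rd_stats;

static int64_t sketch_rdcost(int rdmult, int rate, int64_t dist) {
  /* Simplified: the real RDCOST() macro applies fixed-point scaling. */
  return (int64_t)rate * rdmult + dist;
}

/* Keep whichever candidate has the lower total cost, mirroring the
 * last_part_rdc / none_rdc / chosen_rdc comparisons in the function below. */
static void sketch_keep_better(sketch_rd_stats *chosen,
                               const sketch_rd_stats *cand) {
  if (cand->rdcost < chosen->rdcost) *chosen = *cand;
}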
1755
void av1_rd_use_partition(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data,
1756
                          MB_MODE_INFO **mib, TokenExtra **tp, int mi_row,
1757
                          int mi_col, BLOCK_SIZE bsize, int *rate,
1758
1.22M
                          int64_t *dist, int do_recon, PC_TREE *pc_tree) {
1759
1.22M
  AV1_COMMON *const cm = &cpi->common;
1760
1.22M
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
1761
1.22M
  const int num_planes = av1_num_planes(cm);
1762
1.22M
  TileInfo *const tile_info = &tile_data->tile_info;
1763
1.22M
  MACROBLOCK *const x = &td->mb;
1764
1.22M
  MACROBLOCKD *const xd = &x->e_mbd;
1765
1.22M
  const ModeCosts *mode_costs = &x->mode_costs;
1766
1.22M
  const int bs = mi_size_wide[bsize];
1767
1.22M
  const int hbs = bs / 2;
1768
1.22M
  const int pl = (bsize >= BLOCK_8X8)
1769
1.22M
                     ? partition_plane_context(xd, mi_row, mi_col, bsize)
1770
1.22M
                     : 0;
1771
1.22M
  const PARTITION_TYPE partition =
1772
1.22M
      (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
1773
1.22M
                           : PARTITION_NONE;
1774
1.22M
  const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
1775
1.22M
  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
1776
1.22M
  RD_STATS last_part_rdc, none_rdc, chosen_rdc, invalid_rdc;
1777
1.22M
  BLOCK_SIZE bs_type = mib[0]->bsize;
1778
1.22M
  int use_partition_none = 0;
1779
1.22M
  x->try_merge_partition = 0;
1780
1781
1.22M
  if (pc_tree->none == NULL) {
1782
1.22M
    pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf);
1783
1.22M
    if (!pc_tree->none)
1784
0
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
1785
0
                         "Failed to allocate PICK_MODE_CONTEXT");
1786
1.22M
  }
1787
1.22M
  PICK_MODE_CONTEXT *ctx_none = pc_tree->none;
1788
1789
1.22M
  if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
1790
1791
1.22M
  assert(mi_size_wide[bsize] == mi_size_high[bsize]);
1792
  // In rt mode, currently the min partition size is BLOCK_8X8.
1793
1.22M
  assert(bsize >= cpi->sf.part_sf.default_min_partition_size);
1794
1795
1.22M
  av1_invalid_rd_stats(&last_part_rdc);
1796
1.22M
  av1_invalid_rd_stats(&none_rdc);
1797
1.22M
  av1_invalid_rd_stats(&chosen_rdc);
1798
1.22M
  av1_invalid_rd_stats(&invalid_rdc);
1799
1800
1.22M
  pc_tree->partitioning = partition;
1801
1802
1.22M
  xd->above_txfm_context =
1803
1.22M
      cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
1804
1.22M
  xd->left_txfm_context =
1805
1.22M
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
1806
1.22M
  av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1807
1808
1.22M
  if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
1809
0
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
1810
0
    x->mb_energy = av1_log_block_var(cpi, x, bsize);
1811
0
  }
1812
1813
  // Save rdmult before it might be changed, so it can be restored later.
1814
1.22M
  const int orig_rdmult = x->rdmult;
1815
1.22M
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
1816
1817
1.22M
  if (partition != PARTITION_NONE &&
1818
347k
      is_adjust_var_based_part_enabled(cm, &cpi->sf.part_sf, bsize) &&
1819
0
      (mi_row + hbs < mi_params->mi_rows &&
1820
0
       mi_col + hbs < mi_params->mi_cols)) {
1821
0
    assert(bsize > cpi->sf.part_sf.default_min_partition_size);
1822
0
    mib[0]->bsize = bsize;
1823
0
    pc_tree->partitioning = PARTITION_NONE;
1824
0
    x->try_merge_partition = 1;
1825
0
    pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, PARTITION_NONE,
1826
0
                  bsize, ctx_none, invalid_rdc);
1827
1828
0
    if (none_rdc.rate < INT_MAX) {
1829
0
      none_rdc.rate += mode_costs->partition_cost[pl][PARTITION_NONE];
1830
0
      none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
1831
0
    }
1832
1833
    // Try to skip split partition evaluation based on none partition
1834
    // characteristics.
1835
0
    if (none_rdc.rate < INT_MAX && none_rdc.skip_txfm == 1) {
1836
0
      use_partition_none = 1;
1837
0
    }
1838
1839
0
    av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1840
0
    mib[0]->bsize = bs_type;
1841
0
    pc_tree->partitioning = partition;
1842
0
  }
1843
1844
6.12M
  for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
1845
4.90M
    pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
1846
4.90M
    if (!pc_tree->split[i])
1847
0
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
1848
0
                         "Failed to allocate PC_TREE");
1849
4.90M
    pc_tree->split[i]->index = i;
1850
4.90M
  }
1851
1.22M
  switch (partition) {
1852
879k
    case PARTITION_NONE:
1853
879k
      pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
1854
879k
                    PARTITION_NONE, bsize, ctx_none, invalid_rdc);
1855
879k
      break;
1856
947
    case PARTITION_HORZ:
1857
947
      if (use_partition_none) {
1858
0
        av1_invalid_rd_stats(&last_part_rdc);
1859
0
        break;
1860
0
      }
1861
1862
2.84k
      for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) {
1863
1.89k
        pc_tree->horizontal[i] =
1864
1.89k
            av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
1865
1.89k
        if (!pc_tree->horizontal[i])
1866
0
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
1867
0
                             "Failed to allocate PICK_MODE_CONTEXT");
1868
1.89k
      }
1869
947
      pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
1870
947
                    PARTITION_HORZ, subsize, pc_tree->horizontal[0],
1871
947
                    invalid_rdc);
1872
947
      if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
1873
947
          mi_row + hbs < mi_params->mi_rows) {
1874
15
        RD_STATS tmp_rdc;
1875
15
        const PICK_MODE_CONTEXT *const ctx_h = pc_tree->horizontal[0];
1876
15
        av1_init_rd_stats(&tmp_rdc);
1877
15
        av1_update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1);
1878
15
        encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize,
1879
15
                          NULL);
1880
15
        pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
1881
15
                      PARTITION_HORZ, subsize, pc_tree->horizontal[1],
1882
15
                      invalid_rdc);
1883
15
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
1884
0
          av1_invalid_rd_stats(&last_part_rdc);
1885
0
          break;
1886
0
        }
1887
15
        last_part_rdc.rate += tmp_rdc.rate;
1888
15
        last_part_rdc.dist += tmp_rdc.dist;
1889
15
        last_part_rdc.rdcost += tmp_rdc.rdcost;
1890
15
      }
1891
947
      break;
1892
947
    case PARTITION_VERT:
1893
652
      if (use_partition_none) {
1894
0
        av1_invalid_rd_stats(&last_part_rdc);
1895
0
        break;
1896
0
      }
1897
1898
1.95k
      for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) {
1899
1.30k
        pc_tree->vertical[i] =
1900
1.30k
            av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
1901
1.30k
        if (!pc_tree->vertical[i])
1902
0
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
1903
0
                             "Failed to allocate PICK_MODE_CONTEXT");
1904
1.30k
      }
1905
652
      pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
1906
652
                    PARTITION_VERT, subsize, pc_tree->vertical[0], invalid_rdc);
1907
652
      if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
1908
652
          mi_col + hbs < mi_params->mi_cols) {
1909
3
        RD_STATS tmp_rdc;
1910
3
        const PICK_MODE_CONTEXT *const ctx_v = pc_tree->vertical[0];
1911
3
        av1_init_rd_stats(&tmp_rdc);
1912
3
        av1_update_state(cpi, td, ctx_v, mi_row, mi_col, subsize, 1);
1913
3
        encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize,
1914
3
                          NULL);
1915
3
        pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
1916
3
                      PARTITION_VERT, subsize,
1917
3
                      pc_tree->vertical[bsize > BLOCK_8X8], invalid_rdc);
1918
3
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
1919
0
          av1_invalid_rd_stats(&last_part_rdc);
1920
0
          break;
1921
0
        }
1922
3
        last_part_rdc.rate += tmp_rdc.rate;
1923
3
        last_part_rdc.dist += tmp_rdc.dist;
1924
3
        last_part_rdc.rdcost += tmp_rdc.rdcost;
1925
3
      }
1926
652
      break;
1927
345k
    case PARTITION_SPLIT:
1928
345k
      if (use_partition_none) {
1929
0
        av1_invalid_rd_stats(&last_part_rdc);
1930
0
        break;
1931
0
      }
1932
1933
345k
      last_part_rdc.rate = 0;
1934
345k
      last_part_rdc.dist = 0;
1935
345k
      last_part_rdc.rdcost = 0;
1936
1.72M
      for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
1937
1.38M
        int x_idx = (i & 1) * hbs;
1938
1.38M
        int y_idx = (i >> 1) * hbs;
1939
1.38M
        int jj = i >> 1, ii = i & 0x01;
1940
1.38M
        RD_STATS tmp_rdc;
1941
1.38M
        if ((mi_row + y_idx >= mi_params->mi_rows) ||
1942
1.28M
            (mi_col + x_idx >= mi_params->mi_cols))
1943
182k
          continue;
1944
1945
1.19M
        av1_init_rd_stats(&tmp_rdc);
1946
1.19M
        av1_rd_use_partition(
1947
1.19M
            cpi, td, tile_data,
1948
1.19M
            mib + jj * hbs * mi_params->mi_stride + ii * hbs, tp,
1949
1.19M
            mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate,
1950
1.19M
            &tmp_rdc.dist, i != (SUB_PARTITIONS_SPLIT - 1), pc_tree->split[i]);
1951
1.20M
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
1952
0
          av1_invalid_rd_stats(&last_part_rdc);
1953
0
          break;
1954
0
        }
1955
1.19M
        last_part_rdc.rate += tmp_rdc.rate;
1956
1.19M
        last_part_rdc.dist += tmp_rdc.dist;
1957
1.19M
      }
1958
345k
      break;
1959
0
    case PARTITION_VERT_A:
1960
0
    case PARTITION_VERT_B:
1961
0
    case PARTITION_HORZ_A:
1962
0
    case PARTITION_HORZ_B:
1963
0
    case PARTITION_HORZ_4:
1964
0
    case PARTITION_VERT_4:
1965
0
      assert(0 && "Cannot handle extended partition types");
1966
0
    default: assert(0); break;
1967
1.22M
  }
1968
1969
1.22M
  if (last_part_rdc.rate < INT_MAX) {
1970
1.22M
    last_part_rdc.rate += mode_costs->partition_cost[pl][partition];
1971
1.22M
    last_part_rdc.rdcost =
1972
1.22M
        RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist);
1973
1.22M
  }
1974
1975
1.22M
  if ((cpi->sf.part_sf.partition_search_type == VAR_BASED_PARTITION &&
1976
1.22M
       cpi->sf.part_sf.adjust_var_based_rd_partitioning > 2) &&
1977
0
      partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
1978
0
      (mi_row + bs < mi_params->mi_rows ||
1979
0
       mi_row + hbs == mi_params->mi_rows) &&
1980
0
      (mi_col + bs < mi_params->mi_cols ||
1981
0
       mi_col + hbs == mi_params->mi_cols)) {
1982
0
    BLOCK_SIZE split_subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
1983
0
    chosen_rdc.rate = 0;
1984
0
    chosen_rdc.dist = 0;
1985
1986
0
    av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1987
0
    pc_tree->partitioning = PARTITION_SPLIT;
1988
1989
    // Split partition.
1990
0
    for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
1991
0
      int x_idx = (i & 1) * hbs;
1992
0
      int y_idx = (i >> 1) * hbs;
1993
0
      RD_STATS tmp_rdc;
1994
1995
0
      if ((mi_row + y_idx >= mi_params->mi_rows) ||
1996
0
          (mi_col + x_idx >= mi_params->mi_cols))
1997
0
        continue;
1998
1999
0
      av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2000
0
      pc_tree->split[i]->partitioning = PARTITION_NONE;
2001
0
      if (pc_tree->split[i]->none == NULL)
2002
0
        pc_tree->split[i]->none =
2003
0
            av1_alloc_pmc(cpi, split_subsize, &td->shared_coeff_buf);
2004
0
      if (!pc_tree->split[i]->none)
2005
0
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2006
0
                           "Failed to allocate PICK_MODE_CONTEXT");
2007
0
      pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
2008
0
                    PARTITION_SPLIT, split_subsize, pc_tree->split[i]->none,
2009
0
                    invalid_rdc);
2010
2011
0
      av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2012
0
      if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2013
0
        av1_invalid_rd_stats(&chosen_rdc);
2014
0
        break;
2015
0
      }
2016
2017
0
      chosen_rdc.rate += tmp_rdc.rate;
2018
0
      chosen_rdc.dist += tmp_rdc.dist;
2019
2020
0
      if (i != SUB_PARTITIONS_SPLIT - 1)
2021
0
        encode_sb(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx,
2022
0
                  OUTPUT_ENABLED, split_subsize, pc_tree->split[i], NULL);
2023
2024
0
      chosen_rdc.rate += mode_costs->partition_cost[pl][PARTITION_NONE];
2025
0
    }
2026
0
    if (chosen_rdc.rate < INT_MAX) {
2027
0
      chosen_rdc.rate += mode_costs->partition_cost[pl][PARTITION_SPLIT];
2028
0
      chosen_rdc.rdcost = RDCOST(x->rdmult, chosen_rdc.rate, chosen_rdc.dist);
2029
0
    }
2030
0
  }
2031
2032
  // If last_part is better, set the partitioning to that.
2033
1.22M
  if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
2034
1.22M
    mib[0]->bsize = bs_type;
2035
1.22M
    if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition;
2036
2037
1.22M
    chosen_rdc = last_part_rdc;
2038
1.22M
  }
2039
  // If none was better, set the partitioning to that.
2040
1.22M
  if (none_rdc.rdcost < INT64_MAX &&
2041
0
      none_rdc.rdcost - (none_rdc.rdcost >> 9) < chosen_rdc.rdcost) {
2042
0
    mib[0]->bsize = bsize;
2043
0
    if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
2044
0
    chosen_rdc = none_rdc;
2045
0
  }
2046
2047
1.22M
  av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2048
2049
  // We must have chosen a partitioning and encoding, or we'll fail later on.
2050
  // No other opportunities for success.
2051
1.22M
  if (bsize == cm->seq_params->sb_size)
2052
1.22M
    assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
2053
2054
#if CONFIG_COLLECT_COMPONENT_TIMING
2055
  start_timing(cpi, encode_sb_time);
2056
#endif
2057
1.22M
  if (do_recon) {
2058
966k
    if (bsize == cm->seq_params->sb_size) {
2059
      // NOTE: To get estimate for rate due to the tokens, use:
2060
      // int rate_coeffs = 0;
2061
      // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
2062
      //           bsize, pc_tree, &rate_coeffs);
2063
26.1k
      set_cb_offsets(x->cb_offset, 0, 0);
2064
26.1k
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
2065
26.1k
                pc_tree, NULL);
2066
940k
    } else {
2067
940k
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
2068
940k
                pc_tree, NULL);
2069
940k
    }
2070
966k
  }
2071
#if CONFIG_COLLECT_COMPONENT_TIMING
2072
  end_timing(cpi, encode_sb_time);
2073
#endif
2074
2075
1.22M
  *rate = chosen_rdc.rate;
2076
1.22M
  *dist = chosen_rdc.dist;
2077
1.22M
  x->rdmult = orig_rdmult;
2078
1.22M
}
2079
2080
static void encode_b_nonrd(const AV1_COMP *const cpi, TileDataEnc *tile_data,
2081
                           ThreadData *td, TokenExtra **tp, int mi_row,
2082
                           int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize,
2083
                           PARTITION_TYPE partition,
2084
2.85M
                           PICK_MODE_CONTEXT *const ctx, int *rate) {
2085
#if CONFIG_COLLECT_COMPONENT_TIMING
2086
  start_timing((AV1_COMP *)cpi, encode_b_nonrd_time);
2087
#endif
2088
2.85M
  const AV1_COMMON *const cm = &cpi->common;
2089
2.85M
  TileInfo *const tile = &tile_data->tile_info;
2090
2.85M
  MACROBLOCK *const x = &td->mb;
2091
2.85M
  MACROBLOCKD *xd = &x->e_mbd;
2092
2.85M
  av1_set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);
2093
2.85M
  const int origin_mult = x->rdmult;
2094
2.85M
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
2095
2.85M
  MB_MODE_INFO *mbmi = xd->mi[0];
2096
2.85M
  mbmi->partition = partition;
2097
2.85M
  av1_update_state(cpi, td, ctx, mi_row, mi_col, bsize, dry_run);
2098
2.85M
  const int subsampling_x = cpi->common.seq_params->subsampling_x;
2099
2.85M
  const int subsampling_y = cpi->common.seq_params->subsampling_y;
2100
2.85M
  if (!dry_run) {
2101
2.69M
    set_cb_offsets(x->mbmi_ext_frame->cb_offset, x->cb_offset[PLANE_TYPE_Y],
2102
2.69M
                   x->cb_offset[PLANE_TYPE_UV]);
2103
2.69M
    assert(x->cb_offset[PLANE_TYPE_Y] <
2104
2.69M
           (1 << num_pels_log2_lookup[cpi->common.seq_params->sb_size]));
2105
2.69M
    assert(x->cb_offset[PLANE_TYPE_UV] <
2106
2.69M
           ((1 << num_pels_log2_lookup[cpi->common.seq_params->sb_size]) >>
2107
2.69M
            (subsampling_x + subsampling_y)));
2108
2.69M
  }
2109
2110
2.85M
  encode_superblock(cpi, tile_data, td, tp, dry_run, bsize, rate);
2111
2.85M
  if (!dry_run) {
2112
2.70M
    update_cb_offsets(x, bsize, subsampling_x, subsampling_y);
2113
2.70M
    if (has_second_ref(mbmi)) {
2114
3
      if (mbmi->compound_idx == 0 ||
2115
3
          mbmi->interinter_comp.type == COMPOUND_AVERAGE)
2116
3
        mbmi->comp_group_idx = 0;
2117
0
      else
2118
0
        mbmi->comp_group_idx = 1;
2119
3
      mbmi->compound_idx = 1;
2120
3
    }
2121
2.70M
    RD_COUNTS *const rdc = &td->rd_counts;
2122
2.70M
    if (mbmi->skip_mode) {
2123
0
      assert(!frame_is_intra_only(cm));
2124
0
      rdc->skip_mode_used_flag = 1;
2125
0
      if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT &&
2126
0
          has_second_ref(mbmi)) {
2127
0
        rdc->compound_ref_used_flag = 1;
2128
0
      }
2129
0
      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
2130
2.70M
    } else {
2131
2.70M
      const int seg_ref_active =
2132
2.70M
          segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
2133
2.70M
      if (!seg_ref_active) {
2134
        // If the segment reference feature is enabled, we have only a single
2135
        // reference frame allowed for the segment, so exclude it from
2136
        // the reference frame counts used to work out probabilities.
2137
2.69M
        if (is_inter_block(mbmi)) {
2138
208k
          av1_collect_neighbors_ref_counts(xd);
2139
208k
          if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT &&
2140
185k
              has_second_ref(mbmi)) {
2141
            // This flag is also updated for 4x4 blocks
2142
3
            rdc->compound_ref_used_flag = 1;
2143
3
          }
2144
208k
          set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
2145
208k
        }
2146
2.69M
      }
2147
2.70M
    }
2148
2.70M
    if (cpi->oxcf.algo_cfg.loopfilter_control == LOOPFILTER_SELECTIVELY &&
2149
0
        (mbmi->mode == NEWMV || mbmi->mode < INTRA_MODE_END)) {
2150
0
      int32_t blocks = mi_size_high[bsize] * mi_size_wide[bsize];
2151
0
      rdc->newmv_or_intra_blocks += blocks;
2152
0
    }
2153
2.70M
    if (tile_data->allow_update_cdf) update_stats(&cpi->common, td);
2154
2.70M
  }
2155
2.85M
  if ((cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ ||
2156
2.85M
       cpi->active_map.enabled || cpi->roi.enabled) &&
2157
0
      mbmi->skip_txfm && !cpi->rc.rtc_external_ratectrl && cm->seg.enabled)
2158
0
    av1_cyclic_reset_segment_skip(cpi, x, mi_row, mi_col, bsize, dry_run);
2159
  // TODO(Ravi/Remya): Move this copy function to a better logical place
2160
  // This function will copy the best mode information from block
2161
  // level (x->mbmi_ext) to frame level (cpi->mbmi_ext_info.frame_base). This
2162
  // frame level buffer (cpi->mbmi_ext_info.frame_base) will be used during
2163
  // bitstream preparation.
2164
2.85M
  av1_copy_mbmi_ext_to_mbmi_ext_frame(x->mbmi_ext_frame, &x->mbmi_ext,
2165
2.85M
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
2166
2.85M
  x->rdmult = origin_mult;
2167
#if CONFIG_COLLECT_COMPONENT_TIMING
2168
  end_timing((AV1_COMP *)cpi, encode_b_nonrd_time);
2169
#endif
2170
2.85M
}
2171
2172
static int get_force_zeromv_skip_flag_for_blk(const AV1_COMP *cpi,
2173
                                              const MACROBLOCK *x,
2174
2.83M
                                              BLOCK_SIZE bsize) {
2175
  // Force zero MV skip based on SB level decision
2176
2.83M
  if (x->force_zeromv_skip_for_sb < 2) return x->force_zeromv_skip_for_sb;
2177
2178
  // For blocks of size equal to the superblock size, the decision has already
2179
  // been made at the superblock level, so the zeromv-skip decision is skipped.
2180
18.4E
  const AV1_COMMON *const cm = &cpi->common;
2181
18.4E
  if (bsize == cm->seq_params->sb_size) return 0;
2182
2183
18.4E
  const int num_planes = av1_num_planes(cm);
2184
18.4E
  const MACROBLOCKD *const xd = &x->e_mbd;
2185
18.4E
  const unsigned int thresh_exit_part_y =
2186
18.4E
      cpi->zeromv_skip_thresh_exit_part[bsize];
2187
18.4E
  const unsigned int thresh_exit_part_uv =
2188
18.4E
      CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y);
2189
18.4E
  const unsigned int thresh_exit_part[MAX_MB_PLANE] = { thresh_exit_part_y,
2190
18.4E
                                                        thresh_exit_part_uv,
2191
18.4E
                                                        thresh_exit_part_uv };
2192
18.4E
  const YV12_BUFFER_CONFIG *const yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
2193
18.4E
  const struct scale_factors *const sf =
2194
18.4E
      get_ref_scale_factors_const(cm, LAST_FRAME);
2195
2196
18.4E
  struct buf_2d yv12_mb[MAX_MB_PLANE];
2197
18.4E
  av1_setup_pred_block(xd, yv12_mb, yv12, sf, sf, num_planes);
2198
2199
18.4E
  for (int plane = 0; plane < num_planes; ++plane) {
2200
0
    const struct macroblock_plane *const p = &x->plane[plane];
2201
0
    const struct macroblockd_plane *const pd = &xd->plane[plane];
2202
0
    const BLOCK_SIZE bs =
2203
0
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
2204
0
    const unsigned int plane_sad = cpi->ppi->fn_ptr[bs].sdf(
2205
0
        p->src.buf, p->src.stride, yv12_mb[plane].buf, yv12_mb[plane].stride);
2206
0
    assert(plane < MAX_MB_PLANE);
2207
0
    if (plane_sad >= thresh_exit_part[plane]) return 0;
2208
0
  }
2209
18.4E
  return 1;
2210
18.4E
}
2211
2212
/*!\brief Top level function to pick block mode for non-RD optimized case
2213
 *
2214
 * \ingroup partition_search
2215
 * \callgraph
2216
 * \callergraph
2217
 * Searches prediction modes, transform, and coefficient coding modes for an
2218
 * individual coding block. This function is the top-level function that is
2219
 * used for non-RD optimized mode search (controlled by
2220
 * \c cpi->sf.rt_sf.use_nonrd_pick_mode). Depending on frame type it calls
2221
 * inter/skip/hybrid-intra mode search functions
2222
 *
2223
 * \param[in]    cpi            Top-level encoder structure
2224
 * \param[in]    tile_data      Pointer to struct holding adaptive
2225
 *                              data/contexts/models for the tile during
2226
 *                              encoding
2227
 * \param[in]    x              Pointer to structure holding all the data for
2228
 *                              the current macroblock
2229
 * \param[in]    mi_row         Row coordinate of the block in a step size of
2230
 *                              MI_SIZE
2231
 * \param[in]    mi_col         Column coordinate of the block in a step size of
2232
 *                              MI_SIZE
2233
 * \param[in]    rd_cost        Pointer to structure holding rate and distortion
2234
 *                              stats for the current block
2235
 * \param[in]    bsize          Current block size
2236
 * \param[in]    ctx            Pointer to structure holding coding contexts and
2237
 *                              chosen modes for the current block
2238
 *
2239
 * \remark Nothing is returned. Instead, the chosen modes and contexts necessary
2240
 * for reconstruction are stored in ctx, the rate-distortion stats are stored in
2241
 * rd_cost. If no valid mode leading to rd_cost <= best_rd, the status will be
2242
 * signalled by an INT64_MAX rd_cost->rdcost.
2243
 */
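A minimal sketch of the failure convention noted in the remark above: a caller can treat an INT64_MAX rdcost as "no valid mode found". The struct and helper names are placeholders, not the real call sites.

#include <stdint.h>

typedef struct {
  int rate;
  int64_t dist;
  int64_t rdcost;
} sketch_rd_cost;

/* Returns 1 if the mode search produced a usable mode, 0 otherwise. */
static int sketch_mode_pick_succeeded(const sketch_rd_cost *rd) {
  return rd->rdcost != INT64_MAX;
}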
2244
static void pick_sb_modes_nonrd(AV1_COMP *const cpi, TileDataEnc *tile_data,
2245
                                MACROBLOCK *const x, int mi_row, int mi_col,
2246
                                RD_STATS *rd_cost, BLOCK_SIZE bsize,
2247
2.83M
                                PICK_MODE_CONTEXT *ctx) {
2248
  // For nonrd mode, av1_set_offsets is already called at the superblock level
2249
  // in encode_nonrd_sb when we determine the partitioning.
2250
2.83M
  if (bsize != cpi->common.seq_params->sb_size ||
2251
2.83M
      cpi->sf.rt_sf.nonrd_check_partition_split == 1) {
2252
2.83M
    av1_set_offsets(cpi, &tile_data->tile_info, x, mi_row, mi_col, bsize);
2253
2.83M
  }
2254
2.83M
  assert(x->last_set_offsets_loc.mi_row == mi_row &&
2255
2.83M
         x->last_set_offsets_loc.mi_col == mi_col &&
2256
2.83M
         x->last_set_offsets_loc.bsize == bsize);
2257
2.83M
  AV1_COMMON *const cm = &cpi->common;
2258
2.83M
  const int num_planes = av1_num_planes(cm);
2259
2.83M
  MACROBLOCKD *const xd = &x->e_mbd;
2260
2.83M
  MB_MODE_INFO *mbmi = xd->mi[0];
2261
2.83M
  struct macroblock_plane *const p = x->plane;
2262
2.83M
  struct macroblockd_plane *const pd = xd->plane;
2263
2.83M
  const AQ_MODE aq_mode = cpi->oxcf.q_cfg.aq_mode;
2264
2.83M
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
2265
2.83M
  int i;
2266
2.83M
  const int seg_skip =
2267
2.83M
      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
2268
2269
  // This is only needed for real time/allintra row-mt enabled multi-threaded
2270
  // encoding with cost update frequency set to COST_UPD_TILE/COST_UPD_OFF.
2271
2.83M
  wait_for_top_right_sb(&cpi->mt_info.enc_row_mt, &tile_data->row_mt_sync,
2272
2.83M
                        &tile_data->tile_info, cm->seq_params->sb_size,
2273
2.83M
                        cm->seq_params->mib_size_log2, bsize, mi_row, mi_col);
2274
2275
#if CONFIG_COLLECT_COMPONENT_TIMING
2276
  start_timing(cpi, pick_sb_modes_nonrd_time);
2277
#endif
2278
  // Sets up the tx_type_map buffer in MACROBLOCKD.
2279
2.83M
  xd->tx_type_map = txfm_info->tx_type_map_;
2280
2.83M
  xd->tx_type_map_stride = mi_size_wide[bsize];
2281
7.44M
  for (i = 0; i < num_planes; ++i) {
2282
4.61M
    p[i].coeff = ctx->coeff[i];
2283
4.61M
    p[i].qcoeff = ctx->qcoeff[i];
2284
4.61M
    p[i].dqcoeff = ctx->dqcoeff[i];
2285
4.61M
    p[i].eobs = ctx->eobs[i];
2286
4.61M
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
2287
4.61M
  }
2288
8.49M
  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
2289
2290
2.83M
  if (!seg_skip) {
2291
2.83M
    x->force_zeromv_skip_for_blk =
2292
2.83M
        get_force_zeromv_skip_flag_for_blk(cpi, x, bsize);
2293
2294
    // Source variance may already have been computed at superblock level, so no need
2295
    // to recompute, unless bsize < sb_size or source_variance is not yet set.
2296
2.83M
    if (!x->force_zeromv_skip_for_blk &&
2297
2.83M
        (x->source_variance == UINT_MAX || bsize < cm->seq_params->sb_size))
2298
2.83M
      x->source_variance = av1_get_perpixel_variance_facade(
2299
2.83M
          cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y);
2300
2.83M
  }
2301
2302
  // Save rdmult before it might be changed, so it can be restored later.
2303
2.83M
  const int orig_rdmult = x->rdmult;
2304
2.83M
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode, mbmi);
2305
2.83M
  if (cpi->roi.enabled && cpi->roi.delta_qp_enabled && mbmi->segment_id)
2306
0
    x->rdmult = cpi->roi.rdmult_delta_qp;
2307
  // Set error per bit for current rdmult
2308
2.83M
  av1_set_error_per_bit(&x->errorperbit, x->rdmult);
2309
  // Find best coding mode & reconstruct the MB so it is available
2310
  // as a predictor for MBs that follow in the SB
2311
2.83M
  if (frame_is_intra_only(cm)) {
2312
#if CONFIG_COLLECT_COMPONENT_TIMING
2313
    start_timing(cpi, hybrid_intra_mode_search_time);
2314
#endif
2315
2.36M
    hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
2316
#if CONFIG_COLLECT_COMPONENT_TIMING
2317
    end_timing(cpi, hybrid_intra_mode_search_time);
2318
#endif
2319
2.36M
  } else {
2320
#if CONFIG_COLLECT_COMPONENT_TIMING
2321
    start_timing(cpi, nonrd_pick_inter_mode_sb_time);
2322
#endif
2323
473k
    if (seg_skip) {
2324
0
      x->force_zeromv_skip_for_blk = 1;
2325
      // TODO(marpan): Consider adding a function for nonrd:
2326
      // av1_nonrd_pick_inter_mode_sb_seg_skip(), instead of setting
2327
      // x->force_zeromv_skip flag and entering av1_nonrd_pick_inter_mode_sb().
2328
0
    }
2329
473k
    av1_nonrd_pick_inter_mode_sb(cpi, tile_data, x, rd_cost, bsize, ctx);
2330
#if CONFIG_COLLECT_COMPONENT_TIMING
2331
    end_timing(cpi, nonrd_pick_inter_mode_sb_time);
2332
#endif
2333
473k
  }
2334
2.83M
  if (cpi->sf.rt_sf.skip_cdef_sb) {
2335
    // cdef_strength is initialized to 1 which means skip_cdef, and is updated
2336
    // here. Check to see if skipping cdef is allowed. Never skip on slide/scene
2337
    // change, near a key frame, or when color sensitivity is set. Always allow
2338
    // cdef_skip for seg_skip = 1.
2339
0
    const int allow_cdef_skipping =
2340
0
        seg_skip ||
2341
0
        (cpi->rc.frames_since_key > 10 && !cpi->rc.high_source_sad &&
2342
0
         !(x->color_sensitivity[COLOR_SENS_IDX(AOM_PLANE_U)] ||
2343
0
           x->color_sensitivity[COLOR_SENS_IDX(AOM_PLANE_V)]));
2344
2345
    // Find the corresponding 64x64 block. It'll be the 128x128 block if that's
2346
    // the block size.
2347
0
    const int mi_row_sb = mi_row - mi_row % MI_SIZE_64X64;
2348
0
    const int mi_col_sb = mi_col - mi_col % MI_SIZE_64X64;
2349
0
    MB_MODE_INFO **mi_sb =
2350
0
        cm->mi_params.mi_grid_base +
2351
0
        get_mi_grid_idx(&cm->mi_params, mi_row_sb, mi_col_sb);
2352
0
    const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
2353
0
    unsigned int thresh_spatial_var =
2354
0
        (cpi->oxcf.speed >= 11 && !is_720p_or_larger &&
2355
0
         cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN)
2356
0
            ? 400
2357
0
            : UINT_MAX;
2358
    // For skip_cdef_sb = 1: do not skip if allow_cdef_skipping is false or
2359
    // intra or new mv is picked, with a possible condition on spatial variance.
2360
    // For skip_cdef_sb >= 2: more aggressive mode to always skip unless
2361
    // allow_cdef_skipping is false and source_variance is non-zero.
2362
0
    if (cpi->sf.rt_sf.skip_cdef_sb >= 2) {
2363
0
      mi_sb[0]->cdef_strength =
2364
0
          mi_sb[0]->cdef_strength &&
2365
0
          (allow_cdef_skipping || x->source_variance == 0);
2366
0
    } else {
2367
0
      mi_sb[0]->cdef_strength =
2368
0
          mi_sb[0]->cdef_strength && allow_cdef_skipping &&
2369
0
          !(x->source_variance < thresh_spatial_var &&
2370
0
            (mbmi->mode < INTRA_MODES || mbmi->mode == NEWMV));
2371
0
    }
2372
    // Store in the pickmode context.
2373
0
    ctx->mic.cdef_strength = mi_sb[0]->cdef_strength;
2374
0
  }
2375
2.83M
  x->rdmult = orig_rdmult;
2376
2.83M
  ctx->rd_stats.rate = rd_cost->rate;
2377
2.83M
  ctx->rd_stats.dist = rd_cost->dist;
2378
2.83M
  ctx->rd_stats.rdcost = rd_cost->rdcost;
2379
#if CONFIG_COLLECT_COMPONENT_TIMING
2380
  end_timing(cpi, pick_sb_modes_nonrd_time);
2381
#endif
2382
2.83M
}
2383
2384
static int try_split_partition(AV1_COMP *const cpi, ThreadData *const td,
2385
                               TileDataEnc *const tile_data,
2386
                               TileInfo *const tile_info, TokenExtra **tp,
2387
                               MACROBLOCK *const x, MACROBLOCKD *const xd,
2388
                               const CommonModeInfoParams *const mi_params,
2389
                               const int mi_row, const int mi_col,
2390
                               const BLOCK_SIZE bsize, const int pl,
2391
0
                               PC_TREE *pc_tree) {
2392
0
  AV1_COMMON *const cm = &cpi->common;
2393
0
  const ModeCosts *mode_costs = &x->mode_costs;
2394
0
  const int hbs = mi_size_wide[bsize] / 2;
2395
0
  if (mi_row + mi_size_high[bsize] >= mi_params->mi_rows ||
2396
0
      mi_col + mi_size_wide[bsize] >= mi_params->mi_cols)
2397
0
    return 0;
2398
0
  if (bsize <= BLOCK_8X8 || frame_is_intra_only(cm)) return 0;
2399
0
  if (x->content_state_sb.source_sad_nonrd <= kLowSad) return 0;
2400
2401
  // Do not try split partition when the source sad is small, or
2402
  // the prediction residual is small.
2403
0
  const YV12_BUFFER_CONFIG *const yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
2404
0
  const struct scale_factors *const sf =
2405
0
      get_ref_scale_factors_const(cm, LAST_FRAME);
2406
0
  const int num_planes = av1_num_planes(cm);
2407
0
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
2408
0
  av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, sf, num_planes);
2409
0
  int block_sad = 0;
2410
0
  for (int plane = 0; plane < num_planes; ++plane) {
2411
0
    const struct macroblock_plane *const p = &x->plane[plane];
2412
0
    const struct macroblockd_plane *const pd = &xd->plane[plane];
2413
0
    const BLOCK_SIZE bs =
2414
0
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
2415
0
    const unsigned int plane_sad = cpi->ppi->fn_ptr[bs].sdf(
2416
0
        p->src.buf, p->src.stride, pd->pre[0].buf, pd->pre[0].stride);
2417
0
    block_sad += plane_sad;
2418
0
  }
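  // Normalize the SAD accumulated over all planes by the luma pixel count to
  // obtain an average prediction error per pixel for the block.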
2419
0
  const int blk_pix = block_size_wide[bsize] * block_size_high[bsize];
2420
0
  const int block_avg_sad = block_sad / blk_pix;
2421
  // TODO(chengchen): find a proper threshold. It might change according to
2422
  // q as well.
2423
0
  const int threshold = 25;
2424
0
  if (block_avg_sad < threshold) return 0;
2425
2426
0
  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
2427
0
  RD_STATS split_rdc, none_rdc;
2428
0
  av1_invalid_rd_stats(&split_rdc);
2429
0
  av1_invalid_rd_stats(&none_rdc);
2430
0
  av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2431
0
  xd->above_txfm_context =
2432
0
      cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
2433
0
  xd->left_txfm_context =
2434
0
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
2435
2436
  // Calculate rdcost for none partition
2437
0
  pc_tree->partitioning = PARTITION_NONE;
2438
0
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
2439
0
  if (!pc_tree->none) {
2440
0
    pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf);
2441
0
    if (!pc_tree->none)
2442
0
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2443
0
                         "Failed to allocate PICK_MODE_CONTEXT");
2444
0
  } else {
2445
0
    av1_reset_pmc(pc_tree->none);
2446
0
  }
2447
0
  pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize,
2448
0
                      pc_tree->none);
2449
0
  none_rdc.rate += mode_costs->partition_cost[pl][PARTITION_NONE];
2450
0
  none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
2451
0
  av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2452
2453
  // Calculate rdcost for split partition
2454
0
  pc_tree->partitioning = PARTITION_SPLIT;
2455
0
  const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
2456
0
  av1_init_rd_stats(&split_rdc);
2457
0
  split_rdc.rate += mode_costs->partition_cost[pl][PARTITION_SPLIT];
2458
0
  if (subsize >= BLOCK_8X8) {
2459
0
    split_rdc.rate += (mode_costs->partition_cost[pl][PARTITION_NONE] * 4);
2460
0
  }
2461
0
  for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
2462
0
    if (!pc_tree->split[i]) {
2463
0
      pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
2464
0
      if (!pc_tree->split[i])
2465
0
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2466
0
                           "Failed to allocate PC_TREE");
2467
0
    }
2468
0
    pc_tree->split[i]->index = i;
2469
0
  }
2470
0
  for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
2471
0
    RD_STATS block_rdc;
2472
0
    av1_invalid_rd_stats(&block_rdc);
2473
0
    int x_idx = (i & 1) * hbs;
2474
0
    int y_idx = (i >> 1) * hbs;
2475
0
    if ((mi_row + y_idx >= mi_params->mi_rows) ||
2476
0
        (mi_col + x_idx >= mi_params->mi_cols))
2477
0
      continue;
2478
0
    xd->above_txfm_context =
2479
0
        cm->above_contexts.txfm[tile_info->tile_row] + mi_col + x_idx;
2480
0
    xd->left_txfm_context =
2481
0
        xd->left_txfm_context_buffer + ((mi_row + y_idx) & MAX_MIB_MASK);
2482
0
    if (!pc_tree->split[i]->none) {
2483
0
      pc_tree->split[i]->none =
2484
0
          av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
2485
0
      if (!pc_tree->split[i]->none)
2486
0
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2487
0
                           "Failed to allocate PICK_MODE_CONTEXT");
2488
0
    } else {
2489
0
      av1_reset_pmc(pc_tree->split[i]->none);
2490
0
    }
2491
0
    pc_tree->split[i]->partitioning = PARTITION_NONE;
2492
0
    pick_sb_modes_nonrd(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
2493
0
                        &block_rdc, subsize, pc_tree->split[i]->none);
2494
0
    split_rdc.rate += block_rdc.rate;
2495
0
    split_rdc.dist += block_rdc.dist;
2496
0
    av1_rd_cost_update(x->rdmult, &split_rdc);
2497
0
    if (none_rdc.rdcost < split_rdc.rdcost) break;
2498
0
    if (i != SUB_PARTITIONS_SPLIT - 1)
2499
0
      encode_b_nonrd(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 1,
2500
0
                     subsize, PARTITION_NONE, pc_tree->split[i]->none, NULL);
2501
0
  }
2502
0
  av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2503
0
  split_rdc.rdcost = RDCOST(x->rdmult, split_rdc.rate, split_rdc.dist);
2504
0
  const int split = split_rdc.rdcost < none_rdc.rdcost;
2505
2506
0
  return split;
2507
0
}
2508
2509
// Returns whether the SPLIT partition should be evaluated
2510
static bool calc_do_split_flag(const AV1_COMP *cpi, const MACROBLOCK *x,
2511
                               const PC_TREE *pc_tree, const RD_STATS *none_rdc,
2512
                               const CommonModeInfoParams *mi_params,
2513
                               int mi_row, int mi_col, int hbs,
2514
49.9k
                               BLOCK_SIZE bsize, PARTITION_TYPE partition) {
2515
49.9k
  const AV1_COMMON *const cm = &cpi->common;
2516
49.9k
  const int is_larger_qindex = cm->quant_params.base_qindex > 100;
2517
49.9k
  const MACROBLOCKD *const xd = &x->e_mbd;
2518
49.9k
  bool do_split =
2519
49.9k
      (cpi->sf.rt_sf.nonrd_check_partition_merge_mode == 3)
2520
49.9k
          ? (bsize <= BLOCK_32X32 || (is_larger_qindex && bsize <= BLOCK_64X64))
2521
49.9k
          : true;
2522
49.9k
  if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN ||
2523
49.9k
      cpi->sf.rt_sf.nonrd_check_partition_merge_mode < 2 ||
2524
49.9k
      cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) ||
2525
49.9k
      !none_rdc->skip_txfm)
2526
49.6k
    return do_split;
2527
2528
307
  const int use_model_yrd_large = get_model_rd_flag(cpi, xd, bsize);
2529
2530
  // When model-based skip is not used (i.e., use_model_yrd_large = 0), skip_txfm
2531
  // would have been populated based on the Hadamard transform, so the skip_txfm
2532
  // flag is more reliable. Hence SPLIT evaluation is disabled at all quantizers
2533
  // for 8x8 and 16x16 blocks.
2534
  // When model-based skip is used (i.e., use_model_yrd_large = 1), skip_txfm may
2535
  // not be reliable. Hence SPLIT evaluation is disabled only at lower
2536
  // quantizers for blocks >= 32x32.
2537
307
  if ((!use_model_yrd_large) || (!is_larger_qindex)) return false;
2538
2539
  // Use residual statistics to decide if SPLIT partition should be evaluated
2540
  // for 32x32 blocks. The pruning logic is avoided for larger block sizes to
2542
  // avoid visual artifacts.
2542
254
  if (pc_tree->none->mic.mode == NEWMV && bsize == BLOCK_32X32 && do_split) {
2543
204
    const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
2544
204
    assert(subsize < BLOCK_SIZES_ALL);
2545
204
    double min_per_pixel_error = DBL_MAX;
2546
204
    double max_per_pixel_error = 0.;
2547
204
    int i;
2548
1.02k
    for (i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
2549
818
      const int x_idx = (i & 1) * hbs;
2550
818
      const int y_idx = (i >> 1) * hbs;
2551
818
      if ((mi_row + y_idx >= mi_params->mi_rows) ||
2552
818
          (mi_col + x_idx >= mi_params->mi_cols)) {
2553
0
        break;
2554
0
      }
2555
2556
      // Populate the appropriate buffer pointers.
2557
      // Pass scale factors as NULL since the base pointer of the block would have
2558
      // been calculated appropriately.
2559
818
      struct buf_2d src_split_buf_2d, pred_split_buf_2d;
2560
818
      const struct buf_2d *src_none_buf_2d = &x->plane[AOM_PLANE_Y].src;
2561
818
      setup_pred_plane(&src_split_buf_2d, subsize, src_none_buf_2d->buf,
2562
818
                       src_none_buf_2d->width, src_none_buf_2d->height,
2563
818
                       src_none_buf_2d->stride, y_idx, x_idx, NULL, 0, 0);
2564
818
      const struct buf_2d *pred_none_buf_2d = &xd->plane[AOM_PLANE_Y].dst;
2565
818
      setup_pred_plane(&pred_split_buf_2d, subsize, pred_none_buf_2d->buf,
2566
818
                       pred_none_buf_2d->width, pred_none_buf_2d->height,
2567
818
                       pred_none_buf_2d->stride, y_idx, x_idx, NULL, 0, 0);
2568
2569
818
      unsigned int curr_uint_mse;
2570
818
      const unsigned int curr_uint_var = cpi->ppi->fn_ptr[subsize].vf(
2571
818
          src_split_buf_2d.buf, src_split_buf_2d.stride, pred_split_buf_2d.buf,
2572
818
          pred_split_buf_2d.stride, &curr_uint_mse);
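      // sqrt(variance / pixel count) below gives the per-pixel standard
      // deviation of the prediction residual for this sub-block.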
2573
818
      const double curr_per_pixel_error =
2574
818
          sqrt((double)curr_uint_var / block_size_wide[subsize] /
2575
818
               block_size_high[subsize]);
2576
818
      if (curr_per_pixel_error < min_per_pixel_error)
2577
382
        min_per_pixel_error = curr_per_pixel_error;
2578
818
      if (curr_per_pixel_error > max_per_pixel_error)
2579
459
        max_per_pixel_error = curr_per_pixel_error;
2580
818
    }
2581
2582
    // Prune based on residual statistics only if all the sub-partitions are
2583
    // valid.
2584
204
    if (i == SUB_PARTITIONS_SPLIT) {
2585
204
      if (max_per_pixel_error - min_per_pixel_error <= 1.5) do_split = false;
2586
204
    }
2587
204
  }
2588
2589
254
  return do_split;
2590
307
}
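A small, self-contained sketch of the residual-spread metric used in the pruning branch above, with made-up variance values for the four 16x16 quadrants of a 32x32 block (standalone illustration, not encoder code; the names and numbers are purely illustrative):

#include <float.h>
#include <math.h>
#include <stdio.h>

int main(void) {
  // Illustrative variance outputs for the four 16x16 sub-blocks (256 pixels each).
  const unsigned int var[4] = { 1800, 2300, 2100, 2600 };
  double min_err = DBL_MAX, max_err = 0.0;
  for (int i = 0; i < 4; ++i) {
    // Per-pixel standard deviation of the prediction residual.
    const double per_pixel_error = sqrt((double)var[i] / 256.0);
    if (per_pixel_error < min_err) min_err = per_pixel_error;
    if (per_pixel_error > max_err) max_err = per_pixel_error;
  }
  // A spread of at most 1.5 means the quadrants are similarly predictable,
  // so the SPLIT evaluation is pruned.
  printf("spread = %.2f -> %s SPLIT\n", max_err - min_err,
         (max_err - min_err <= 1.5) ? "prune" : "evaluate");
  return 0;
}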
2591
2592
static void try_merge(AV1_COMP *const cpi, ThreadData *td,
2593
                      TileDataEnc *tile_data, MB_MODE_INFO **mib,
2594
                      TokenExtra **tp, const int mi_row, const int mi_col,
2595
                      const BLOCK_SIZE bsize, PC_TREE *const pc_tree,
2596
                      const PARTITION_TYPE partition, const BLOCK_SIZE subsize,
2597
50.2k
                      const int pl) {
2598
50.2k
  AV1_COMMON *const cm = &cpi->common;
2599
50.2k
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
2600
50.2k
  TileInfo *const tile_info = &tile_data->tile_info;
2601
50.2k
  MACROBLOCK *const x = &td->mb;
2602
50.2k
  MACROBLOCKD *const xd = &x->e_mbd;
2603
50.2k
  const ModeCosts *mode_costs = &x->mode_costs;
2604
50.2k
  const int num_planes = av1_num_planes(cm);
2605
  // Only square blocks from 8x8 to 128x128 are supported
2606
50.2k
  assert(bsize >= BLOCK_8X8 && bsize <= BLOCK_128X128);
2607
50.2k
  const int bs = mi_size_wide[bsize];
2608
50.2k
  const int hbs = bs / 2;
2609
50.2k
  bool do_split = false;
2610
50.2k
  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
2611
50.2k
  RD_STATS split_rdc, none_rdc;
2612
50.2k
  av1_invalid_rd_stats(&split_rdc);
2613
50.2k
  av1_invalid_rd_stats(&none_rdc);
2614
50.2k
  av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2615
50.2k
  xd->above_txfm_context =
2616
50.2k
      cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
2617
50.2k
  xd->left_txfm_context =
2618
50.2k
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
2619
50.2k
  pc_tree->partitioning = PARTITION_NONE;
2620
50.2k
  if (!pc_tree->none) {
2621
29.0k
    pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf);
2622
29.0k
    if (!pc_tree->none)
2623
0
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2624
0
                         "Failed to allocate PICK_MODE_CONTEXT");
2625
29.0k
  } else {
2626
21.1k
    av1_reset_pmc(pc_tree->none);
2627
21.1k
  }
2628
50.2k
  pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize,
2629
50.2k
                      pc_tree->none);
2630
50.2k
  none_rdc.rate += mode_costs->partition_cost[pl][PARTITION_NONE];
2631
50.2k
  none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
2632
50.2k
  av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2633
2634
50.2k
  if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode < 2 ||
2635
50.1k
      none_rdc.skip_txfm != 1 || pc_tree->none->mic.mode == NEWMV) {
2636
49.9k
    do_split = calc_do_split_flag(cpi, x, pc_tree, &none_rdc, mi_params, mi_row,
2637
49.9k
                                  mi_col, hbs, bsize, partition);
2638
49.9k
    if (do_split) {
2639
49.8k
      av1_init_rd_stats(&split_rdc);
2640
49.8k
      split_rdc.rate += mode_costs->partition_cost[pl][PARTITION_SPLIT];
2641
221k
      for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
2642
198k
        RD_STATS block_rdc;
2643
198k
        av1_invalid_rd_stats(&block_rdc);
2644
198k
        int x_idx = (i & 1) * hbs;
2645
198k
        int y_idx = (i >> 1) * hbs;
2646
198k
        if ((mi_row + y_idx >= mi_params->mi_rows) ||
2647
198k
            (mi_col + x_idx >= mi_params->mi_cols))
2648
0
          continue;
2649
198k
        xd->above_txfm_context =
2650
198k
            cm->above_contexts.txfm[tile_info->tile_row] + mi_col + x_idx;
2651
198k
        xd->left_txfm_context =
2652
198k
            xd->left_txfm_context_buffer + ((mi_row + y_idx) & MAX_MIB_MASK);
2653
198k
        if (!pc_tree->split[i]->none) {
2654
116k
          pc_tree->split[i]->none =
2655
116k
              av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
2656
116k
          if (!pc_tree->split[i]->none)
2657
0
            aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2658
0
                               "Failed to allocate PICK_MODE_CONTEXT");
2659
116k
        } else {
2660
81.9k
          av1_reset_pmc(pc_tree->split[i]->none);
2661
81.9k
        }
2662
198k
        pc_tree->split[i]->partitioning = PARTITION_NONE;
2663
198k
        pick_sb_modes_nonrd(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
2664
198k
                            &block_rdc, subsize, pc_tree->split[i]->none);
2665
        // TODO(yunqingwang): The rate here does not include the cost of
2666
        // signaling the PARTITION_NONE token in the sub-blocks.
2667
198k
        split_rdc.rate += block_rdc.rate;
2668
198k
        split_rdc.dist += block_rdc.dist;
2669
2670
198k
        av1_rd_cost_update(x->rdmult, &split_rdc);
2671
2672
198k
        if (none_rdc.rdcost < split_rdc.rdcost) {
2673
27.0k
          break;
2674
27.0k
        }
2675
2676
171k
        if (i != SUB_PARTITIONS_SPLIT - 1)
2677
149k
          encode_b_nonrd(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx,
2678
149k
                         1, subsize, PARTITION_NONE, pc_tree->split[i]->none,
2679
149k
                         NULL);
2680
171k
      }
2681
49.8k
      av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2682
49.8k
      split_rdc.rdcost = RDCOST(x->rdmult, split_rdc.rate, split_rdc.dist);
2683
49.8k
    }
2684
49.9k
  }
2685
2686
50.2k
  if (none_rdc.rdcost < split_rdc.rdcost) {
2687
    /* Predicted samples cannot be reused for PARTITION_NONE since the same
2688
     * buffer is being used to store the reconstructed samples of
2689
     * PARTITION_SPLIT block. */
2690
27.2k
    if (do_split) x->reuse_inter_pred = false;
2691
2692
27.2k
    mib[0]->bsize = bsize;
2693
27.2k
    pc_tree->partitioning = PARTITION_NONE;
2694
27.2k
    encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize, partition,
2695
27.2k
                   pc_tree->none, NULL);
2696
27.2k
  } else {
2697
23.0k
    mib[0]->bsize = subsize;
2698
23.0k
    pc_tree->partitioning = PARTITION_SPLIT;
2699
    /* Predicted samples cannot be reused for PARTITION_SPLIT since the same
2700
     * buffer is being used to write the reconstructed samples. */
2701
    // TODO(Cherma): Store and reuse predicted samples generated by
2702
    // encode_b_nonrd() in DRY_RUN_NORMAL mode.
2703
23.0k
    x->reuse_inter_pred = false;
2704
2705
114k
    for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
2706
91.8k
      int x_idx = (i & 1) * hbs;
2707
91.8k
      int y_idx = (i >> 1) * hbs;
2708
91.8k
      if ((mi_row + y_idx >= mi_params->mi_rows) ||
2709
91.8k
          (mi_col + x_idx >= mi_params->mi_cols))
2710
0
        continue;
2711
2712
      // Note: We don't reset pc_tree->split[i]->none here because it
2713
      // could contain results from the additional check. Instead, it is
2714
      // reset before we enter the nonrd_check_partition_merge_mode
2715
      // condition.
2716
91.8k
      if (!pc_tree->split[i]->none) {
2717
0
        pc_tree->split[i]->none =
2718
0
            av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
2719
0
        if (!pc_tree->split[i]->none)
2720
0
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2721
0
                             "Failed to allocate PICK_MODE_CONTEXT");
2722
0
      }
2723
91.8k
      encode_b_nonrd(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 0,
2724
91.8k
                     subsize, PARTITION_NONE, pc_tree->split[i]->none, NULL);
2725
91.8k
    }
2726
23.0k
  }
2727
50.2k
}
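The merge decision above reduces to a single rate-distortion comparison: the merged NONE block is kept only if its combined cost beats the accumulated cost of the four sub-blocks. A minimal sketch of that trade-off, using a simplified cost function in place of libaom's RDCOST macro and illustrative rate/distortion numbers (not encoder code):

#include <stdint.h>
#include <stdio.h>

// Simplified stand-in for libaom's RDCOST macro: the real macro applies
// fixed-point scaling, but only the ordering of the candidates matters here.
static int64_t rd_cost(int64_t rdmult, int64_t rate, int64_t dist) {
  return rdmult * rate + dist;
}

int main(void) {
  const int64_t rdmult = 128;  // illustrative Lagrange multiplier
  // NONE: one large block, few signaling bits, higher residual distortion.
  const int64_t none_cost = rd_cost(rdmult, /*rate=*/40, /*dist=*/9000);
  // SPLIT: four sub-blocks, more bits to signal, lower total distortion.
  const int64_t split_cost = rd_cost(rdmult, /*rate=*/4 * 25, /*dist=*/3200);
  printf("keep %s\n",
         none_cost < split_cost ? "PARTITION_NONE" : "PARTITION_SPLIT");
  return 0;
}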
2728
2729
// Evaluate if the sub-partitions can be merged directly into a large partition
2730
// without calculating the RD cost.
2731
static void direct_partition_merging(AV1_COMP *cpi, ThreadData *td,
2732
                                     TileDataEnc *tile_data, MB_MODE_INFO **mib,
2733
0
                                     int mi_row, int mi_col, BLOCK_SIZE bsize) {
2734
0
  AV1_COMMON *const cm = &cpi->common;
2735
0
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
2736
0
  TileInfo *const tile_info = &tile_data->tile_info;
2737
0
  MACROBLOCK *const x = &td->mb;
2738
0
  MACROBLOCKD *const xd = &x->e_mbd;
2739
0
  const int bs = mi_size_wide[bsize];
2740
0
  const int hbs = bs / 2;
2741
0
  const PARTITION_TYPE partition =
2742
0
      (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
2743
0
                           : PARTITION_NONE;
2744
0
  BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
2745
2746
0
  MB_MODE_INFO **b0 = mib;
2747
0
  MB_MODE_INFO **b1 = mib + hbs;
2748
0
  MB_MODE_INFO **b2 = mib + hbs * mi_params->mi_stride;
2749
0
  MB_MODE_INFO **b3 = mib + hbs * mi_params->mi_stride + hbs;
2750
2751
  // Check if the following conditions are met. This can be updated
2752
  // later with more support added.
2753
0
  const int further_split = b0[0]->bsize < subsize || b1[0]->bsize < subsize ||
2754
0
                            b2[0]->bsize < subsize || b3[0]->bsize < subsize;
2755
0
  if (further_split) return;
2756
2757
0
  const int no_skip = !b0[0]->skip_txfm || !b1[0]->skip_txfm ||
2758
0
                      !b2[0]->skip_txfm || !b3[0]->skip_txfm;
2759
0
  if (no_skip) return;
2760
2761
0
  const int compound = (b0[0]->ref_frame[1] != b1[0]->ref_frame[1] ||
2762
0
                        b0[0]->ref_frame[1] != b2[0]->ref_frame[1] ||
2763
0
                        b0[0]->ref_frame[1] != b3[0]->ref_frame[1] ||
2764
0
                        b0[0]->ref_frame[1] > NONE_FRAME);
2765
0
  if (compound) return;
2766
2767
  // Intra modes aren't considered here.
2768
0
  const int different_ref = (b0[0]->ref_frame[0] != b1[0]->ref_frame[0] ||
2769
0
                             b0[0]->ref_frame[0] != b2[0]->ref_frame[0] ||
2770
0
                             b0[0]->ref_frame[0] != b3[0]->ref_frame[0] ||
2771
0
                             b0[0]->ref_frame[0] <= INTRA_FRAME);
2772
0
  if (different_ref) return;
2773
2774
0
  const int different_mode =
2775
0
      (b0[0]->mode != b1[0]->mode || b0[0]->mode != b2[0]->mode ||
2776
0
       b0[0]->mode != b3[0]->mode);
2777
0
  if (different_mode) return;
2778
2779
0
  const int unsupported_mode =
2780
0
      (b0[0]->mode != NEARESTMV && b0[0]->mode != GLOBALMV);
2781
0
  if (unsupported_mode) return;
2782
2783
0
  const int different_mv = (b0[0]->mv[0].as_int != b1[0]->mv[0].as_int ||
2784
0
                            b0[0]->mv[0].as_int != b2[0]->mv[0].as_int ||
2785
0
                            b0[0]->mv[0].as_int != b3[0]->mv[0].as_int);
2786
0
  if (different_mv) return;
2787
2788
0
  const int unsupported_motion_mode =
2789
0
      (b0[0]->motion_mode != b1[0]->motion_mode ||
2790
0
       b0[0]->motion_mode != b2[0]->motion_mode ||
2791
0
       b0[0]->motion_mode != b3[0]->motion_mode ||
2792
0
       b0[0]->motion_mode != SIMPLE_TRANSLATION);
2793
0
  if (unsupported_motion_mode) return;
2794
2795
0
  const int different_filter =
2796
0
      (b0[0]->interp_filters.as_int != b1[0]->interp_filters.as_int ||
2797
0
       b0[0]->interp_filters.as_int != b2[0]->interp_filters.as_int ||
2798
0
       b0[0]->interp_filters.as_int != b3[0]->interp_filters.as_int);
2799
0
  if (different_filter) return;
2800
2801
0
  const int different_seg = (b0[0]->segment_id != b1[0]->segment_id ||
2802
0
                             b0[0]->segment_id != b2[0]->segment_id ||
2803
0
                             b0[0]->segment_id != b3[0]->segment_id);
2804
0
  if (different_seg) return;
2805
2806
  // Evaluate the ref_mv.
2807
0
  MB_MODE_INFO **this_mi = mib;
2808
0
  BLOCK_SIZE orig_bsize = this_mi[0]->bsize;
2809
0
  const PARTITION_TYPE orig_partition = this_mi[0]->partition;
2810
2811
0
  this_mi[0]->bsize = bsize;
2812
0
  this_mi[0]->partition = PARTITION_NONE;
2813
0
  this_mi[0]->skip_txfm = 1;
2814
2815
  // TODO(yunqing): functions called below can be optimized by
2816
  // removing unrelated operations.
2817
0
  av1_set_offsets_without_segment_id(cpi, &tile_data->tile_info, x, mi_row,
2818
0
                                     mi_col, bsize);
2819
2820
0
  const MV_REFERENCE_FRAME ref_frame = this_mi[0]->ref_frame[0];
2821
0
  int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES];
2822
0
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
2823
0
  int force_skip_low_temp_var = 0;
2824
0
  int skip_pred_mv = 0;
2825
0
  bool use_scaled_ref;
2826
2827
0
  for (int i = 0; i < MB_MODE_COUNT; ++i) {
2828
0
    for (int j = 0; j < REF_FRAMES; ++j) {
2829
0
      frame_mv[i][j].as_int = INVALID_MV;
2830
0
    }
2831
0
  }
2832
0
  av1_copy(x->color_sensitivity, x->color_sensitivity_sb);
2833
0
  skip_pred_mv = (x->nonrd_prune_ref_frame_search > 2 &&
2834
0
                  x->color_sensitivity[COLOR_SENS_IDX(AOM_PLANE_U)] != 2 &&
2835
0
                  x->color_sensitivity[COLOR_SENS_IDX(AOM_PLANE_V)] != 2);
2836
2837
0
  find_predictors(cpi, x, ref_frame, frame_mv, yv12_mb, bsize,
2838
0
                  force_skip_low_temp_var, skip_pred_mv, &use_scaled_ref);
2839
2840
0
  int continue_merging = 1;
2841
0
  if (frame_mv[NEARESTMV][ref_frame].as_mv.row != b0[0]->mv[0].as_mv.row ||
2842
0
      frame_mv[NEARESTMV][ref_frame].as_mv.col != b0[0]->mv[0].as_mv.col)
2843
0
    continue_merging = 0;
2844
2845
0
  if (!continue_merging) {
2846
0
    this_mi[0]->bsize = orig_bsize;
2847
0
    this_mi[0]->partition = orig_partition;
2848
2849
    // TODO(yunqing): Store the results and restore here instead of
2850
    // calling find_predictors() again.
2851
0
    av1_set_offsets_without_segment_id(cpi, &tile_data->tile_info, x, mi_row,
2852
0
                                       mi_col, this_mi[0]->bsize);
2853
0
    find_predictors(cpi, x, ref_frame, frame_mv, yv12_mb, this_mi[0]->bsize,
2854
0
                    force_skip_low_temp_var, skip_pred_mv, &use_scaled_ref);
2855
0
  } else {
2856
0
    struct scale_factors *sf = get_ref_scale_factors(cm, ref_frame);
2857
0
    const int is_scaled = av1_is_scaled(sf);
2858
0
    const int is_y_subpel_mv = (abs(this_mi[0]->mv[0].as_mv.row) % 8) ||
2859
0
                               (abs(this_mi[0]->mv[0].as_mv.col) % 8);
2860
0
    const int is_uv_subpel_mv = (abs(this_mi[0]->mv[0].as_mv.row) % 16) ||
2861
0
                                (abs(this_mi[0]->mv[0].as_mv.col) % 16);
2862
2863
0
    if (cpi->ppi->use_svc || is_scaled || is_y_subpel_mv || is_uv_subpel_mv) {
2864
0
      const int num_planes = av1_num_planes(cm);
2865
0
      set_ref_ptrs(cm, xd, ref_frame, this_mi[0]->ref_frame[1]);
2866
0
      const YV12_BUFFER_CONFIG *cfg = get_ref_frame_yv12_buf(cm, ref_frame);
2867
0
      av1_setup_pre_planes(xd, 0, cfg, mi_row, mi_col,
2868
0
                           xd->block_ref_scale_factors[0], num_planes);
2869
2870
0
      if (!cpi->ppi->use_svc && !is_scaled && !is_y_subpel_mv) {
2871
0
        assert(is_uv_subpel_mv == 1);
2872
0
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 1,
2873
0
                                      num_planes - 1);
2874
0
      } else {
2875
0
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
2876
0
                                      num_planes - 1);
2877
0
      }
2878
0
    }
2879
2880
    // Copy out mbmi_ext information.
2881
0
    MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2882
0
    MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame = x->mbmi_ext_frame;
2883
0
    av1_copy_mbmi_ext_to_mbmi_ext_frame(
2884
0
        mbmi_ext_frame, mbmi_ext, av1_ref_frame_type(this_mi[0]->ref_frame));
2885
2886
0
    const BLOCK_SIZE this_subsize =
2887
0
        get_partition_subsize(bsize, this_mi[0]->partition);
2888
    // Update partition contexts.
2889
0
    update_ext_partition_context(xd, mi_row, mi_col, this_subsize, bsize,
2890
0
                                 this_mi[0]->partition);
2891
2892
0
    const int num_planes = av1_num_planes(cm);
2893
0
    av1_reset_entropy_context(xd, bsize, num_planes);
2894
2895
    // Note: use x->txfm_search_params.tx_mode_search_type instead of
2896
    // cm->features.tx_mode here.
2897
0
    TX_SIZE tx_size =
2898
0
        tx_size_from_tx_mode(bsize, x->txfm_search_params.tx_mode_search_type);
2899
0
    if (xd->lossless[this_mi[0]->segment_id]) tx_size = TX_4X4;
2900
0
    this_mi[0]->tx_size = tx_size;
2901
0
    memset(this_mi[0]->inter_tx_size, this_mi[0]->tx_size,
2902
0
           sizeof(this_mi[0]->inter_tx_size));
2903
2904
    // Update txfm contexts.
2905
0
    xd->above_txfm_context =
2906
0
        cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
2907
0
    xd->left_txfm_context =
2908
0
        xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
2909
0
    set_txfm_ctxs(this_mi[0]->tx_size, xd->width, xd->height,
2910
0
                  this_mi[0]->skip_txfm && is_inter_block(this_mi[0]), xd);
2911
2912
    // Update mi for this partition block.
2913
0
    for (int y = 0; y < bs; y++) {
2914
0
      for (int x_idx = 0; x_idx < bs; x_idx++) {
2915
0
        this_mi[x_idx + y * mi_params->mi_stride] = this_mi[0];
2916
0
      }
2917
0
    }
2918
0
  }
2919
0
}
2920
2921
/*!\brief AV1 block partition application (minimal RD search).
2922
*
2923
* \ingroup partition_search
2924
* \callgraph
2925
* \callergraph
2926
* Encode the block by applying pre-calculated partition patterns that are
2927
* represented by coding block sizes stored in the mbmi array. The only
2928
* partition adjustment allowed is merging leaf split nodes if it leads to a
2929
* lower rd cost. The partition types are limited to a basic set: none, horz,
2930
* vert, and split. This function is only used in the real-time mode.
2931
*
2932
* \param[in]    cpi       Top-level encoder structure
2933
* \param[in]    td        Pointer to thread data
2934
* \param[in]    tile_data Pointer to struct holding adaptive
2935
*                         data/contexts/models for the tile during encoding
2936
* \param[in]    mib       Array representing MB_MODE_INFO pointers for mi
2937
*                         blocks starting from the first pixel of the current
2938
*                         block
2939
* \param[in]    tp        Pointer to the starting token
2940
* \param[in]    mi_row    Row coordinate of the block in a step size of MI_SIZE
2941
* \param[in]    mi_col    Column coordinate of the block in a step size of
2942
*                         MI_SIZE
2943
* \param[in]    bsize     Current block size
2944
* \param[in]    pc_tree   Pointer to the PC_TREE node holding the picked
2945
*                         partitions and mode info for the current block
2946
*
2947
* \remark Nothing is returned. The pc_tree struct is modified to store the
2948
* picked partition and modes.
2949
*/
2950
void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
2951
                             TileDataEnc *tile_data, MB_MODE_INFO **mib,
2952
                             TokenExtra **tp, int mi_row, int mi_col,
2953
3.74M
                             BLOCK_SIZE bsize, PC_TREE *pc_tree) {
2954
3.74M
  AV1_COMMON *const cm = &cpi->common;
2955
3.74M
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
2956
3.74M
  TileInfo *const tile_info = &tile_data->tile_info;
2957
3.74M
  MACROBLOCK *const x = &td->mb;
2958
3.74M
  MACROBLOCKD *const xd = &x->e_mbd;
2959
3.74M
  const ModeCosts *mode_costs = &x->mode_costs;
2960
  // Only square blocks from 8x8 to 128x128 are supported
2961
3.74M
  assert(bsize >= BLOCK_8X8 && bsize <= BLOCK_128X128);
2962
3.74M
  const int bs = mi_size_wide[bsize];
2963
3.74M
  const int hbs = bs / 2;
2964
3.74M
  PARTITION_TYPE partition = (bsize >= BLOCK_8X8)
2965
3.74M
                                 ? get_partition(cm, mi_row, mi_col, bsize)
2966
18.4E
                                 : PARTITION_NONE;
2967
3.74M
  BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
2968
3.74M
  assert(subsize <= BLOCK_LARGEST);
2969
3.74M
  const int pl = (bsize >= BLOCK_8X8)
2970
3.74M
                     ? partition_plane_context(xd, mi_row, mi_col, bsize)
2971
18.4E
                     : 0;
2972
2973
3.74M
  RD_STATS dummy_cost;
2974
3.74M
  av1_invalid_rd_stats(&dummy_cost);
2975
2976
3.74M
  if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
2977
2978
3.74M
  assert(mi_size_wide[bsize] == mi_size_high[bsize]);
2979
2980
3.74M
  xd->above_txfm_context =
2981
3.74M
      cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
2982
3.74M
  xd->left_txfm_context =
2983
3.74M
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
2984
2985
  // Initialize default mode evaluation params
2986
3.74M
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);
2987
2988
3.74M
  x->reuse_inter_pred = cpi->sf.rt_sf.reuse_inter_pred_nonrd;
2989
2990
3.74M
  int change_none_to_split = 0;
2991
3.74M
  if (partition == PARTITION_NONE &&
2992
2.55M
      cpi->sf.rt_sf.nonrd_check_partition_split == 1) {
2993
0
    change_none_to_split =
2994
0
        try_split_partition(cpi, td, tile_data, tile_info, tp, x, xd, mi_params,
2995
0
                            mi_row, mi_col, bsize, pl, pc_tree);
2996
0
    if (change_none_to_split) {
2997
0
      partition = PARTITION_SPLIT;
2998
0
      subsize = get_partition_subsize(bsize, partition);
2999
0
      assert(subsize <= BLOCK_LARGEST);
3000
0
    }
3001
0
  }
3002
3003
3.74M
  pc_tree->partitioning = partition;
3004
3005
3.74M
  switch (partition) {
3006
2.55M
    case PARTITION_NONE:
3007
2.55M
      if (!pc_tree->none) {
3008
1.46M
        pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf);
3009
1.46M
        if (!pc_tree->none)
3010
0
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
3011
0
                             "Failed to allocate PICK_MODE_CONTEXT");
3012
1.46M
      } else {
3013
1.09M
        av1_reset_pmc(pc_tree->none);
3014
1.09M
      }
3015
2.55M
      pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &dummy_cost, bsize,
3016
2.55M
                          pc_tree->none);
3017
2.55M
      encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize,
3018
2.55M
                     partition, pc_tree->none, NULL);
3019
2.55M
      break;
3020
11.8k
    case PARTITION_VERT:
3021
35.5k
      for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) {
3022
23.6k
        if (!pc_tree->vertical[i]) {
3023
17.5k
          pc_tree->vertical[i] =
3024
17.5k
              av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
3025
17.5k
          if (!pc_tree->vertical[i])
3026
0
            aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
3027
0
                               "Failed to allocate PICK_MODE_CONTEXT");
3028
17.5k
        } else {
3029
6.14k
          av1_reset_pmc(pc_tree->vertical[i]);
3030
6.14k
        }
3031
23.6k
      }
3032
11.8k
      pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
3033
11.8k
                          subsize, pc_tree->vertical[0]);
3034
11.8k
      encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, subsize,
3035
11.8k
                     PARTITION_VERT, pc_tree->vertical[0], NULL);
3036
11.8k
      if (mi_col + hbs < mi_params->mi_cols && bsize > BLOCK_8X8) {
3037
21
        pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col + hbs,
3038
21
                            &dummy_cost, subsize, pc_tree->vertical[1]);
3039
21
        encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col + hbs, 0, subsize,
3040
21
                       PARTITION_VERT, pc_tree->vertical[1], NULL);
3041
21
      }
3042
11.8k
      break;
3043
14.2k
    case PARTITION_HORZ:
3044
42.7k
      for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) {
3045
28.4k
        if (!pc_tree->horizontal[i]) {
3046
22.2k
          pc_tree->horizontal[i] =
3047
22.2k
              av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
3048
22.2k
          if (!pc_tree->horizontal[i])
3049
0
            aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
3050
0
                               "Failed to allocate PICK_MODE_CONTEXT");
3051
22.2k
        } else {
3052
6.20k
          av1_reset_pmc(pc_tree->horizontal[i]);
3053
6.20k
        }
3054
28.4k
      }
3055
14.2k
      pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
3056
14.2k
                          subsize, pc_tree->horizontal[0]);
3057
14.2k
      encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, subsize,
3058
14.2k
                     PARTITION_HORZ, pc_tree->horizontal[0], NULL);
3059
3060
14.2k
      if (mi_row + hbs < mi_params->mi_rows && bsize > BLOCK_8X8) {
3061
32
        pick_sb_modes_nonrd(cpi, tile_data, x, mi_row + hbs, mi_col,
3062
32
                            &dummy_cost, subsize, pc_tree->horizontal[1]);
3063
32
        encode_b_nonrd(cpi, tile_data, td, tp, mi_row + hbs, mi_col, 0, subsize,
3064
32
                       PARTITION_HORZ, pc_tree->horizontal[1], NULL);
3065
32
      }
3066
14.2k
      break;
3067
1.16M
    case PARTITION_SPLIT:
3068
5.81M
      for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
3069
4.65M
        if (!pc_tree->split[i]) {
3070
2.72M
          pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
3071
2.72M
          if (!pc_tree->split[i])
3072
0
            aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
3073
0
                               "Failed to allocate PC_TREE");
3074
2.72M
        }
3075
4.65M
        pc_tree->split[i]->index = i;
3076
4.65M
      }
3077
1.16M
      if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode &&
3078
866k
          av1_is_leaf_split_partition(cm, mi_row, mi_col, bsize) &&
3079
443k
          !frame_is_intra_only(cm) && bsize <= BLOCK_64X64) {
3080
50.2k
        try_merge(cpi, td, tile_data, mib, tp, mi_row, mi_col, bsize, pc_tree,
3081
50.2k
                  partition, subsize, pl);
3082
1.11M
      } else {
3083
5.55M
        for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
3084
4.44M
          int x_idx = (i & 1) * hbs;
3085
4.44M
          int y_idx = (i >> 1) * hbs;
3086
4.44M
          int jj = i >> 1, ii = i & 0x01;
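          // ii and jj are the column and row indices (0 or 1) of the i-th
          // sub-block; they select the corresponding quadrant of the mi grid.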
3087
4.44M
          if ((mi_row + y_idx >= mi_params->mi_rows) ||
3088
3.99M
              (mi_col + x_idx >= mi_params->mi_cols))
3089
829k
            continue;
3090
3.61M
          av1_nonrd_use_partition(
3091
3.61M
              cpi, td, tile_data,
3092
3.61M
              mib + jj * hbs * mi_params->mi_stride + ii * hbs, tp,
3093
3.61M
              mi_row + y_idx, mi_col + x_idx, subsize, pc_tree->split[i]);
3094
3.61M
        }
3095
3096
1.11M
        if (!change_none_to_split) {
3097
          // Note: Palette and CfL modes are not supported.
3098
1.11M
          if (!frame_is_intra_only(cm) && !tile_data->allow_update_cdf &&
3099
0
              cpi->sf.rt_sf.partition_direct_merging &&
3100
0
              mode_costs->partition_cost[pl][PARTITION_NONE] <
3101
0
                  mode_costs->partition_cost[pl][PARTITION_SPLIT] &&
3102
0
              (mi_row + bs <= mi_params->mi_rows) &&
3103
0
              (mi_col + bs <= mi_params->mi_cols)) {
3104
0
            direct_partition_merging(cpi, td, tile_data, mib, mi_row, mi_col,
3105
0
                                     bsize);
3106
0
          }
3107
1.11M
        }
3108
1.11M
      }
3109
1.16M
      break;
3110
0
    case PARTITION_VERT_A:
3111
0
    case PARTITION_VERT_B:
3112
0
    case PARTITION_HORZ_A:
3113
0
    case PARTITION_HORZ_B:
3114
0
    case PARTITION_HORZ_4:
3115
0
    case PARTITION_VERT_4:
3116
0
      assert(0 && "Cannot handle extended partition types");
3117
0
    default: assert(0); break;
3118
3.74M
  }
3119
3.74M
}
3120
3121
#if !CONFIG_REALTIME_ONLY
3122
// Try searching for an encoding for the given subblock. Returns zero if the
3123
// rdcost is already too high (to tell the caller not to bother searching for
3124
// encodings of further subblocks).
3125
static int rd_try_subblock(AV1_COMP *const cpi, ThreadData *td,
3126
                           TileDataEnc *tile_data, TokenExtra **tp, int is_last,
3127
                           int mi_row, int mi_col, BLOCK_SIZE subsize,
3128
                           RD_STATS best_rdcost, RD_STATS *sum_rdc,
3129
                           PARTITION_TYPE partition,
3130
0
                           PICK_MODE_CONTEXT *this_ctx) {
3131
0
  MACROBLOCK *const x = &td->mb;
3132
0
  const int orig_mult = x->rdmult;
3133
0
  setup_block_rdmult(cpi, x, mi_row, mi_col, subsize, NO_AQ, NULL);
3134
3135
0
  av1_rd_cost_update(x->rdmult, &best_rdcost);
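  // rdcost_remaining is the rd budget left for this sub-block: best_rdcost
  // minus what the previously searched sub-blocks in sum_rdc already consumed.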
3136
3137
0
  RD_STATS rdcost_remaining;
3138
0
  av1_rd_stats_subtraction(x->rdmult, &best_rdcost, sum_rdc, &rdcost_remaining);
3139
0
  RD_STATS this_rdc;
3140
0
  pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, partition,
3141
0
                subsize, this_ctx, rdcost_remaining);
3142
3143
0
  if (this_rdc.rate == INT_MAX) {
3144
0
    sum_rdc->rdcost = INT64_MAX;
3145
0
  } else {
3146
0
    sum_rdc->rate += this_rdc.rate;
3147
0
    sum_rdc->dist += this_rdc.dist;
3148
0
    av1_rd_cost_update(x->rdmult, sum_rdc);
3149
0
  }
3150
3151
0
  if (sum_rdc->rdcost >= best_rdcost.rdcost) {
3152
0
    x->rdmult = orig_mult;
3153
0
    return 0;
3154
0
  }
3155
3156
0
  if (!is_last) {
3157
0
    av1_update_state(cpi, td, this_ctx, mi_row, mi_col, subsize, 1);
3158
0
    encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize, NULL);
3159
0
  }
3160
3161
0
  x->rdmult = orig_mult;
3162
0
  return 1;
3163
0
}
3164
3165
// Tests an AB partition, and updates the encoder status, the pick mode
3166
// contexts, the best rdcost, and the best partition.
3167
static bool rd_test_partition3(AV1_COMP *const cpi, ThreadData *td,
3168
                               TileDataEnc *tile_data, TokenExtra **tp,
3169
                               PC_TREE *pc_tree, RD_STATS *best_rdc,
3170
                               int64_t *this_rdcost,
3171
                               PICK_MODE_CONTEXT *ctxs[SUB_PARTITIONS_AB],
3172
                               int mi_row, int mi_col, BLOCK_SIZE bsize,
3173
                               PARTITION_TYPE partition,
3174
                               const BLOCK_SIZE ab_subsize[SUB_PARTITIONS_AB],
3175
                               const int ab_mi_pos[SUB_PARTITIONS_AB][2],
3176
0
                               const MB_MODE_INFO **mode_cache) {
3177
0
  MACROBLOCK *const x = &td->mb;
3178
0
  const MACROBLOCKD *const xd = &x->e_mbd;
3179
0
  const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
3180
0
  RD_STATS sum_rdc;
3181
0
  av1_init_rd_stats(&sum_rdc);
3182
0
  sum_rdc.rate = x->mode_costs.partition_cost[pl][partition];
3183
0
  sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
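  // The running cost starts from just the bits needed to signal the AB
  // partition type; rate and distortion from each sub-block are added below.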
3184
  // Loop over sub-partitions in AB partition type.
3185
0
  for (int i = 0; i < SUB_PARTITIONS_AB; i++) {
3186
0
    if (mode_cache && mode_cache[i]) {
3187
0
      x->use_mb_mode_cache = 1;
3188
0
      x->mb_mode_cache = mode_cache[i];
3189
0
    }
3190
0
    const int mode_search_success =
3191
0
        rd_try_subblock(cpi, td, tile_data, tp, i == SUB_PARTITIONS_AB - 1,
3192
0
                        ab_mi_pos[i][0], ab_mi_pos[i][1], ab_subsize[i],
3193
0
                        *best_rdc, &sum_rdc, partition, ctxs[i]);
3194
0
    x->use_mb_mode_cache = 0;
3195
0
    x->mb_mode_cache = NULL;
3196
0
    if (!mode_search_success) {
3197
0
      return false;
3198
0
    }
3199
0
  }
3200
3201
0
  av1_rd_cost_update(x->rdmult, &sum_rdc);
3202
0
  *this_rdcost = sum_rdc.rdcost;
3203
0
  if (sum_rdc.rdcost >= best_rdc->rdcost) return false;
3204
0
  sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
3205
0
  *this_rdcost = sum_rdc.rdcost;
3206
0
  if (sum_rdc.rdcost >= best_rdc->rdcost) return false;
3207
3208
0
  *best_rdc = sum_rdc;
3209
0
  pc_tree->partitioning = partition;
3210
0
  return true;
3211
0
}
3212
3213
#if CONFIG_COLLECT_PARTITION_STATS
3214
static void init_partition_block_timing_stats(
3215
    PartitionTimingStats *part_timing_stats) {
3216
  av1_zero(*part_timing_stats);
3217
}
3218
3219
static inline void start_partition_block_timer(
3220
    PartitionTimingStats *part_timing_stats, PARTITION_TYPE partition_type) {
3221
  assert(!part_timing_stats->timer_is_on);
3222
  part_timing_stats->partition_attempts[partition_type] += 1;
3223
  aom_usec_timer_start(&part_timing_stats->timer);
3224
  part_timing_stats->timer_is_on = 1;
3225
}
3226
3227
static inline void end_partition_block_timer(
3228
    PartitionTimingStats *part_timing_stats, PARTITION_TYPE partition_type,
3229
    int64_t rdcost) {
3230
  if (part_timing_stats->timer_is_on) {
3231
    aom_usec_timer_mark(&part_timing_stats->timer);
3232
    const int64_t time = aom_usec_timer_elapsed(&part_timing_stats->timer);
3233
    part_timing_stats->partition_times[partition_type] += time;
3234
    part_timing_stats->partition_rdcost[partition_type] = rdcost;
3235
    part_timing_stats->timer_is_on = 0;
3236
  }
3237
}
3238
static inline void print_partition_timing_stats_with_rdcost(
3239
    const PartitionTimingStats *part_timing_stats, int mi_row, int mi_col,
3240
    BLOCK_SIZE bsize, FRAME_UPDATE_TYPE frame_update_type, int frame_number,
3241
    const RD_STATS *best_rdc, const char *filename) {
3242
  FILE *f = fopen(filename, "a");
3243
  fprintf(f, "%d,%d,%d,%d,%d,%d,%" PRId64 ",%" PRId64 ",", bsize, frame_number,
3244
          frame_update_type, mi_row, mi_col, best_rdc->rate, best_rdc->dist,
3245
          best_rdc->rdcost);
3246
  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
3247
    fprintf(f, "%d,", part_timing_stats->partition_decisions[idx]);
3248
  }
3249
  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
3250
    fprintf(f, "%d,", part_timing_stats->partition_attempts[idx]);
3251
  }
3252
  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
3253
    fprintf(f, "%" PRId64 ",", part_timing_stats->partition_times[idx]);
3254
  }
3255
  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
3256
    if (part_timing_stats->partition_rdcost[idx] == INT64_MAX) {
3257
      fprintf(f, "%d,", -1);
3258
    } else {
3259
      fprintf(f, "%" PRId64 ",", part_timing_stats->partition_rdcost[idx]);
3260
    }
3261
  }
3262
  fprintf(f, "\n");
3263
  fclose(f);
3264
}
3265
3266
static inline void print_partition_timing_stats(
3267
    const PartitionTimingStats *part_timing_stats, int intra_only,
3268
    int show_frame, const BLOCK_SIZE bsize, const char *filename) {
3269
  FILE *f = fopen(filename, "a");
3270
  fprintf(f, "%d,%d,%d,", bsize, show_frame, intra_only);
3271
  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
3272
    fprintf(f, "%d,", part_timing_stats->partition_decisions[idx]);
3273
  }
3274
  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
3275
    fprintf(f, "%d,", part_timing_stats->partition_attempts[idx]);
3276
  }
3277
  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
3278
    fprintf(f, "%" PRId64 ",", part_timing_stats->partition_times[idx]);
3279
  }
3280
  fprintf(f, "\n");
3281
  fclose(f);
3282
}
3283
3284
static inline void accumulate_partition_timing_stats(
3285
    FramePartitionTimingStats *fr_part_timing_stats,
3286
    const PartitionTimingStats *part_timing_stats, BLOCK_SIZE bsize) {
3287
  const int bsize_idx = av1_get_bsize_idx_for_part_stats(bsize);
3288
  int *agg_attempts = fr_part_timing_stats->partition_attempts[bsize_idx];
3289
  int *agg_decisions = fr_part_timing_stats->partition_decisions[bsize_idx];
3290
  int64_t *agg_times = fr_part_timing_stats->partition_times[bsize_idx];
3291
  for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
3292
    agg_attempts[idx] += part_timing_stats->partition_attempts[idx];
3293
    agg_decisions[idx] += part_timing_stats->partition_decisions[idx];
3294
    agg_times[idx] += part_timing_stats->partition_times[idx];
3295
  }
3296
}
3297
#endif  // CONFIG_COLLECT_PARTITION_STATS
3298
3299
// Initialize state variables of partition search used in
3300
// av1_rd_pick_partition().
3301
static void init_partition_search_state_params(
3302
    MACROBLOCK *x, AV1_COMP *const cpi, PartitionSearchState *part_search_state,
3303
8.65M
    int mi_row, int mi_col, BLOCK_SIZE bsize) {
3304
8.65M
  MACROBLOCKD *const xd = &x->e_mbd;
3305
8.65M
  const AV1_COMMON *const cm = &cpi->common;
3306
8.65M
  PartitionBlkParams *blk_params = &part_search_state->part_blk_params;
3307
8.65M
  const CommonModeInfoParams *const mi_params = &cpi->common.mi_params;
3308
3309
  // Initialization of block size related parameters.
3310
8.65M
  blk_params->mi_step = mi_size_wide[bsize] / 2;
3311
8.65M
  blk_params->mi_row = mi_row;
3312
8.65M
  blk_params->mi_col = mi_col;
3313
8.65M
  blk_params->mi_row_edge = mi_row + blk_params->mi_step;
3314
8.65M
  blk_params->mi_col_edge = mi_col + blk_params->mi_step;
3315
8.65M
  blk_params->width = block_size_wide[bsize];
3316
8.65M
  blk_params->min_partition_size_1d =
3317
8.65M
      block_size_wide[x->sb_enc.min_partition_size];
3318
8.65M
  blk_params->subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
3319
8.65M
  blk_params->split_bsize2 = blk_params->subsize;
3320
8.65M
  blk_params->bsize_at_least_8x8 = (bsize >= BLOCK_8X8);
3321
8.65M
  blk_params->bsize = bsize;
3322
3323
  // Check if the partition corresponds to an edge block.
3324
8.65M
  blk_params->has_rows = (blk_params->mi_row_edge < mi_params->mi_rows);
3325
8.65M
  blk_params->has_cols = (blk_params->mi_col_edge < mi_params->mi_cols);
3326
3327
  // Update intra partitioning related info.
3328
8.65M
  part_search_state->intra_part_info = &x->part_search_info;
3329
  // Prepare for segmentation CNN-based partitioning for intra-frame.
3330
8.65M
  if (frame_is_intra_only(cm) && bsize == BLOCK_64X64) {
3331
162k
    part_search_state->intra_part_info->quad_tree_idx = 0;
3332
162k
    part_search_state->intra_part_info->cnn_output_valid = 0;
3333
162k
  }
3334
3335
  // Set partition plane context index.
3336
8.65M
  part_search_state->pl_ctx_idx =
3337
8.65M
      blk_params->bsize_at_least_8x8
3338
8.65M
          ? partition_plane_context(xd, mi_row, mi_col, bsize)
3339
8.65M
          : 0;
3340
3341
  // Partition cost buffer update
3342
8.65M
  ModeCosts *mode_costs = &x->mode_costs;
3343
8.65M
  part_search_state->partition_cost =
3344
8.65M
      mode_costs->partition_cost[part_search_state->pl_ctx_idx];
3345
3346
  // Initialize HORZ and VERT win flags as true for all split partitions.
3347
43.2M
  for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
3348
34.6M
    part_search_state->split_part_rect_win[i].rect_part_win[HORZ] = true;
3349
34.6M
    part_search_state->split_part_rect_win[i].rect_part_win[VERT] = true;
3350
34.6M
  }
3351
3352
  // Initialize the rd cost.
3353
8.65M
  av1_init_rd_stats(&part_search_state->this_rdc);
3354
3355
  // Initialize RD costs for partition types to 0.
3356
8.65M
  part_search_state->none_rd = 0;
3357
8.65M
  av1_zero(part_search_state->split_rd);
3358
8.65M
  av1_zero(part_search_state->rect_part_rd);
3359
3360
  // Initialize SPLIT partition as not ready.
3361
8.65M
  av1_zero(part_search_state->is_split_ctx_is_ready);
3362
  // Initialize HORZ and VERT partitions as not ready.
3363
8.65M
  av1_zero(part_search_state->is_rect_ctx_is_ready);
3364
3365
  // Chroma subsampling.
3366
8.65M
  part_search_state->ss_x = x->e_mbd.plane[1].subsampling_x;
3367
8.65M
  part_search_state->ss_y = x->e_mbd.plane[1].subsampling_y;
3368
3369
  // Initialize partition search flags to defaults.
3370
8.65M
  part_search_state->terminate_partition_search = 0;
3371
8.65M
  part_search_state->do_square_split = blk_params->bsize_at_least_8x8;
3372
8.65M
  part_search_state->do_rectangular_split =
3373
8.65M
      cpi->oxcf.part_cfg.enable_rect_partitions &&
3374
8.65M
      blk_params->bsize_at_least_8x8;
3375
8.65M
  av1_zero(part_search_state->prune_rect_part);
3376
3377
  // Initialize allowed partition types for the partition block.
3378
8.65M
  part_search_state->partition_none_allowed =
3379
8.65M
      av1_blk_has_rows_and_cols(blk_params);
3380
8.65M
  part_search_state->partition_rect_allowed[HORZ] =
3381
8.65M
      part_search_state->do_rectangular_split && blk_params->has_cols &&
3382
4.60M
      get_plane_block_size(get_partition_subsize(bsize, PARTITION_HORZ),
3383
4.60M
                           part_search_state->ss_x,
3384
4.60M
                           part_search_state->ss_y) != BLOCK_INVALID;
3385
8.65M
  part_search_state->partition_rect_allowed[VERT] =
3386
8.65M
      part_search_state->do_rectangular_split && blk_params->has_rows &&
3387
4.58M
      get_plane_block_size(get_partition_subsize(bsize, PARTITION_VERT),
3388
4.58M
                           part_search_state->ss_x,
3389
4.58M
                           part_search_state->ss_y) != BLOCK_INVALID;
3390
3391
  // Reset the flag indicating whether a partition leading to an rdcost lower
3392
  // than the bound best_rdc has been found.
3393
8.65M
  part_search_state->found_best_partition = false;
3394
3395
#if CONFIG_COLLECT_PARTITION_STATS
3396
  init_partition_block_timing_stats(&part_search_state->part_timing_stats);
3397
#endif  // CONFIG_COLLECT_PARTITION_STATS
3398
8.65M
}
3399
3400
// Override partition cost buffer for the edge blocks.
3401
static void set_partition_cost_for_edge_blk(
3402
807k
    AV1_COMMON const *cm, PartitionSearchState *part_search_state) {
3403
807k
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
3404
807k
  assert(blk_params.bsize_at_least_8x8 && part_search_state->pl_ctx_idx >= 0);
3405
807k
  const aom_cdf_prob *partition_cdf =
3406
807k
      cm->fc->partition_cdf[part_search_state->pl_ctx_idx];
3407
807k
  const int max_cost = av1_cost_symbol(0);
3408
4.03M
  for (PARTITION_TYPE i = 0; i < PARTITION_TYPES; ++i)
3409
3.23M
    part_search_state->tmp_partition_cost[i] = max_cost;
3410
807k
  if (blk_params.has_cols) {
3411
    // At the bottom, the two possibilities are HORZ and SPLIT.
3412
358k
    aom_cdf_prob bot_cdf[2];
3413
358k
    partition_gather_vert_alike(bot_cdf, partition_cdf, blk_params.bsize);
3414
358k
    static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT };
3415
358k
    av1_cost_tokens_from_cdf(part_search_state->tmp_partition_cost, bot_cdf,
3416
358k
                             bot_inv_map);
3417
448k
  } else if (blk_params.has_rows) {
3418
    // At the right, the two possibilities are VERT and SPLIT.
3419
342k
    aom_cdf_prob rhs_cdf[2];
3420
342k
    partition_gather_horz_alike(rhs_cdf, partition_cdf, blk_params.bsize);
3421
342k
    static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT };
3422
342k
    av1_cost_tokens_from_cdf(part_search_state->tmp_partition_cost, rhs_cdf,
3423
342k
                             rhs_inv_map);
3424
342k
  } else {
3425
    // At the bottom right, we always split.
3426
106k
    part_search_state->tmp_partition_cost[PARTITION_SPLIT] = 0;
3427
106k
  }
3428
  // Override the partition cost buffer.
3429
807k
  part_search_state->partition_cost = part_search_state->tmp_partition_cost;
3430
807k
}
3431
3432
// Reset the partition search state flags when
3433
// must_find_valid_partition is equal to 1.
3434
static inline void reset_part_limitations(
3435
0
    AV1_COMP *const cpi, PartitionSearchState *part_search_state) {
3436
0
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
3437
0
  const int is_rect_part_allowed =
3438
0
      blk_params.bsize_at_least_8x8 &&
3439
0
      cpi->oxcf.part_cfg.enable_rect_partitions &&
3440
0
      (blk_params.width > blk_params.min_partition_size_1d);
3441
0
  part_search_state->do_square_split =
3442
0
      blk_params.bsize_at_least_8x8 &&
3443
0
      (blk_params.width > blk_params.min_partition_size_1d);
3444
0
  part_search_state->partition_none_allowed =
3445
0
      av1_blk_has_rows_and_cols(&blk_params) &&
3446
0
      (blk_params.width >= blk_params.min_partition_size_1d);
3447
0
  part_search_state->partition_rect_allowed[HORZ] =
3448
0
      blk_params.has_cols && is_rect_part_allowed &&
3449
0
      get_plane_block_size(
3450
0
          get_partition_subsize(blk_params.bsize, PARTITION_HORZ),
3451
0
          part_search_state->ss_x, part_search_state->ss_y) != BLOCK_INVALID;
3452
0
  part_search_state->partition_rect_allowed[VERT] =
3453
0
      blk_params.has_rows && is_rect_part_allowed &&
3454
0
      get_plane_block_size(
3455
0
          get_partition_subsize(blk_params.bsize, PARTITION_VERT),
3456
0
          part_search_state->ss_x, part_search_state->ss_y) != BLOCK_INVALID;
3457
0
  part_search_state->terminate_partition_search = 0;
3458
0
}
3459
3460
// Rectangular partitions evaluation at sub-block level.
3461
static void rd_pick_rect_partition(AV1_COMP *const cpi, TileDataEnc *tile_data,
3462
                                   MACROBLOCK *x,
3463
                                   PICK_MODE_CONTEXT *cur_partition_ctx,
3464
                                   PartitionSearchState *part_search_state,
3465
                                   RD_STATS *best_rdc, const int idx,
3466
                                   int mi_row, int mi_col, BLOCK_SIZE bsize,
3467
884k
                                   PARTITION_TYPE partition_type) {
3468
  // Obtain the remainder from the best rd cost
3469
  // for further processing of partition.
3470
884k
  RD_STATS best_remain_rdcost;
3471
884k
  av1_rd_stats_subtraction(x->rdmult, best_rdc, &part_search_state->sum_rdc,
3472
884k
                           &best_remain_rdcost);
3473
3474
  // Obtain the best mode for the partition sub-block.
3475
884k
  pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &part_search_state->this_rdc,
3476
884k
                partition_type, bsize, cur_partition_ctx, best_remain_rdcost);
3477
884k
  av1_rd_cost_update(x->rdmult, &part_search_state->this_rdc);
3478
3479
  // Update the partition rd cost with the current sub-block rd.
3480
884k
  if (part_search_state->this_rdc.rate == INT_MAX) {
3481
339k
    part_search_state->sum_rdc.rdcost = INT64_MAX;
3482
545k
  } else {
3483
545k
    part_search_state->sum_rdc.rate += part_search_state->this_rdc.rate;
3484
545k
    part_search_state->sum_rdc.dist += part_search_state->this_rdc.dist;
3485
545k
    av1_rd_cost_update(x->rdmult, &part_search_state->sum_rdc);
3486
545k
  }
3487
884k
  const RECT_PART_TYPE rect_part =
3488
884k
      partition_type == PARTITION_HORZ ? HORZ : VERT;
3489
884k
  part_search_state->rect_part_rd[rect_part][idx] =
3490
884k
      part_search_state->this_rdc.rdcost;
3491
884k
}
3492
3493
typedef int (*active_edge_info)(const AV1_COMP *cpi, int mi_col, int mi_step);
3494
3495
// Checks if HORZ / VERT partition search is allowed.
3496
static inline int is_rect_part_allowed(
3497
    const AV1_COMP *cpi, const PartitionSearchState *part_search_state,
3498
    const active_edge_info *active_edge, RECT_PART_TYPE rect_part,
3499
17.3M
    const int mi_pos) {
3500
17.3M
  const PartitionBlkParams *blk_params = &part_search_state->part_blk_params;
3501
17.3M
  const int is_part_allowed =
3502
17.3M
      (!part_search_state->terminate_partition_search &&
3503
15.9M
       part_search_state->partition_rect_allowed[rect_part] &&
3504
1.48M
       !part_search_state->prune_rect_part[rect_part] &&
3505
1.16M
       (part_search_state->do_rectangular_split ||
3506
827k
        active_edge[rect_part](cpi, mi_pos, blk_params->mi_step)));
3507
17.3M
  return is_part_allowed;
3508
17.3M
}
3509
3510
static void rectangular_partition_search(
3511
    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
3512
    TokenExtra **tp, MACROBLOCK *x, PC_TREE *pc_tree,
3513
    RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
3514
    PartitionSearchState *part_search_state, RD_STATS *best_rdc,
3515
    RD_RECT_PART_WIN_INFO *rect_part_win_info, const RECT_PART_TYPE start_type,
3516
8.65M
    const RECT_PART_TYPE end_type) {
3517
8.65M
  const AV1_COMMON *const cm = &cpi->common;
3518
8.65M
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
3519
8.65M
  RD_STATS *sum_rdc = &part_search_state->sum_rdc;
3520
8.65M
  const int rect_partition_type[NUM_RECT_PARTS] = { PARTITION_HORZ,
3521
8.65M
                                                    PARTITION_VERT };
3522
3523
  // mi_pos_rect[NUM_RECT_PARTS][SUB_PARTITIONS_RECT][0]: mi_row position of
3524
  //                                           HORZ and VERT partition types.
3525
  // mi_pos_rect[NUM_RECT_PARTS][SUB_PARTITIONS_RECT][1]: mi_col position of
3526
  //                                           HORZ and VERT partition types.
3527
8.65M
  const int mi_pos_rect[NUM_RECT_PARTS][SUB_PARTITIONS_RECT][2] = {
3528
8.65M
    { { blk_params.mi_row, blk_params.mi_col },
3529
8.65M
      { blk_params.mi_row_edge, blk_params.mi_col } },
3530
8.65M
    { { blk_params.mi_row, blk_params.mi_col },
3531
8.65M
      { blk_params.mi_row, blk_params.mi_col_edge } }
3532
8.65M
  };
3533
3534
  // Initialize active edge_type function pointer
3535
  // for HORZ and VERT partition types.
3536
8.65M
  active_edge_info active_edge_type[NUM_RECT_PARTS] = { av1_active_h_edge,
3537
8.65M
                                                        av1_active_v_edge };
3538
3539
  // Indicates edge blocks for HORZ and VERT partition types.
3540
8.65M
  const int is_not_edge_block[NUM_RECT_PARTS] = { blk_params.has_rows,
3541
8.65M
                                                  blk_params.has_cols };
3542
3543
  // Initialize pc tree context for HORZ and VERT partition types.
3544
8.65M
  PICK_MODE_CONTEXT **cur_ctx[NUM_RECT_PARTS][SUB_PARTITIONS_RECT] = {
3545
8.65M
    { &pc_tree->horizontal[0], &pc_tree->horizontal[1] },
3546
8.65M
    { &pc_tree->vertical[0], &pc_tree->vertical[1] }
3547
8.65M
  };
3548
3549
  // Loop over rectangular partition types.
3550
25.9M
  for (RECT_PART_TYPE i = start_type; i <= end_type; i++) {
3551
17.3M
    assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions,
3552
17.3M
                   !part_search_state->partition_rect_allowed[i]));
3553
3554
    // Check if the HORZ / VERT partition search is to be performed.
3555
17.3M
    if (!is_rect_part_allowed(cpi, part_search_state, active_edge_type, i,
3556
17.3M
                              mi_pos_rect[i][0][i]))
3557
16.6M
      continue;
3558
3559
    // Sub-partition idx.
3560
641k
    int sub_part_idx = 0;
3561
641k
    PARTITION_TYPE partition_type = rect_partition_type[i];
3562
641k
    blk_params.subsize =
3563
641k
        get_partition_subsize(blk_params.bsize, partition_type);
3564
641k
    assert(blk_params.subsize <= BLOCK_LARGEST);
3565
641k
    av1_init_rd_stats(sum_rdc);
3566
1.92M
    for (int j = 0; j < SUB_PARTITIONS_RECT; j++) {
3567
1.28M
      if (cur_ctx[i][j][0] == NULL) {
3568
1.28M
        cur_ctx[i][j][0] =
3569
1.28M
            av1_alloc_pmc(cpi, blk_params.subsize, &td->shared_coeff_buf);
3570
1.28M
        if (!cur_ctx[i][j][0])
3571
0
          aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
3572
0
                             "Failed to allocate PICK_MODE_CONTEXT");
3573
1.28M
      }
3574
1.28M
    }
3575
641k
    sum_rdc->rate = part_search_state->partition_cost[partition_type];
3576
641k
    sum_rdc->rdcost = RDCOST(x->rdmult, sum_rdc->rate, 0);
3577
#if CONFIG_COLLECT_PARTITION_STATS
3578
    PartitionTimingStats *part_timing_stats =
3579
        &part_search_state->part_timing_stats;
3580
    if (best_rdc->rdcost - sum_rdc->rdcost >= 0) {
3581
      start_partition_block_timer(part_timing_stats, partition_type);
3582
    }
3583
#endif
3584
3585
    // First sub-partition evaluation in HORZ / VERT partition type.
3586
641k
    rd_pick_rect_partition(
3587
641k
        cpi, tile_data, x, cur_ctx[i][sub_part_idx][0], part_search_state,
3588
641k
        best_rdc, 0, mi_pos_rect[i][sub_part_idx][0],
3589
641k
        mi_pos_rect[i][sub_part_idx][1], blk_params.subsize, partition_type);
3590
3591
    // Start of second sub-partition evaluation.
3592
    // Evaluate second sub-partition if the first sub-partition cost
3593
    // is less than the best cost and if it is not an edge block.
3594
641k
    if (sum_rdc->rdcost < best_rdc->rdcost && is_not_edge_block[i]) {
3595
240k
      const MB_MODE_INFO *const mbmi = &cur_ctx[i][sub_part_idx][0]->mic;
3596
240k
      const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3597
      // Neither palette mode nor cfl predicted.
3598
240k
      if (pmi->palette_size[PLANE_TYPE_Y] == 0 &&
3599
240k
          pmi->palette_size[PLANE_TYPE_UV] == 0) {
3600
240k
        if (mbmi->uv_mode != UV_CFL_PRED)
3601
210k
          part_search_state->is_rect_ctx_is_ready[i] = 1;
3602
240k
      }
3603
240k
      av1_update_state(cpi, td, cur_ctx[i][sub_part_idx][0], blk_params.mi_row,
3604
240k
                       blk_params.mi_col, blk_params.subsize, DRY_RUN_NORMAL);
3605
240k
      encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL,
3606
240k
                        blk_params.subsize, NULL);
3607
3608
      // Second sub-partition evaluation in HORZ / VERT partition type.
3609
240k
      sub_part_idx = 1;
3610
240k
      rd_pick_rect_partition(
3611
240k
          cpi, tile_data, x, cur_ctx[i][sub_part_idx][0], part_search_state,
3612
240k
          best_rdc, 1, mi_pos_rect[i][sub_part_idx][0],
3613
240k
          mi_pos_rect[i][sub_part_idx][1], blk_params.subsize, partition_type);
3614
240k
    }
3615
    // Update HORZ / VERT best partition.
3616
641k
    if (sum_rdc->rdcost < best_rdc->rdcost) {
3617
164k
      sum_rdc->rdcost = RDCOST(x->rdmult, sum_rdc->rate, sum_rdc->dist);
3618
164k
      if (sum_rdc->rdcost < best_rdc->rdcost) {
3619
164k
        *best_rdc = *sum_rdc;
3620
164k
        part_search_state->found_best_partition = true;
3621
164k
        pc_tree->partitioning = partition_type;
3622
164k
      }
3623
476k
    } else {
3624
      // Update HORZ / VERT win flag.
3625
476k
      if (rect_part_win_info != NULL)
3626
450k
        rect_part_win_info->rect_part_win[i] = false;
3627
476k
    }
3628
#if CONFIG_COLLECT_PARTITION_STATS
3629
    if (part_timing_stats->timer_is_on) {
3630
      end_partition_block_timer(part_timing_stats, partition_type,
3631
                                sum_rdc->rdcost);
3632
    }
3633
#endif
3634
641k
    av1_restore_context(x, x_ctx, blk_params.mi_row, blk_params.mi_col,
3635
641k
                        blk_params.bsize, av1_num_planes(cm));
3636
641k
  }
3637
8.65M
}
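An aside on the budgeting used above: before each call to pick_sb_modes, rd_pick_rect_partition subtracts the cost already accumulated in sum_rdc from best_rdc, so the mode search for the second sub-block can stop as soon as it exceeds the remaining budget. The standalone sketch below illustrates that idea with a deliberately simplified rd cost model (rate * lambda + dist); it is not libaom's av1_rd_stats_subtraction, and the struct, lambda, and sample numbers are illustrative only.

/* Minimal, self-contained sketch of remaining-RD-budget subtraction
 * (simplified; not libaom's av1_rd_stats_subtraction). */
#include <stdint.h>
#include <stdio.h>

typedef struct {
  int64_t rate;   /* bits, simplified units */
  int64_t dist;   /* distortion, simplified units */
  int64_t rdcost; /* rate * lambda + dist in this simplified model */
} rd_stats_t;

static int64_t rdcost(int64_t lambda, int64_t rate, int64_t dist) {
  return rate * lambda + dist;
}

/* Remaining budget = best cost so far minus what the already evaluated
 * sub-blocks have consumed; a later mode search can bail out once it
 * exceeds this remainder. */
static void rd_budget_subtract(int64_t lambda, const rd_stats_t *best,
                               const rd_stats_t *spent, rd_stats_t *remain) {
  remain->rate = best->rate - spent->rate;
  remain->dist = best->dist - spent->dist;
  remain->rdcost = rdcost(lambda, remain->rate, remain->dist);
}

int main(void) {
  const int64_t lambda = 100;                                /* illustrative */
  const rd_stats_t best = { 500, 40000, 500 * 100 + 40000 }; /* illustrative */
  const rd_stats_t spent = { 120, 9000, 120 * 100 + 9000 };  /* illustrative */
  rd_stats_t remain;
  rd_budget_subtract(lambda, &best, &spent, &remain);
  printf("remaining rd budget: %lld\n", (long long)remain.rdcost);
  return 0;
}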
3638
3639
// AB partition type evaluation.
3640
static void rd_pick_ab_part(
3641
    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
3642
    TokenExtra **tp, MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
3643
    PC_TREE *pc_tree, PICK_MODE_CONTEXT *dst_ctxs[SUB_PARTITIONS_AB],
3644
    PartitionSearchState *part_search_state, RD_STATS *best_rdc,
3645
    const BLOCK_SIZE ab_subsize[SUB_PARTITIONS_AB],
3646
    const int ab_mi_pos[SUB_PARTITIONS_AB][2], const PARTITION_TYPE part_type,
3647
0
    const MB_MODE_INFO **mode_cache) {
3648
0
  const AV1_COMMON *const cm = &cpi->common;
3649
0
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
3650
0
  const int mi_row = blk_params.mi_row;
3651
0
  const int mi_col = blk_params.mi_col;
3652
0
  const BLOCK_SIZE bsize = blk_params.bsize;
3653
0
  int64_t this_rdcost = 0;
3654
3655
#if CONFIG_COLLECT_PARTITION_STATS
3656
  PartitionTimingStats *part_timing_stats =
3657
      &part_search_state->part_timing_stats;
3658
  {
3659
    RD_STATS tmp_sum_rdc;
3660
    av1_init_rd_stats(&tmp_sum_rdc);
3661
    tmp_sum_rdc.rate = part_search_state->partition_cost[part_type];
3662
    tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
3663
    if (best_rdc->rdcost - tmp_sum_rdc.rdcost >= 0) {
3664
      start_partition_block_timer(part_timing_stats, part_type);
3665
    }
3666
  }
3667
#endif
3668
3669
  // Test this partition and update the best partition.
3670
0
  const bool find_best_ab_part = rd_test_partition3(
3671
0
      cpi, td, tile_data, tp, pc_tree, best_rdc, &this_rdcost, dst_ctxs, mi_row,
3672
0
      mi_col, bsize, part_type, ab_subsize, ab_mi_pos, mode_cache);
3673
0
  part_search_state->found_best_partition |= find_best_ab_part;
3674
3675
#if CONFIG_COLLECT_PARTITION_STATS
3676
  if (part_timing_stats->timer_is_on) {
3677
    if (!find_best_ab_part) this_rdcost = INT64_MAX;
3678
    end_partition_block_timer(part_timing_stats, part_type, this_rdcost);
3679
  }
3680
#endif
3681
0
  av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm));
3682
0
}
3683
3684
// Set mode search context.
3685
static inline void set_mode_search_ctx(
3686
    PC_TREE *pc_tree, const int is_ctx_ready[NUM_AB_PARTS][2],
3687
7.96M
    PICK_MODE_CONTEXT **mode_srch_ctx[NUM_AB_PARTS][2]) {
3688
7.96M
  mode_srch_ctx[HORZ_B][0] = &pc_tree->horizontal[0];
3689
7.96M
  mode_srch_ctx[VERT_B][0] = &pc_tree->vertical[0];
3690
3691
7.96M
  if (is_ctx_ready[HORZ_A][0])
3692
1.76M
    mode_srch_ctx[HORZ_A][0] = &pc_tree->split[0]->none;
3693
3694
7.96M
  if (is_ctx_ready[VERT_A][0])
3695
1.76M
    mode_srch_ctx[VERT_A][0] = &pc_tree->split[0]->none;
3696
3697
7.96M
  if (is_ctx_ready[HORZ_A][1])
3698
1.48M
    mode_srch_ctx[HORZ_A][1] = &pc_tree->split[1]->none;
3699
7.96M
}
3700
3701
static inline void copy_partition_mode_from_mode_context(
3702
0
    const MB_MODE_INFO **dst_mode, const PICK_MODE_CONTEXT *ctx) {
3703
0
  if (ctx && ctx->rd_stats.rate < INT_MAX) {
3704
0
    *dst_mode = &ctx->mic;
3705
0
  } else {
3706
0
    *dst_mode = NULL;
3707
0
  }
3708
0
}
3709
3710
static inline void copy_partition_mode_from_pc_tree(
3711
0
    const MB_MODE_INFO **dst_mode, const PC_TREE *pc_tree) {
3712
0
  if (pc_tree) {
3713
0
    copy_partition_mode_from_mode_context(dst_mode, pc_tree->none);
3714
0
  } else {
3715
0
    *dst_mode = NULL;
3716
0
  }
3717
0
}
3718
3719
static inline void set_mode_cache_for_partition_ab(
3720
    const MB_MODE_INFO **mode_cache, const PC_TREE *pc_tree,
3721
0
    AB_PART_TYPE ab_part_type) {
3722
0
  switch (ab_part_type) {
3723
0
    case HORZ_A:
3724
0
      copy_partition_mode_from_pc_tree(&mode_cache[0], pc_tree->split[0]);
3725
0
      copy_partition_mode_from_pc_tree(&mode_cache[1], pc_tree->split[1]);
3726
0
      copy_partition_mode_from_mode_context(&mode_cache[2],
3727
0
                                            pc_tree->horizontal[1]);
3728
0
      break;
3729
0
    case HORZ_B:
3730
0
      copy_partition_mode_from_mode_context(&mode_cache[0],
3731
0
                                            pc_tree->horizontal[0]);
3732
0
      copy_partition_mode_from_pc_tree(&mode_cache[1], pc_tree->split[2]);
3733
0
      copy_partition_mode_from_pc_tree(&mode_cache[2], pc_tree->split[3]);
3734
0
      break;
3735
0
    case VERT_A:
3736
0
      copy_partition_mode_from_pc_tree(&mode_cache[0], pc_tree->split[0]);
3737
0
      copy_partition_mode_from_pc_tree(&mode_cache[1], pc_tree->split[2]);
3738
0
      copy_partition_mode_from_mode_context(&mode_cache[2],
3739
0
                                            pc_tree->vertical[1]);
3740
0
      break;
3741
0
    case VERT_B:
3742
0
      copy_partition_mode_from_mode_context(&mode_cache[0],
3743
0
                                            pc_tree->vertical[0]);
3744
0
      copy_partition_mode_from_pc_tree(&mode_cache[1], pc_tree->split[1]);
3745
0
      copy_partition_mode_from_pc_tree(&mode_cache[2], pc_tree->split[3]);
3746
0
      break;
3747
0
    default: assert(0 && "Invalid ab partition type!\n");
3748
0
  }
3749
0
}
3750
3751
// AB Partitions type search.
3752
static void ab_partitions_search(
3753
    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
3754
    TokenExtra **tp, MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
3755
    PC_TREE *pc_tree, PartitionSearchState *part_search_state,
3756
    RD_STATS *best_rdc, RD_RECT_PART_WIN_INFO *rect_part_win_info,
3757
    int pb_source_variance, int ext_partition_allowed,
3758
8.65M
    const AB_PART_TYPE start_type, const AB_PART_TYPE end_type) {
3759
8.65M
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
3760
8.65M
  const int mi_row = blk_params.mi_row;
3761
8.65M
  const int mi_col = blk_params.mi_col;
3762
8.65M
  const BLOCK_SIZE bsize = blk_params.bsize;
3763
3764
8.65M
  if (part_search_state->terminate_partition_search) {
3765
690k
    return;
3766
690k
  }
3767
3768
7.96M
  int ab_partitions_allowed[NUM_AB_PARTS];
3769
  // Prune AB partitions
3770
7.96M
  av1_prune_ab_partitions(cpi, x, pc_tree, pb_source_variance, best_rdc->rdcost,
3771
7.96M
                          rect_part_win_info, ext_partition_allowed,
3772
7.96M
                          part_search_state, ab_partitions_allowed);
3773
3774
  // Flags to indicate whether the mode search is done.
3775
7.96M
  const int is_ctx_ready[NUM_AB_PARTS][2] = {
3776
7.96M
    { part_search_state->is_split_ctx_is_ready[0],
3777
7.96M
      part_search_state->is_split_ctx_is_ready[1] },
3778
7.96M
    { part_search_state->is_rect_ctx_is_ready[HORZ], 0 },
3779
7.96M
    { part_search_state->is_split_ctx_is_ready[0], 0 },
3780
7.96M
    { part_search_state->is_rect_ctx_is_ready[VERT], 0 }
3781
7.96M
  };
3782
3783
  // Current partition context.
3784
7.96M
  PICK_MODE_CONTEXT **cur_part_ctxs[NUM_AB_PARTS] = { pc_tree->horizontala,
3785
7.96M
                                                      pc_tree->horizontalb,
3786
7.96M
                                                      pc_tree->verticala,
3787
7.96M
                                                      pc_tree->verticalb };
3788
3789
  // Context of already evaluated partition types.
3790
7.96M
  PICK_MODE_CONTEXT **mode_srch_ctx[NUM_AB_PARTS][2];
3791
  // Set context of already evaluated partition types.
3792
7.96M
  set_mode_search_ctx(pc_tree, is_ctx_ready, mode_srch_ctx);
3793
3794
  // Array of sub-partition sizes of AB partition types.
3795
7.96M
  const BLOCK_SIZE ab_subsize[NUM_AB_PARTS][SUB_PARTITIONS_AB] = {
3796
7.96M
    { blk_params.split_bsize2, blk_params.split_bsize2,
3797
7.96M
      get_partition_subsize(bsize, PARTITION_HORZ_A) },
3798
7.96M
    { get_partition_subsize(bsize, PARTITION_HORZ_B), blk_params.split_bsize2,
3799
7.96M
      blk_params.split_bsize2 },
3800
7.96M
    { blk_params.split_bsize2, blk_params.split_bsize2,
3801
7.96M
      get_partition_subsize(bsize, PARTITION_VERT_A) },
3802
7.96M
    { get_partition_subsize(bsize, PARTITION_VERT_B), blk_params.split_bsize2,
3803
7.96M
      blk_params.split_bsize2 }
3804
7.96M
  };
3805
3806
  // Array of mi_row, mi_col positions corresponding to each sub-partition in AB
3807
  // partition types.
3808
7.96M
  const int ab_mi_pos[NUM_AB_PARTS][SUB_PARTITIONS_AB][2] = {
3809
7.96M
    { { mi_row, mi_col },
3810
7.96M
      { mi_row, blk_params.mi_col_edge },
3811
7.96M
      { blk_params.mi_row_edge, mi_col } },
3812
7.96M
    { { mi_row, mi_col },
3813
7.96M
      { blk_params.mi_row_edge, mi_col },
3814
7.96M
      { blk_params.mi_row_edge, blk_params.mi_col_edge } },
3815
7.96M
    { { mi_row, mi_col },
3816
7.96M
      { blk_params.mi_row_edge, mi_col },
3817
7.96M
      { mi_row, blk_params.mi_col_edge } },
3818
7.96M
    { { mi_row, mi_col },
3819
7.96M
      { mi_row, blk_params.mi_col_edge },
3820
7.96M
      { blk_params.mi_row_edge, blk_params.mi_col_edge } }
3821
7.96M
  };
3822
3823
  // Loop over AB partition types.
3824
39.8M
  for (AB_PART_TYPE ab_part_type = start_type; ab_part_type <= end_type;
3825
31.8M
       ab_part_type++) {
3826
31.8M
    const PARTITION_TYPE part_type = ab_part_type + PARTITION_HORZ_A;
3827
3828
    // Check if the AB partition search is to be performed.
3829
31.8M
    if (!ab_partitions_allowed[ab_part_type]) {
3830
31.8M
      continue;
3831
31.8M
    }
3832
3833
18.4E
    blk_params.subsize = get_partition_subsize(bsize, part_type);
3834
18.4E
    for (int i = 0; i < SUB_PARTITIONS_AB; i++) {
3835
      // Set AB partition context.
3836
0
      cur_part_ctxs[ab_part_type][i] = av1_alloc_pmc(
3837
0
          cpi, ab_subsize[ab_part_type][i], &td->shared_coeff_buf);
3838
0
      if (!cur_part_ctxs[ab_part_type][i])
3839
0
        aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
3840
0
                           "Failed to allocate PICK_MODE_CONTEXT");
3841
      // Set mode as not ready.
3842
0
      cur_part_ctxs[ab_part_type][i]->rd_mode_is_ready = 0;
3843
0
    }
3844
3845
18.4E
    if (cpi->sf.part_sf.reuse_prev_rd_results_for_part_ab) {
3846
      // We can directly copy the mode search results if we have already
3847
      // searched the current block and the contexts match.
3848
0
      if (is_ctx_ready[ab_part_type][0]) {
3849
0
        av1_copy_tree_context(cur_part_ctxs[ab_part_type][0],
3850
0
                              mode_srch_ctx[ab_part_type][0][0]);
3851
0
        cur_part_ctxs[ab_part_type][0]->mic.partition = part_type;
3852
0
        cur_part_ctxs[ab_part_type][0]->rd_mode_is_ready = 1;
3853
0
        if (is_ctx_ready[ab_part_type][1]) {
3854
0
          av1_copy_tree_context(cur_part_ctxs[ab_part_type][1],
3855
0
                                mode_srch_ctx[ab_part_type][1][0]);
3856
0
          cur_part_ctxs[ab_part_type][1]->mic.partition = part_type;
3857
0
          cur_part_ctxs[ab_part_type][1]->rd_mode_is_ready = 1;
3858
0
        }
3859
0
      }
3860
0
    }
3861
3862
    // Even if the contexts don't match, we can still speed up by reusing the
3863
    // previous prediction mode.
3864
18.4E
    const MB_MODE_INFO *mode_cache[3] = { NULL, NULL, NULL };
3865
18.4E
    if (cpi->sf.part_sf.reuse_best_prediction_for_part_ab) {
3866
0
      set_mode_cache_for_partition_ab(mode_cache, pc_tree, ab_part_type);
3867
0
    }
3868
3869
    // Evaluation of AB partition type.
3870
18.4E
    rd_pick_ab_part(cpi, td, tile_data, tp, x, x_ctx, pc_tree,
3871
18.4E
                    cur_part_ctxs[ab_part_type], part_search_state, best_rdc,
3872
18.4E
                    ab_subsize[ab_part_type], ab_mi_pos[ab_part_type],
3873
18.4E
                    part_type, mode_cache);
3874
18.4E
  }
3875
7.96M
}
3876
3877
// Set mi positions for HORZ4 / VERT4 sub-block partitions.
3878
static void set_mi_pos_partition4(const int inc_step[NUM_PART4_TYPES],
3879
                                  int mi_pos[SUB_PARTITIONS_PART4][2],
3880
0
                                  const int mi_row, const int mi_col) {
3881
0
  for (PART4_TYPES i = 0; i < SUB_PARTITIONS_PART4; i++) {
3882
0
    mi_pos[i][0] = mi_row + i * inc_step[HORZ4];
3883
0
    mi_pos[i][1] = mi_col + i * inc_step[VERT4];
3884
0
  }
3885
0
}
3886
3887
// Set context and RD cost for HORZ4 / VERT4 partition types.
3888
static void set_4_part_ctx_and_rdcost(
3889
    MACROBLOCK *x, const AV1_COMP *const cpi, ThreadData *td,
3890
    PICK_MODE_CONTEXT *cur_part_ctx[SUB_PARTITIONS_PART4],
3891
    PartitionSearchState *part_search_state, PARTITION_TYPE partition_type,
3892
0
    BLOCK_SIZE bsize) {
3893
  // Initialize sum_rdc RD cost structure.
3894
0
  av1_init_rd_stats(&part_search_state->sum_rdc);
3895
0
  const int subsize = get_partition_subsize(bsize, partition_type);
3896
0
  part_search_state->sum_rdc.rate =
3897
0
      part_search_state->partition_cost[partition_type];
3898
0
  part_search_state->sum_rdc.rdcost =
3899
0
      RDCOST(x->rdmult, part_search_state->sum_rdc.rate, 0);
3900
0
  for (PART4_TYPES i = 0; i < SUB_PARTITIONS_PART4; ++i) {
3901
0
    cur_part_ctx[i] = av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
3902
0
    if (!cur_part_ctx[i])
3903
0
      aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
3904
0
                         "Failed to allocate PICK_MODE_CONTEXT");
3905
0
  }
3906
0
}
3907
3908
// Partition search of HORZ4 / VERT4 partition types.
3909
static void rd_pick_4partition(
3910
    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
3911
    TokenExtra **tp, MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
3912
    PC_TREE *pc_tree, PICK_MODE_CONTEXT *cur_part_ctx[SUB_PARTITIONS_PART4],
3913
    PartitionSearchState *part_search_state, RD_STATS *best_rdc,
3914
0
    const int inc_step[NUM_PART4_TYPES], PARTITION_TYPE partition_type) {
3915
0
  const AV1_COMMON *const cm = &cpi->common;
3916
0
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
3917
  // mi positions needed for HORZ4 and VERT4 partition types.
3918
0
  int mi_pos_check[NUM_PART4_TYPES] = { cm->mi_params.mi_rows,
3919
0
                                        cm->mi_params.mi_cols };
3920
0
  const PART4_TYPES part4_idx = (partition_type != PARTITION_HORZ_4);
3921
0
  int mi_pos[SUB_PARTITIONS_PART4][2];
3922
3923
0
  blk_params.subsize = get_partition_subsize(blk_params.bsize, partition_type);
3924
  // Set partition context and RD cost.
3925
0
  set_4_part_ctx_and_rdcost(x, cpi, td, cur_part_ctx, part_search_state,
3926
0
                            partition_type, blk_params.bsize);
3927
  // Set mi positions for sub-block sizes.
3928
0
  set_mi_pos_partition4(inc_step, mi_pos, blk_params.mi_row, blk_params.mi_col);
3929
#if CONFIG_COLLECT_PARTITION_STATS
3930
  PartitionTimingStats *part_timing_stats =
3931
      &part_search_state->part_timing_stats;
3932
  if (best_rdc->rdcost - part_search_state->sum_rdc.rdcost >= 0) {
3933
    start_partition_block_timer(part_timing_stats, partition_type);
3934
  }
3935
#endif
3936
  // Loop over sub-block partitions.
3937
0
  for (PART4_TYPES i = 0; i < SUB_PARTITIONS_PART4; ++i) {
3938
0
    if (i > 0 && mi_pos[i][part4_idx] >= mi_pos_check[part4_idx]) break;
3939
3940
    // Sub-block evaluation of Horz4 / Vert4 partition type.
3941
0
    cur_part_ctx[i]->rd_mode_is_ready = 0;
3942
0
    if (!rd_try_subblock(
3943
0
            cpi, td, tile_data, tp, (i == SUB_PARTITIONS_PART4 - 1),
3944
0
            mi_pos[i][0], mi_pos[i][1], blk_params.subsize, *best_rdc,
3945
0
            &part_search_state->sum_rdc, partition_type, cur_part_ctx[i])) {
3946
0
      av1_invalid_rd_stats(&part_search_state->sum_rdc);
3947
0
      break;
3948
0
    }
3949
0
  }
3950
3951
  // Calculate the total cost and update the best partition.
3952
0
  av1_rd_cost_update(x->rdmult, &part_search_state->sum_rdc);
3953
0
  if (part_search_state->sum_rdc.rdcost < best_rdc->rdcost) {
3954
0
    *best_rdc = part_search_state->sum_rdc;
3955
0
    part_search_state->found_best_partition = true;
3956
0
    pc_tree->partitioning = partition_type;
3957
0
  }
3958
#if CONFIG_COLLECT_PARTITION_STATS
3959
  if (part_timing_stats->timer_is_on) {
3960
    end_partition_block_timer(part_timing_stats, partition_type,
3961
                              part_search_state->sum_rdc.rdcost);
3962
  }
3963
#endif
3964
0
  av1_restore_context(x, x_ctx, blk_params.mi_row, blk_params.mi_col,
3965
0
                      blk_params.bsize, av1_num_planes(cm));
3966
0
}
3967
3968
// Do not evaluate extended partitions if NONE partition is skippable.
3969
static inline int prune_ext_part_none_skippable(
3970
    PICK_MODE_CONTEXT *part_none, int must_find_valid_partition,
3971
8.65M
    int skip_non_sq_part_based_on_none, BLOCK_SIZE bsize) {
3972
8.65M
  if ((skip_non_sq_part_based_on_none >= 1) && (part_none != NULL)) {
3973
366k
    if (part_none->skippable && !must_find_valid_partition &&
3974
596
        bsize >= BLOCK_16X16) {
3975
10
      return 1;
3976
10
    }
3977
366k
  }
3978
8.65M
  return 0;
3979
8.65M
}
3980
3981
// Allow ab partition search
3982
static int allow_ab_partition_search(PartitionSearchState *part_search_state,
3983
                                     PARTITION_SPEED_FEATURES *part_sf,
3984
                                     PARTITION_TYPE curr_best_part,
3985
                                     int must_find_valid_partition,
3986
                                     int prune_ext_part_state,
3987
8.65M
                                     int64_t best_rdcost) {
3988
8.65M
  const PartitionBlkParams blk_params = part_search_state->part_blk_params;
3989
8.65M
  const BLOCK_SIZE bsize = blk_params.bsize;
3990
3991
  // Do not prune if there is no valid partition
3992
8.65M
  if (best_rdcost == INT64_MAX) return 1;
3993
3994
  // Determine bsize threshold to evaluate ab partitions
3995
8.65M
  BLOCK_SIZE ab_bsize_thresh = part_sf->ext_partition_eval_thresh;
3996
8.65M
  if (part_sf->ext_part_eval_based_on_cur_best && !must_find_valid_partition &&
3997
0
      !(curr_best_part == PARTITION_HORZ || curr_best_part == PARTITION_VERT))
3998
0
    ab_bsize_thresh = BLOCK_128X128;
3999
4000
  // ab partitions are only allowed for square block sizes BLOCK_16X16 or
4001
  // higher, so ab_bsize_thresh must be large enough to exclude BLOCK_4X4 and
4002
  // BLOCK_8X8.
4003
8.65M
  assert(ab_bsize_thresh >= BLOCK_8X8);
4004
4005
8.65M
  int ab_partition_allowed =
4006
8.65M
      part_search_state->do_rectangular_split && bsize > ab_bsize_thresh &&
4007
0
      av1_blk_has_rows_and_cols(&blk_params) && !prune_ext_part_state;
4008
4009
8.65M
  return ab_partition_allowed;
4010
8.65M
}
4011
4012
// Prune 4-way partitions based on the number of horz/vert wins
4013
// in the current block and sub-blocks in PARTITION_SPLIT.
4014
static void prune_4_partition_using_split_info(
4015
    AV1_COMP *const cpi, MACROBLOCK *x, PartitionSearchState *part_search_state,
4016
696k
    int part4_search_allowed[NUM_PART4_TYPES]) {
4017
696k
  PART4_TYPES cur_part[NUM_PART4_TYPES] = { HORZ4, VERT4 };
4018
  // Count of child blocks in which HORZ or VERT partition has won
4019
696k
  int num_child_rect_win[NUM_RECT_PARTS] = { 0, 0 };
4020
  // Prune HORZ4/VERT4 partitions based on number of HORZ/VERT winners of
4021
  // split partitions.
4022
  // Conservative pruning for high quantizers.
4023
696k
  const int num_win_thresh = AOMMIN(3 * (MAXQ - x->qindex) / MAXQ + 1, 3);
4024
4025
2.09M
  for (RECT_PART_TYPE i = HORZ; i < NUM_RECT_PARTS; i++) {
4026
1.39M
    if (!(cpi->sf.part_sf.prune_ext_part_using_split_info &&
4027
1.39M
          part4_search_allowed[cur_part[i]]))
4028
1.39M
      continue;
4029
    // Loop over split partitions.
4030
    // Get rectangular partitions winner info of split partitions.
4031
18.4E
    for (int idx = 0; idx < SUB_PARTITIONS_SPLIT; idx++)
4032
0
      num_child_rect_win[i] +=
4033
0
          (part_search_state->split_part_rect_win[idx].rect_part_win[i]) ? 1
4034
0
                                                                         : 0;
4035
18.4E
    if (num_child_rect_win[i] < num_win_thresh) {
4036
0
      part4_search_allowed[cur_part[i]] = 0;
4037
0
    }
4038
18.4E
  }
4039
696k
}
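For reference, a worked example of the quantizer-dependent win-count threshold computed above. MAXQ is 255 in libaom; the qindex samples are arbitrary, and AOMMIN is re-declared here only so the sketch compiles on its own.

/* Sketch of the num_win_thresh formula used in
 * prune_4_partition_using_split_info (values are illustrative). */
#include <stdio.h>

#define MAXQ 255
#define AOMMIN(a, b) ((a) < (b) ? (a) : (b))

int main(void) {
  const int qindex_samples[] = { 0, 128, 255 };
  for (int i = 0; i < 3; ++i) {
    const int qindex = qindex_samples[i];
    /* Low qindex (high quality) requires up to 3 child HORZ/VERT wins to keep
     * HORZ4/VERT4; at high qindex the threshold saturates at 1, i.e. pruning
     * becomes conservative. */
    const int num_win_thresh = AOMMIN(3 * (MAXQ - qindex) / MAXQ + 1, 3);
    printf("qindex=%3d -> num_win_thresh=%d\n", qindex, num_win_thresh);
  }
  return 0;
}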
4040
4041
// Prune 4-way partition search.
4042
static void prune_4_way_partition_search(
4043
    AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree,
4044
    PartitionSearchState *part_search_state, RD_STATS *best_rdc,
4045
    int pb_source_variance, int prune_ext_part_state,
4046
8.65M
    int part4_search_allowed[NUM_PART4_TYPES]) {
4047
8.65M
  const PartitionBlkParams blk_params = part_search_state->part_blk_params;
4048
8.65M
  const BLOCK_SIZE bsize = blk_params.bsize;
4049
4050
8.65M
  const PartitionCfg *const part_cfg = &cpi->oxcf.part_cfg;
4051
4052
  // Do not prune if there is no valid partition
4053
8.65M
  if (best_rdc->rdcost == INT64_MAX && part_cfg->enable_1to4_partitions &&
4054
0
      bsize != BLOCK_128X128)
4055
0
    return;
4056
4057
  // Determine bsize threshold to evaluate 4-way partitions
4058
8.65M
  BLOCK_SIZE part4_bsize_thresh = cpi->sf.part_sf.ext_partition_eval_thresh;
4059
8.65M
  if (cpi->sf.part_sf.ext_part_eval_based_on_cur_best &&
4060
0
      !x->must_find_valid_partition && pc_tree->partitioning == PARTITION_NONE)
4061
0
    part4_bsize_thresh = BLOCK_128X128;
4062
4063
  // 4-way partitions are only allowed for BLOCK_16X16, BLOCK_32X32, and
4064
  // BLOCK_64X64, so part4_bsize_thresh must be large enough to exclude
4065
  // BLOCK_4X4 and BLOCK_8X8.
4066
8.65M
  assert(part4_bsize_thresh >= BLOCK_8X8);
4067
4068
8.65M
  bool partition4_allowed =
4069
8.65M
      part_search_state->do_rectangular_split && bsize > part4_bsize_thresh &&
4070
0
      av1_blk_has_rows_and_cols(&blk_params) && !prune_ext_part_state;
4071
4072
  // Disable the 4-way partition search flags when the block width is smaller
4073
  // than the minimum partition width left-shifted by prune_part4_search.
4074
8.65M
  if (blk_params.width < (blk_params.min_partition_size_1d
4075
8.65M
                          << cpi->sf.part_sf.prune_part4_search)) {
4076
7.95M
    part4_search_allowed[HORZ4] = 0;
4077
7.95M
    part4_search_allowed[VERT4] = 0;
4078
7.95M
    return;
4079
7.95M
  }
4080
4081
696k
  PARTITION_TYPE cur_part[NUM_PART4_TYPES] = { PARTITION_HORZ_4,
4082
696k
                                               PARTITION_VERT_4 };
4083
  // partition4_allowed is 1 if we can use a PARTITION_HORZ_4 or
4084
  // PARTITION_VERT_4 for this block. This refines the value computed above:
4085
  // 128x32 and 32x128 blocks are not allowed, so we additionally require
4086
  // that bsize is not BLOCK_128X128.
4087
696k
  partition4_allowed &=
4088
696k
      part_cfg->enable_1to4_partitions && bsize != BLOCK_128X128;
4089
4090
2.09M
  for (PART4_TYPES i = HORZ4; i < NUM_PART4_TYPES; i++) {
4091
1.39M
    part4_search_allowed[i] =
4092
1.39M
        partition4_allowed && part_search_state->partition_rect_allowed[i] &&
4093
0
        get_plane_block_size(get_partition_subsize(bsize, cur_part[i]),
4094
0
                             part_search_state->ss_x,
4095
0
                             part_search_state->ss_y) != BLOCK_INVALID;
4096
1.39M
  }
4097
  // Pruning: pruning out 4-way partitions based on the current best partition.
4098
696k
  if (cpi->sf.part_sf.prune_ext_partition_types_search_level == 2) {
4099
0
    part4_search_allowed[HORZ4] &= (pc_tree->partitioning == PARTITION_HORZ ||
4100
0
                                    pc_tree->partitioning == PARTITION_HORZ_A ||
4101
0
                                    pc_tree->partitioning == PARTITION_HORZ_B ||
4102
0
                                    pc_tree->partitioning == PARTITION_SPLIT ||
4103
0
                                    pc_tree->partitioning == PARTITION_NONE);
4104
0
    part4_search_allowed[VERT4] &= (pc_tree->partitioning == PARTITION_VERT ||
4105
0
                                    pc_tree->partitioning == PARTITION_VERT_A ||
4106
0
                                    pc_tree->partitioning == PARTITION_VERT_B ||
4107
0
                                    pc_tree->partitioning == PARTITION_SPLIT ||
4108
0
                                    pc_tree->partitioning == PARTITION_NONE);
4109
0
  }
4110
4111
  // Pruning: pruning out some 4-way partitions using a DNN taking rd costs of
4112
  // sub-blocks from basic partition types.
4113
696k
  if (cpi->sf.part_sf.ml_prune_partition && partition4_allowed &&
4114
0
      part_search_state->partition_rect_allowed[HORZ] &&
4115
0
      part_search_state->partition_rect_allowed[VERT]) {
4116
0
    av1_ml_prune_4_partition(cpi, x, pc_tree->partitioning, best_rdc->rdcost,
4117
0
                             part_search_state, part4_search_allowed,
4118
0
                             pb_source_variance);
4119
0
  }
4120
4121
  // Pruning: pruning out 4-way partitions based on the number of horz/vert wins
4122
  // in the current block and sub-blocks in PARTITION_SPLIT.
4123
696k
  prune_4_partition_using_split_info(cpi, x, part_search_state,
4124
696k
                                     part4_search_allowed);
4125
696k
}
4126
4127
// Set params needed for PARTITION_NONE search.
4128
static void set_none_partition_params(const AV1_COMP *const cpi, ThreadData *td,
4129
                                      MACROBLOCK *x, PC_TREE *pc_tree,
4130
                                      PartitionSearchState *part_search_state,
4131
                                      RD_STATS *best_remain_rdcost,
4132
7.48M
                                      RD_STATS *best_rdc, int *pt_cost) {
4133
7.48M
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
4134
7.48M
  RD_STATS partition_rdcost;
4135
  // Set PARTITION_NONE context.
4136
7.48M
  if (pc_tree->none == NULL)
4137
7.48M
    pc_tree->none = av1_alloc_pmc(cpi, blk_params.bsize, &td->shared_coeff_buf);
4138
7.48M
  if (!pc_tree->none)
4139
0
    aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
4140
0
                       "Failed to allocate PICK_MODE_CONTEXT");
4141
4142
  // Set PARTITION_NONE type cost.
4143
7.48M
  if (part_search_state->partition_none_allowed) {
4144
7.48M
    if (blk_params.bsize_at_least_8x8) {
4145
3.88M
      *pt_cost = part_search_state->partition_cost[PARTITION_NONE] < INT_MAX
4146
3.88M
                     ? part_search_state->partition_cost[PARTITION_NONE]
4147
18.4E
                     : 0;
4148
3.88M
    }
4149
4150
    // Initialize the RD stats structure.
4151
7.48M
    av1_init_rd_stats(&partition_rdcost);
4152
7.48M
    partition_rdcost.rate = *pt_cost;
4153
7.48M
    av1_rd_cost_update(x->rdmult, &partition_rdcost);
4154
7.48M
    av1_rd_stats_subtraction(x->rdmult, best_rdc, &partition_rdcost,
4155
7.48M
                             best_remain_rdcost);
4156
7.48M
  }
4157
7.48M
}
4158
4159
// Skip other partitions based on PARTITION_NONE rd cost.
4160
static void prune_partitions_after_none(AV1_COMP *const cpi, MACROBLOCK *x,
4161
                                        SIMPLE_MOTION_DATA_TREE *sms_tree,
4162
                                        PICK_MODE_CONTEXT *ctx_none,
4163
                                        PartitionSearchState *part_search_state,
4164
                                        RD_STATS *best_rdc,
4165
6.45M
                                        unsigned int *pb_source_variance) {
4166
6.45M
  const AV1_COMMON *const cm = &cpi->common;
4167
6.45M
  MACROBLOCKD *const xd = &x->e_mbd;
4168
6.45M
  const PartitionBlkParams blk_params = part_search_state->part_blk_params;
4169
6.45M
  RD_STATS *this_rdc = &part_search_state->this_rdc;
4170
6.45M
  const BLOCK_SIZE bsize = blk_params.bsize;
4171
6.45M
  assert(bsize < BLOCK_SIZES_ALL);
4172
4173
6.45M
  if (!frame_is_intra_only(cm) &&
4174
931k
      (part_search_state->do_square_split ||
4175
918k
       part_search_state->do_rectangular_split) &&
4176
13.3k
      !x->e_mbd.lossless[xd->mi[0]->segment_id] && ctx_none->skippable) {
4177
81
    const int use_ml_based_breakout =
4178
81
        bsize <= cpi->sf.part_sf.use_square_partition_only_threshold &&
4179
9
        bsize > BLOCK_4X4 && cpi->sf.part_sf.ml_predict_breakout_level >= 1;
4180
81
    if (use_ml_based_breakout) {
4181
9
      av1_ml_predict_breakout(cpi, x, this_rdc, *pb_source_variance, xd->bd,
4182
9
                              part_search_state);
4183
9
    }
4184
4185
    // Adjust dist breakout threshold according to the partition size.
4186
81
    const int64_t dist_breakout_thr =
4187
81
        cpi->sf.part_sf.partition_search_breakout_dist_thr >>
4188
81
        ((2 * (MAX_SB_SIZE_LOG2 - 2)) -
4189
81
         (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]));
4190
81
    const int rate_breakout_thr =
4191
81
        cpi->sf.part_sf.partition_search_breakout_rate_thr *
4192
81
        num_pels_log2_lookup[bsize];
4193
    // If all y, u, v transform blocks in this partition are skippable,
4194
    // and the dist & rate are within the thresholds, the partition
4195
    // search is terminated for current branch of the partition search
4196
    // tree. The dist & rate thresholds are set to 0 at speed 0 to
4197
    // disable the early termination at that speed.
4198
81
    if (best_rdc->dist < dist_breakout_thr &&
4199
0
        best_rdc->rate < rate_breakout_thr) {
4200
0
      part_search_state->do_square_split = 0;
4201
0
      part_search_state->do_rectangular_split = 0;
4202
0
    }
4203
81
  }
4204
4205
  // Early termination: using simple_motion_search features and the
4206
  // rate, distortion, and rdcost of PARTITION_NONE, a DNN will make a
4207
  // decision on early terminating at PARTITION_NONE.
4208
6.45M
  if (cpi->sf.part_sf.simple_motion_search_early_term_none && cm->show_frame &&
4209
6.45M
      !frame_is_intra_only(cm) && bsize >= BLOCK_16X16 &&
4210
13.3k
      av1_blk_has_rows_and_cols(&blk_params) && this_rdc->rdcost < INT64_MAX &&
4211
13.3k
      this_rdc->rdcost >= 0 && this_rdc->rate < INT_MAX &&
4212
13.3k
      this_rdc->rate >= 0 &&
4213
13.3k
      (part_search_state->do_square_split ||
4214
13.3k
       part_search_state->do_rectangular_split)) {
4215
13.3k
    av1_simple_motion_search_early_term_none(cpi, x, sms_tree, this_rdc,
4216
13.3k
                                             part_search_state);
4217
13.3k
  }
4218
6.45M
}
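To make the dist breakout scaling above concrete, the sketch below evaluates the right-shift for a few square block sizes, assuming 128x128 superblocks (MAX_SB_SIZE_LOG2 == 7) and the usual mi_size_*_log2 values for square blocks; the base threshold is a made-up speed-feature value.

/* Worked example of the dist breakout threshold scaling in
 * prune_partitions_after_none (illustrative values only). */
#include <stdint.h>
#include <stdio.h>

#define MAX_SB_SIZE_LOG2 7 /* assumed: 128x128 superblocks */

int main(void) {
  const int64_t base_dist_thr = 1 << 20; /* hypothetical breakout_dist_thr */
  /* log2 of block width/height in 4x4 mi units for 128, 64, 32, 16, 8. */
  const int mi_size_log2[] = { 5, 4, 3, 2, 1 };
  const int px[] = { 128, 64, 32, 16, 8 };
  for (int i = 0; i < 5; ++i) {
    /* Larger blocks keep more of the base threshold; smaller blocks shift it
     * down, making the breakout harder to hit. */
    const int shift =
        2 * (MAX_SB_SIZE_LOG2 - 2) - (mi_size_log2[i] + mi_size_log2[i]);
    printf("%3dx%-3d -> dist_breakout_thr = %lld (>> %d)\n", px[i], px[i],
           (long long)(base_dist_thr >> shift), shift);
  }
  return 0;
}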
4219
4220
// Decide early termination and rectangular partition pruning
4221
// based on PARTITION_NONE and PARTITION_SPLIT costs.
4222
static void prune_partitions_after_split(
4223
    AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree,
4224
    PartitionSearchState *part_search_state, RD_STATS *best_rdc,
4225
8.65M
    int64_t part_none_rd, int64_t part_split_rd) {
4226
8.65M
  const AV1_COMMON *const cm = &cpi->common;
4227
8.65M
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
4228
8.65M
  const int mi_row = blk_params.mi_row;
4229
8.65M
  const int mi_col = blk_params.mi_col;
4230
8.65M
  const BLOCK_SIZE bsize = blk_params.bsize;
4231
8.65M
  assert(bsize < BLOCK_SIZES_ALL);
4232
4233
  // Early termination: using the rd costs of PARTITION_NONE and subblocks
4234
  // from PARTITION_SPLIT to determine an early breakout.
4235
8.65M
  if (cpi->sf.part_sf.ml_early_term_after_part_split_level &&
4236
0
      !frame_is_intra_only(cm) &&
4237
0
      !part_search_state->terminate_partition_search &&
4238
0
      part_search_state->do_rectangular_split &&
4239
0
      (part_search_state->partition_rect_allowed[HORZ] ||
4240
0
       part_search_state->partition_rect_allowed[VERT])) {
4241
0
    av1_ml_early_term_after_split(
4242
0
        cpi, x, sms_tree, best_rdc->rdcost, part_none_rd, part_split_rd,
4243
0
        part_search_state->split_rd, part_search_state);
4244
0
  }
4245
4246
  // Use the rd costs of PARTITION_NONE and subblocks from PARTITION_SPLIT
4247
  // to prune out rectangular partitions in some directions.
4248
8.65M
  if (!cpi->sf.part_sf.ml_early_term_after_part_split_level &&
4249
8.65M
      cpi->sf.part_sf.ml_prune_partition && !frame_is_intra_only(cm) &&
4250
1.33M
      (part_search_state->partition_rect_allowed[HORZ] ||
4251
1.27M
       part_search_state->partition_rect_allowed[VERT]) &&
4252
133k
      !(part_search_state->prune_rect_part[HORZ] ||
4253
10
        part_search_state->prune_rect_part[VERT]) &&
4254
0
      !part_search_state->terminate_partition_search) {
4255
0
    av1_setup_src_planes(x, cpi->source, mi_row, mi_col, av1_num_planes(cm),
4256
0
                         bsize);
4257
0
    av1_ml_prune_rect_partition(cpi, x, best_rdc->rdcost,
4258
0
                                part_search_state->none_rd,
4259
0
                                part_search_state->split_rd, part_search_state);
4260
0
  }
4261
8.65M
}
4262
4263
// Returns true if either of the left and top neighbor blocks is larger than
4264
// the current block; false otherwise.
4265
static inline bool is_neighbor_blk_larger_than_cur_blk(const MACROBLOCKD *xd,
4266
4.13M
                                                       BLOCK_SIZE bsize) {
4267
4.13M
  const int cur_blk_area = (block_size_high[bsize] * block_size_wide[bsize]);
4268
4.13M
  if (xd->left_available) {
4269
3.25M
    const BLOCK_SIZE left_bsize = xd->left_mbmi->bsize;
4270
3.25M
    if (block_size_high[left_bsize] * block_size_wide[left_bsize] >
4271
3.25M
        cur_blk_area)
4272
1.13M
      return true;
4273
3.25M
  }
4274
4275
3.00M
  if (xd->up_available) {
4276
2.35M
    const BLOCK_SIZE above_bsize = xd->above_mbmi->bsize;
4277
2.35M
    if (block_size_high[above_bsize] * block_size_wide[above_bsize] >
4278
2.35M
        cur_blk_area)
4279
876k
      return true;
4280
2.35M
  }
4281
2.12M
  return false;
4282
3.00M
}
4283
4284
static inline void prune_rect_part_using_none_pred_mode(
4285
    const MACROBLOCKD *xd, PartitionSearchState *part_state,
4286
4.91M
    PREDICTION_MODE mode, BLOCK_SIZE bsize) {
4287
4.91M
  if (mode == DC_PRED || mode == SMOOTH_PRED) {
4288
    // If the prediction mode of NONE partition is either DC_PRED or
4289
    // SMOOTH_PRED, it indicates that the current block has less variation. In
4290
    // this case, HORZ and VERT partitions are pruned if at least one of left
4291
    // and top neighbor blocks is larger than the current block.
4292
4.13M
    if (is_neighbor_blk_larger_than_cur_blk(xd, bsize)) {
4293
2.01M
      part_state->prune_rect_part[HORZ] = 1;
4294
2.01M
      part_state->prune_rect_part[VERT] = 1;
4295
2.01M
    }
4296
4.13M
  } else if (mode == D67_PRED || mode == V_PRED || mode == D113_PRED) {
4297
    // If the prediction mode chosen by NONE partition is close to 90 degrees,
4298
    // it implies a dominant vertical pattern, and the chance of choosing a
4299
    // vertical rectangular partition is high. Hence, horizontal partition is
4300
    // pruned in these cases.
4301
206k
    part_state->prune_rect_part[HORZ] = 1;
4302
571k
  } else if (mode == D157_PRED || mode == H_PRED || mode == D203_PRED) {
4303
    // If the prediction mode chosen by NONE partition is close to 180 degrees,
4304
    // it implies a dominant horizontal pattern, and the chance of choosing a
4305
    // horizontal rectangular partition is high. Hence, vertical partition is
4306
    // pruned in these cases.
4307
364k
    part_state->prune_rect_part[VERT] = 1;
4308
364k
  }
4309
4.91M
}
4310
4311
// PARTITION_NONE search.
4312
static void none_partition_search(
4313
    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, MACROBLOCK *x,
4314
    PC_TREE *pc_tree, SIMPLE_MOTION_DATA_TREE *sms_tree,
4315
    RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
4316
    PartitionSearchState *part_search_state, RD_STATS *best_rdc,
4317
8.65M
    unsigned int *pb_source_variance, int64_t *none_rd, int64_t *part_none_rd) {
4318
8.65M
  const AV1_COMMON *const cm = &cpi->common;
4319
8.65M
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
4320
8.65M
  RD_STATS *this_rdc = &part_search_state->this_rdc;
4321
8.65M
  const int mi_row = blk_params.mi_row;
4322
8.65M
  const int mi_col = blk_params.mi_col;
4323
8.65M
  const BLOCK_SIZE bsize = blk_params.bsize;
4324
8.65M
  assert(bsize < BLOCK_SIZES_ALL);
4325
4326
8.65M
  if (part_search_state->terminate_partition_search ||
4327
8.65M
      !part_search_state->partition_none_allowed)
4328
1.17M
    return;
4329
4330
7.48M
  int pt_cost = 0;
4331
7.48M
  RD_STATS best_remain_rdcost;
4332
7.48M
  av1_invalid_rd_stats(&best_remain_rdcost);
4333
4334
  // Set PARTITION_NONE context and cost.
4335
7.48M
  set_none_partition_params(cpi, td, x, pc_tree, part_search_state,
4336
7.48M
                            &best_remain_rdcost, best_rdc, &pt_cost);
4337
4338
#if CONFIG_COLLECT_PARTITION_STATS
4339
  // Timer start for partition None.
4340
  PartitionTimingStats *part_timing_stats =
4341
      &part_search_state->part_timing_stats;
4342
  if (best_remain_rdcost.rdcost >= 0) {
4343
    start_partition_block_timer(part_timing_stats, PARTITION_NONE);
4344
  }
4345
#endif
4346
  // PARTITION_NONE evaluation and cost update.
4347
7.48M
  pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc, PARTITION_NONE,
4348
7.48M
                bsize, pc_tree->none, best_remain_rdcost);
4349
4350
7.48M
  av1_rd_cost_update(x->rdmult, this_rdc);
4351
4352
#if CONFIG_COLLECT_PARTITION_STATS
4353
  // Timer end for partition None.
4354
  if (part_timing_stats->timer_is_on) {
4355
    RD_STATS tmp_rdc;
4356
    av1_init_rd_stats(&tmp_rdc);
4357
    if (this_rdc->rate != INT_MAX) {
4358
      tmp_rdc.rate = this_rdc->rate;
4359
      tmp_rdc.dist = this_rdc->dist;
4360
      tmp_rdc.rdcost = this_rdc->rdcost;
4361
      if (blk_params.bsize_at_least_8x8) {
4362
        tmp_rdc.rate += pt_cost;
4363
        tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist);
4364
      }
4365
    }
4366
    end_partition_block_timer(part_timing_stats, PARTITION_NONE,
4367
                              tmp_rdc.rdcost);
4368
  }
4369
#endif
4370
7.48M
  *pb_source_variance = x->source_variance;
4371
7.48M
  if (none_rd) *none_rd = this_rdc->rdcost;
4372
7.48M
  part_search_state->none_rd = this_rdc->rdcost;
4373
7.48M
  if (this_rdc->rate != INT_MAX) {
4374
    // Record picked ref frame to prune ref frames for other partition types.
4375
6.79M
    if (cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions) {
4376
931k
      const int ref_type = av1_ref_frame_type(pc_tree->none->mic.ref_frame);
4377
931k
      av1_update_picked_ref_frames_mask(
4378
931k
          x, ref_type, bsize, cm->seq_params->mib_size, mi_row, mi_col);
4379
931k
    }
4380
4381
    // Calculate the total cost and update the best partition.
4382
6.79M
    if (blk_params.bsize_at_least_8x8) {
4383
3.64M
      this_rdc->rate += pt_cost;
4384
3.64M
      this_rdc->rdcost = RDCOST(x->rdmult, this_rdc->rate, this_rdc->dist);
4385
3.64M
    }
4386
6.79M
    *part_none_rd = this_rdc->rdcost;
4387
6.79M
    if (this_rdc->rdcost < best_rdc->rdcost) {
4388
6.45M
      *best_rdc = *this_rdc;
4389
6.45M
      part_search_state->found_best_partition = true;
4390
6.45M
      if (blk_params.bsize_at_least_8x8) {
4391
3.53M
        pc_tree->partitioning = PARTITION_NONE;
4392
3.53M
      }
4393
4394
      // Disable split and rectangular partition search
4395
      // based on PARTITION_NONE cost.
4396
6.45M
      prune_partitions_after_none(cpi, x, sms_tree, pc_tree->none,
4397
6.45M
                                  part_search_state, best_rdc,
4398
6.45M
                                  pb_source_variance);
4399
6.45M
    }
4400
4401
6.79M
    if (cpi->sf.part_sf.prune_rect_part_using_none_pred_mode)
4402
4.91M
      prune_rect_part_using_none_pred_mode(&x->e_mbd, part_search_state,
4403
4.91M
                                           pc_tree->none->mic.mode, bsize);
4404
6.79M
  }
4405
7.48M
  av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm));
4406
7.48M
}
4407
4408
static inline double get_split_partition_penalty(
4409
1.58M
    BLOCK_SIZE bsize, int split_partition_penalty_level) {
4410
1.58M
  if (!split_partition_penalty_level) return 1.00;
4411
4412
  // Higher penalty for smaller block sizes.
4413
9
  static const double penalty_factors[2][SQR_BLOCK_SIZES - 1] = {
4414
9
    { 1.080, 1.040, 1.020, 1.010, 1.000 },
4415
9
    { 1.100, 1.075, 1.050, 1.025, 1.000 },
4416
9
  };
4417
9
  const int sqr_bsize_idx = get_sqr_bsize_idx(bsize);
4418
9
  assert(sqr_bsize_idx > 0 && sqr_bsize_idx < SQR_BLOCK_SIZES);
4419
9
  const double this_penalty_factor =
4420
9
      penalty_factors[split_partition_penalty_level - 1][sqr_bsize_idx - 1];
4421
9
  return this_penalty_factor;
4422
1.58M
}
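split_partition_search below multiplies the accumulated PARTITION_SPLIT rd cost by this penalty factor before comparing it against the best cost, so a split that wins only narrowly on raw cost can still be rejected. A minimal sketch of that comparison, using made-up costs and a factor from the level-1 row of the table above:

/* Sketch of how the split penalty factor biases the PARTITION_SPLIT
 * decision (costs are illustrative). */
#include <stdint.h>
#include <stdio.h>

int main(void) {
  const int64_t best_rdcost = 1000000;  /* best partition found so far */
  const int64_t split_rdcost = 980000;  /* accumulated 4-way split cost */
  const double penalty_factor = 1.040;  /* level 1, mid-size square block */

  const int64_t penalized = (int64_t)(split_rdcost * penalty_factor);
  if (penalized < best_rdcost)
    printf("keep PARTITION_SPLIT (%lld < %lld)\n", (long long)penalized,
           (long long)best_rdcost);
  else
    printf("reject PARTITION_SPLIT (%lld >= %lld)\n", (long long)penalized,
           (long long)best_rdcost);
  return 0;
}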
4423
4424
// PARTITION_SPLIT search.
4425
static void split_partition_search(
4426
    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
4427
    TokenExtra **tp, MACROBLOCK *x, PC_TREE *pc_tree,
4428
    SIMPLE_MOTION_DATA_TREE *sms_tree, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
4429
    PartitionSearchState *part_search_state, RD_STATS *best_rdc,
4430
8.65M
    SB_MULTI_PASS_MODE multi_pass_mode, int64_t *part_split_rd) {
4431
8.65M
  const AV1_COMMON *const cm = &cpi->common;
4432
8.65M
  PartitionBlkParams blk_params = part_search_state->part_blk_params;
4433
8.65M
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
4434
8.65M
  const int mi_row = blk_params.mi_row;
4435
8.65M
  const int mi_col = blk_params.mi_col;
4436
8.65M
  const BLOCK_SIZE bsize = blk_params.bsize;
4437
8.65M
  assert(bsize < BLOCK_SIZES_ALL);
4438
8.65M
  RD_STATS sum_rdc = part_search_state->sum_rdc;
4439
8.65M
  const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
4440
4441
  // Check if partition split is allowed.
4442
8.65M
  if (part_search_state->terminate_partition_search ||
4443
8.65M
      !part_search_state->do_square_split)
4444
6.03M
    return;
4445
4446
13.0M
  for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
4447
10.4M
    if (pc_tree->split[i] == NULL)
4448
10.4M
      pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
4449
10.4M
    if (!pc_tree->split[i])
4450
0
      aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
4451
0
                         "Failed to allocate PC_TREE");
4452
10.4M
    pc_tree->split[i]->index = i;
4453
10.4M
  }
4454
4455
  // Initialization of this partition RD stats.
4456
2.61M
  av1_init_rd_stats(&sum_rdc);
4457
2.61M
  sum_rdc.rate = part_search_state->partition_cost[PARTITION_SPLIT];
4458
2.61M
  sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
4459
4460
2.61M
  int idx;
4461
#if CONFIG_COLLECT_PARTITION_STATS
4462
  PartitionTimingStats *part_timing_stats =
4463
      &part_search_state->part_timing_stats;
4464
  if (best_rdc->rdcost - sum_rdc.rdcost >= 0) {
4465
    start_partition_block_timer(part_timing_stats, PARTITION_SPLIT);
4466
  }
4467
#endif
4468
  // Recursive partition search on 4 sub-blocks.
4469
11.7M
  for (idx = 0; idx < SUB_PARTITIONS_SPLIT && sum_rdc.rdcost < best_rdc->rdcost;
4470
10.1M
       ++idx) {
4471
10.1M
    const int x_idx = (idx & 1) * blk_params.mi_step;
4472
10.1M
    const int y_idx = (idx >> 1) * blk_params.mi_step;
4473
4474
10.1M
    if (mi_row + y_idx >= mi_params->mi_rows ||
4475
9.24M
        mi_col + x_idx >= mi_params->mi_cols)
4476
1.71M
      continue;
4477
4478
8.45M
    pc_tree->split[idx]->index = idx;
4479
8.45M
    int64_t *p_split_rd = &part_search_state->split_rd[idx];
4480
8.45M
    RD_STATS best_remain_rdcost;
4481
8.45M
    av1_rd_stats_subtraction(x->rdmult, best_rdc, &sum_rdc,
4482
8.45M
                             &best_remain_rdcost);
4483
4484
8.45M
    int curr_quad_tree_idx = 0;
4485
8.45M
    if (frame_is_intra_only(cm) && bsize <= BLOCK_64X64) {
4486
7.15M
      curr_quad_tree_idx = part_search_state->intra_part_info->quad_tree_idx;
4487
7.15M
      part_search_state->intra_part_info->quad_tree_idx =
4488
7.15M
          4 * curr_quad_tree_idx + idx + 1;
4489
7.15M
    }
4490
    // Split partition evaluation of corresponding idx.
4491
    // If the RD cost exceeds the best cost then do not
4492
    // evaluate other split sub-partitions.
4493
8.45M
    SIMPLE_MOTION_DATA_TREE *const sms_tree_split =
4494
8.45M
        (sms_tree == NULL) ? NULL : sms_tree->split[idx];
4495
8.45M
    if (!av1_rd_pick_partition(
4496
8.45M
            cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize,
4497
8.45M
            &part_search_state->this_rdc, best_remain_rdcost,
4498
8.45M
            pc_tree->split[idx], sms_tree_split, p_split_rd, multi_pass_mode,
4499
8.45M
            &part_search_state->split_part_rect_win[idx])) {
4500
1.02M
      av1_invalid_rd_stats(&sum_rdc);
4501
1.02M
      break;
4502
1.02M
    }
4503
7.42M
    if (frame_is_intra_only(cm) && bsize <= BLOCK_64X64) {
4504
6.13M
      part_search_state->intra_part_info->quad_tree_idx = curr_quad_tree_idx;
4505
6.13M
    }
4506
4507
7.42M
    sum_rdc.rate += part_search_state->this_rdc.rate;
4508
7.42M
    sum_rdc.dist += part_search_state->this_rdc.dist;
4509
7.42M
    av1_rd_cost_update(x->rdmult, &sum_rdc);
4510
4511
    // Set split ctx as ready for use.
4512
7.42M
    if (idx <= 1 && (bsize <= BLOCK_8X8 ||
4513
3.80M
                     pc_tree->split[idx]->partitioning == PARTITION_NONE)) {
4514
3.80M
      const MB_MODE_INFO *const mbmi = &pc_tree->split[idx]->none->mic;
4515
3.80M
      const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4516
      // Neither palette mode nor cfl predicted.
4517
3.80M
      if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
4518
3.80M
        if (mbmi->uv_mode != UV_CFL_PRED)
4519
3.43M
          part_search_state->is_split_ctx_is_ready[idx] = 1;
4520
3.80M
      }
4521
3.80M
    }
4522
7.42M
  }
4523
#if CONFIG_COLLECT_PARTITION_STATS
4524
  if (part_timing_stats->timer_is_on) {
4525
    end_partition_block_timer(part_timing_stats, PARTITION_SPLIT,
4526
                              sum_rdc.rdcost);
4527
  }
4528
#endif
4529
2.61M
  const int reached_last_index = (idx == SUB_PARTITIONS_SPLIT);
4530
4531
  // Calculate the total cost and update the best partition.
4532
2.61M
  *part_split_rd = sum_rdc.rdcost;
4533
2.61M
  if (reached_last_index && sum_rdc.rdcost < best_rdc->rdcost) {
4534
1.58M
    sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
4535
1.58M
    const double penalty_factor = get_split_partition_penalty(
4536
1.58M
        bsize, cpi->sf.part_sf.split_partition_penalty_level);
4537
1.58M
    const int64_t this_rdcost = (int64_t)(sum_rdc.rdcost * penalty_factor);
4538
1.58M
    if (this_rdcost < best_rdc->rdcost) {
4539
1.58M
      *best_rdc = sum_rdc;
4540
1.58M
      part_search_state->found_best_partition = true;
4541
1.58M
      pc_tree->partitioning = PARTITION_SPLIT;
4542
1.58M
    }
4543
1.58M
  } else if (cpi->sf.part_sf.less_rectangular_check_level > 0) {
4544
    // Skip rectangular partition test when partition type none gives better
4545
    // rd than partition type split.
4546
1.03M
    if (cpi->sf.part_sf.less_rectangular_check_level == 2 || idx <= 2) {
4547
1.02M
      const int partition_none_valid = part_search_state->none_rd > 0;
4548
1.02M
      const int partition_none_better =
4549
1.02M
          part_search_state->none_rd < sum_rdc.rdcost;
4550
1.02M
      part_search_state->do_rectangular_split &=
4551
1.02M
          !(partition_none_valid && partition_none_better);
4552
1.02M
    }
4553
1.03M
  }
4554
  // Restore the context for the following cases:
4555
  // 1) Current block size is not more than the maximum partition size, as a
4556
  //    dry-run encode happens for these cases
4557
  // 2) Current block size is the same as the superblock size, as the final
4558
  //    encode happens for this case
4559
2.61M
  if (bsize <= x->sb_enc.max_partition_size || bsize == cm->seq_params->sb_size)
4560
2.61M
    av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm));
4561
2.61M
}
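
The RD bookkeeping above follows the usual rate-distortion trade-off: the cost of signalling PARTITION_SPLIT is charged up front, each sub-block's rate and distortion are accumulated, and a speed-feature penalty factor can bias the final comparison against SPLIT. A minimal sketch of that logic, with RDCOST() simplified to J = D + lambda * R (the real macro uses fixed-point scaling) and all names below purely illustrative:

#include <stdint.h>

typedef struct {
  int rate;       /* bits */
  int64_t dist;   /* distortion */
  int64_t rdcost; /* J = dist + lambda * rate */
} rd_sketch;

static int64_t rd_cost_sketch(double lambda, const rd_sketch *s) {
  return s->dist + (int64_t)(lambda * s->rate);
}

/* Returns 1 if PARTITION_SPLIT should replace the current best partition. */
static int split_beats_best(const rd_sketch sub[4], int split_signal_rate,
                            double lambda, double penalty_factor,
                            int64_t best_rdcost) {
  rd_sketch sum = { split_signal_rate, 0, 0 };
  for (int i = 0; i < 4; ++i) { /* accumulate the four sub-blocks */
    sum.rate += sub[i].rate;
    sum.dist += sub[i].dist;
  }
  sum.rdcost = rd_cost_sketch(lambda, &sum);
  /* A penalty factor > 1.0 biases the decision against splitting. */
  return (int64_t)(sum.rdcost * penalty_factor) < best_rdcost;
}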
4562
4563
// The max number of nodes in the partition tree.
4564
// The number of leaf nodes is (128x128) / (4x4) = 1024.
4565
// The number of all possible parent nodes is at most 1 + 2 + ... + 512 = 1023.
4566
#define NUM_NODES 2048
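
As a sanity check on that bound (a standalone sketch, not library code): summing every level of a quadtree from 128x128 down to 4x4 gives 1 + 4 + 16 + 64 + 256 + 1024 = 1365 nodes, while the looser binary-tree style bound quoted in the comment gives 1024 leaves + 1023 parents = 2047, so NUM_NODES == 2048 covers both.

#include <assert.h>

static void check_num_nodes_bound(void) {
  int total = 0;
  /* levels of a 128x128 -> 4x4 quadtree: 1, 4, 16, 64, 256, 1024 nodes */
  for (int level_nodes = 1; level_nodes <= 1024; level_nodes *= 4)
    total += level_nodes;
  assert(total == 1365 && total <= 2048);
}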
4567
4568
static void write_partition_tree(AV1_COMP *const cpi,
4569
                                 const PC_TREE *const pc_tree,
4570
                                 const BLOCK_SIZE bsize, const int mi_row,
4571
0
                                 const int mi_col) {
4572
0
  (void)mi_row;
4573
0
  (void)mi_col;
4574
0
  const char *path = cpi->oxcf.partition_info_path;
4575
0
  char filename[256];
4576
0
  snprintf(filename, sizeof(filename), "%s/partition_tree_sb%d_c%d", path,
4577
0
           cpi->sb_counter, 0);
4578
0
  FILE *pfile = fopen(filename, "w");
4579
0
  fprintf(pfile, "%d", bsize);
4580
0
4581
0
  // Write partition types in BFS order.
4582
0
  const PC_TREE *tree_node_queue[NUM_NODES] = { NULL };
4583
0
  int q_idx = 0;
4584
0
  int last_idx = 1;
4585
0
  int num_nodes = 1;
4586
0
4587
0
  // First traversal to get number of leaf nodes.
4588
0
  tree_node_queue[q_idx] = pc_tree;
4589
0
  while (num_nodes > 0) {
4590
0
    const PC_TREE *node = tree_node_queue[q_idx];
4591
0
    if (node->partitioning == PARTITION_SPLIT) {
4592
0
      for (int i = 0; i < 4; ++i) {
4593
0
        tree_node_queue[last_idx] = node->split[i];
4594
0
        ++last_idx;
4595
0
      }
4596
0
      num_nodes += 4;
4597
0
    }
4598
0
    --num_nodes;
4599
0
    ++q_idx;
4600
0
  }
4601
0
  const int num_leafs = last_idx;
4602
0
  fprintf(pfile, ",%d,%d", num_leafs, /*num_configs=*/1);
4603
0
4604
0
  // Write partitions for each node.
4605
0
  q_idx = 0;
4606
0
  last_idx = 1;
4607
0
  num_nodes = 1;
4608
0
  tree_node_queue[q_idx] = pc_tree;
4609
0
  while (num_nodes > 0) {
4610
0
    const PC_TREE *node = tree_node_queue[q_idx];
4611
0
    fprintf(pfile, ",%d", node->partitioning);
4612
0
    if (node->partitioning == PARTITION_SPLIT) {
4613
0
      for (int i = 0; i < 4; ++i) {
4614
0
        tree_node_queue[last_idx] = node->split[i];
4615
0
        ++last_idx;
4616
0
      }
4617
0
      num_nodes += 4;
4618
0
    }
4619
0
    --num_nodes;
4620
0
    ++q_idx;
4621
0
  }
4622
0
  fprintf(pfile, "\n");
4623
0
4624
0
  fclose(pfile);
4625
0
}
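
For orientation, the file written above is a single comma-separated line: the root block size, the total number of nodes placed in the BFS queue (stored under the name num_leafs even though it also counts internal nodes), the number of configurations, and then one partition type per node in BFS order. A purely illustrative example, assuming the enums.h numbering BLOCK_128X128 == 15, PARTITION_SPLIT == 3 and PARTITION_NONE == 0: a 128x128 superblock whose top-left 64x64 quadrant is split once more would be written as

  15,9,1,3,3,0,0,0,0,0,0,0

which read_partition_tree below parses back with matching fscanf calls.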
4626
4627
#if CONFIG_PARTITION_SEARCH_ORDER
4628
static void verify_write_partition_tree(const AV1_COMP *const cpi,
4629
                                        const PC_TREE *const pc_tree,
4630
                                        const BLOCK_SIZE bsize,
4631
                                        const int config_id, const int mi_row,
4632
                                        const int mi_col) {
4633
  (void)mi_row;
4634
  (void)mi_col;
4635
  const char *path = cpi->oxcf.partition_info_path;
4636
  char filename[256];
4637
  snprintf(filename, sizeof(filename), "%s/verify_partition_tree_sb%d_c%d",
4638
           path, cpi->sb_counter, config_id);
4639
  FILE *pfile = fopen(filename, "w");
4640
  fprintf(pfile, "%d", bsize);
4641
4642
  // Write partition types in BFS order.
4643
  const PC_TREE *tree_node_queue[NUM_NODES] = { NULL };
4644
  int q_idx = 0;
4645
  int last_idx = 1;
4646
  int num_nodes = 1;
4647
4648
  // First traversal to get number of leaf nodes.
4649
  tree_node_queue[q_idx] = pc_tree;
4650
  while (num_nodes > 0) {
4651
    const PC_TREE *node = tree_node_queue[q_idx];
4652
    if (node != NULL && node->partitioning == PARTITION_SPLIT) {
4653
      for (int i = 0; i < 4; ++i) {
4654
        tree_node_queue[last_idx] = node->split[i];
4655
        ++last_idx;
4656
      }
4657
      num_nodes += 4;
4658
    }
4659
    --num_nodes;
4660
    ++q_idx;
4661
  }
4662
  const int num_leafs = last_idx;
4663
  fprintf(pfile, ",%d,%d", num_leafs, /*num_configs=*/1);
4664
4665
  // Write partitions for each node.
4666
  q_idx = 0;
4667
  last_idx = 1;
4668
  num_nodes = 1;
4669
  tree_node_queue[q_idx] = pc_tree;
4670
  while (num_nodes > 0) {
4671
    const PC_TREE *node = tree_node_queue[q_idx];
4672
    if (node != NULL) {  // suppress warning
4673
      fprintf(pfile, ",%d", node->partitioning);
4674
      if (node->partitioning == PARTITION_SPLIT) {
4675
        for (int i = 0; i < 4; ++i) {
4676
          tree_node_queue[last_idx] = node->split[i];
4677
          ++last_idx;
4678
        }
4679
        num_nodes += 4;
4680
      }
4681
    }
4682
    --num_nodes;
4683
    ++q_idx;
4684
  }
4685
  fprintf(pfile, "\n");
4686
4687
  fclose(pfile);
4688
}
4689
4690
static int read_partition_tree(AV1_COMP *const cpi, PC_TREE *const pc_tree,
4691
                               struct aom_internal_error_info *error_info,
4692
                               const int config_id) {
4693
  const AV1_COMMON *const cm = &cpi->common;
4694
  const char *path = cpi->oxcf.partition_info_path;
4695
  char filename[256];
4696
  snprintf(filename, sizeof(filename), "%s/partition_tree_sb%d_c%d", path,
4697
           cpi->sb_counter, config_id);
4698
  FILE *pfile = fopen(filename, "r");
4699
  if (pfile == NULL) {
4700
    aom_internal_error(cm->error, AOM_CODEC_ERROR, "Can't find input file: %s.",
4701
                       filename);
4702
  }
4703
4704
  int read_bsize;
4705
  int num_nodes;
4706
  int num_configs;
4707
  fscanf(pfile, "%d,%d,%d", &read_bsize, &num_nodes, &num_configs);
4708
  assert(read_bsize == cpi->common.seq_params->sb_size);
4709
  BLOCK_SIZE bsize = (BLOCK_SIZE)read_bsize;
4710
  assert(bsize == pc_tree->block_size);
4711
4712
  PC_TREE *tree_node_queue[NUM_NODES] = { NULL };
4713
  int last_idx = 1;
4714
  int q_idx = 0;
4715
  tree_node_queue[q_idx] = pc_tree;
4716
  while (num_nodes > 0) {
4717
    int partitioning;
4718
    fscanf(pfile, ",%d", &partitioning);
4719
    assert(partitioning >= PARTITION_NONE &&
4720
           partitioning < EXT_PARTITION_TYPES);
4721
    PC_TREE *node = tree_node_queue[q_idx];
4722
    if (node != NULL) {
4723
      node->partitioning = partitioning;
4724
      bsize = node->block_size;
4725
    }
4726
    if (partitioning == PARTITION_SPLIT) {
4727
      const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
4728
      for (int i = 0; i < 4; ++i) {
4729
        if (node != NULL) {  // Suppress warning
4730
          node->split[i] = av1_alloc_pc_tree_node(subsize);
4731
          if (!node->split[i])
4732
            aom_internal_error(error_info, AOM_CODEC_MEM_ERROR,
4733
                               "Failed to allocate PC_TREE");
4734
          node->split[i]->index = i;
4735
          tree_node_queue[last_idx] = node->split[i];
4736
          ++last_idx;
4737
        }
4738
      }
4739
    }
4740
    --num_nodes;
4741
    ++q_idx;
4742
  }
4743
  fclose(pfile);
4744
4745
  return num_configs;
4746
}
4747
4748
static RD_STATS rd_search_for_fixed_partition(
4749
    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
4750
    TokenExtra **tp, SIMPLE_MOTION_DATA_TREE *sms_tree, int mi_row, int mi_col,
4751
    const BLOCK_SIZE bsize, PC_TREE *pc_tree) {
4752
  const PARTITION_TYPE partition = pc_tree->partitioning;
4753
  const AV1_COMMON *const cm = &cpi->common;
4754
  const int num_planes = av1_num_planes(cm);
4755
  MACROBLOCK *const x = &td->mb;
4756
  MACROBLOCKD *const xd = &x->e_mbd;
4757
  TileInfo *const tile_info = &tile_data->tile_info;
4758
  RD_STATS best_rdc;
4759
  av1_invalid_rd_stats(&best_rdc);
4760
  int sum_subblock_rate = 0;
4761
  int64_t sum_subblock_dist = 0;
4762
  PartitionSearchState part_search_state;
4763
  init_partition_search_state_params(x, cpi, &part_search_state, mi_row, mi_col,
4764
                                     bsize);
4765
  // Override partition costs at the edges of the frame in the same
4766
  // way as in read_partition (see decodeframe.c).
4767
  PartitionBlkParams blk_params = part_search_state.part_blk_params;
4768
  if (!av1_blk_has_rows_and_cols(&blk_params))
4769
    set_partition_cost_for_edge_blk(cm, &part_search_state);
4770
4771
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
4772
4773
  // Save rdmult before it might be changed, so it can be restored later.
4774
  const int orig_rdmult = x->rdmult;
4775
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
4776
  (void)orig_rdmult;
4777
4778
  // Set the context.
4779
  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
4780
  xd->above_txfm_context =
4781
      cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
4782
  xd->left_txfm_context =
4783
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
4784
  av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
4785
4786
  assert(bsize < BLOCK_SIZES_ALL);
4787
  unsigned int pb_source_variance = UINT_MAX;
4788
  int64_t part_none_rd = INT64_MAX;
4789
  int64_t none_rd = INT64_MAX;
4790
  int inc_step[NUM_PART4_TYPES] = { 0 };
4791
  if (partition == PARTITION_HORZ_4) inc_step[HORZ4] = mi_size_high[bsize] / 4;
4792
  if (partition == PARTITION_VERT_4) inc_step[VERT4] = mi_size_wide[bsize] / 4;
4793
4794
  switch (partition) {
4795
    case PARTITION_NONE:
4796
      none_partition_search(cpi, td, tile_data, x, pc_tree, sms_tree, &x_ctx,
4797
                            &part_search_state, &best_rdc, &pb_source_variance,
4798
                            &none_rd, &part_none_rd);
4799
      break;
4800
    case PARTITION_HORZ:
4801
      rectangular_partition_search(cpi, td, tile_data, tp, x, pc_tree, &x_ctx,
4802
                                   &part_search_state, &best_rdc, NULL, HORZ,
4803
                                   HORZ);
4804
      break;
4805
    case PARTITION_VERT:
4806
      rectangular_partition_search(cpi, td, tile_data, tp, x, pc_tree, &x_ctx,
4807
                                   &part_search_state, &best_rdc, NULL, VERT,
4808
                                   VERT);
4809
      break;
4810
    case PARTITION_HORZ_A:
4811
      ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
4812
                           &part_search_state, &best_rdc, NULL,
4813
                           pb_source_variance, 1, HORZ_A, HORZ_A);
4814
      break;
4815
    case PARTITION_HORZ_B:
4816
      ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
4817
                           &part_search_state, &best_rdc, NULL,
4818
                           pb_source_variance, 1, HORZ_B, HORZ_B);
4819
      break;
4820
    case PARTITION_VERT_A:
4821
      ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
4822
                           &part_search_state, &best_rdc, NULL,
4823
                           pb_source_variance, 1, VERT_A, VERT_A);
4824
      break;
4825
    case PARTITION_VERT_B:
4826
      ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
4827
                           &part_search_state, &best_rdc, NULL,
4828
                           pb_source_variance, 1, VERT_B, VERT_B);
4829
      break;
4830
    case PARTITION_HORZ_4:
4831
      rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
4832
                         pc_tree->horizontal4, &part_search_state, &best_rdc,
4833
                         inc_step, PARTITION_HORZ_4);
4834
      break;
4835
    case PARTITION_VERT_4:
4836
      rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
4837
                         pc_tree->vertical4, &part_search_state, &best_rdc,
4838
                         inc_step, PARTITION_VERT_4);
4839
      break;
4840
    case PARTITION_SPLIT:
4841
      for (int idx = 0; idx < SUB_PARTITIONS_SPLIT; ++idx) {
4842
        const BLOCK_SIZE subsize =
4843
            get_partition_subsize(bsize, PARTITION_SPLIT);
4844
        assert(subsize < BLOCK_SIZES_ALL);
4845
        const int next_mi_row =
4846
            idx < 2 ? mi_row : mi_row + mi_size_high[subsize];
4847
        const int next_mi_col =
4848
            idx % 2 == 0 ? mi_col : mi_col + mi_size_wide[subsize];
4849
        if (next_mi_row >= cm->mi_params.mi_rows ||
4850
            next_mi_col >= cm->mi_params.mi_cols) {
4851
          continue;
4852
        }
4853
        const RD_STATS subblock_rdc = rd_search_for_fixed_partition(
4854
            cpi, td, tile_data, tp, sms_tree->split[idx], next_mi_row,
4855
            next_mi_col, subsize, pc_tree->split[idx]);
4856
        sum_subblock_rate += subblock_rdc.rate;
4857
        sum_subblock_dist += subblock_rdc.dist;
4858
      }
4859
      best_rdc.rate = sum_subblock_rate;
4860
      best_rdc.rate += part_search_state.partition_cost[PARTITION_SPLIT];
4861
      best_rdc.dist = sum_subblock_dist;
4862
      best_rdc.rdcost = RDCOST(x->rdmult, best_rdc.rate, best_rdc.dist);
4863
      break;
4864
    default:
4865
      assert(0 && "invalid partition type.");
4866
      aom_internal_error(cm->error, AOM_CODEC_ERROR, "Invalid partition type.");
4867
  }
4868
  // Note: it is necessary to restore context information.
4869
  av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
4870
4871
  if (bsize != cm->seq_params->sb_size) {
4872
    encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
4873
              pc_tree, NULL);
4874
  }
4875
  x->rdmult = orig_rdmult;
4876
4877
  return best_rdc;
4878
}
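
To make the PARTITION_SPLIT arithmetic in the switch above concrete (MI units are 4x4 luma samples), a worked example:

/* Illustrative only: with bsize == BLOCK_64X64 the SPLIT subsize is
 * BLOCK_32X32, so mi_size_high[subsize] == mi_size_wide[subsize] == 8 and the
 * four recursive calls start at
 *   idx 0: (mi_row,     mi_col)      idx 1: (mi_row,     mi_col + 8)
 *   idx 2: (mi_row + 8, mi_col)      idx 3: (mi_row + 8, mi_col + 8)
 * Quadrants that start at or beyond mi_rows/mi_cols are skipped. */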
4879
4880
static void prepare_sb_features_before_search(
4881
    AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, int mi_row,
4882
    int mi_col, const BLOCK_SIZE bsize, aom_partition_features_t *features) {
4883
  av1_collect_motion_search_features_sb(cpi, td, tile_data, mi_row, mi_col,
4884
                                        bsize, features);
4885
  collect_tpl_stats_sb(cpi, bsize, mi_row, mi_col, features);
4886
}
4887
4888
static void update_partition_stats(const RD_STATS *const this_rdcost,
4889
                                   aom_partition_stats_t *stats) {
4890
  stats->rate = this_rdcost->rate;
4891
  stats->dist = this_rdcost->dist;
4892
  stats->rdcost = this_rdcost->rdcost;
4893
}
4894
4895
static void build_pc_tree_from_part_decision(
4896
    const aom_partition_decision_t *partition_decision,
4897
    const BLOCK_SIZE this_bsize, PC_TREE *pc_tree,
4898
    struct aom_internal_error_info *error_info) {
4899
  BLOCK_SIZE bsize = this_bsize;
4900
  int num_nodes = partition_decision->num_nodes;
4901
  PC_TREE *tree_node_queue[NUM_NODES] = { NULL };
4902
  int last_idx = 1;
4903
  int q_idx = 0;
4904
  tree_node_queue[q_idx] = pc_tree;
4905
  while (num_nodes > 0) {
4906
    const int partitioning = partition_decision->partition_decision[q_idx];
4907
    assert(partitioning >= PARTITION_NONE &&
4908
           partitioning < EXT_PARTITION_TYPES);
4909
    PC_TREE *node = tree_node_queue[q_idx];
4910
    if (node != NULL) {
4911
      node->partitioning = partitioning;
4912
      bsize = node->block_size;
4913
    }
4914
    if (partitioning == PARTITION_SPLIT) {
4915
      const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
4916
      for (int i = 0; i < 4; ++i) {
4917
        if (node != NULL) {  // Suppress warning
4918
          node->split[i] = av1_alloc_pc_tree_node(subsize);
4919
          if (!node->split[i])
4920
            aom_internal_error(error_info, AOM_CODEC_MEM_ERROR,
4921
                               "Failed to allocate PC_TREE");
4922
          node->split[i]->index = i;
4923
          tree_node_queue[last_idx] = node->split[i];
4924
          ++last_idx;
4925
        }
4926
      }
4927
    }
4928
    --num_nodes;
4929
    ++q_idx;
4930
  }
4931
}
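
To make the BFS consumption order concrete, a purely illustrative decision (field names are those of aom_partition_decision_t used above; the numbering PARTITION_NONE == 0 and PARTITION_SPLIT == 3 is assumed from enums.h):

/* A 64x64 superblock where only the bottom-right 32x32 quadrant is split
 * again.  Nine nodes are visited in BFS order: the root, its 4 children, then
 * the 4 grandchildren of child 3.
 *   partition_decision = { 3, 0, 0, 0, 3, 0, 0, 0, 0 },  num_nodes = 9 */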
4932
4933
// The ML model needs to provide the whole decision tree for the superblock.
4934
static bool ml_partition_search_whole_tree(AV1_COMP *const cpi, ThreadData *td,
4935
                                           TileDataEnc *tile_data,
4936
                                           TokenExtra **tp,
4937
                                           SIMPLE_MOTION_DATA_TREE *sms_root,
4938
                                           int mi_row, int mi_col,
4939
                                           const BLOCK_SIZE bsize) {
4940
  AV1_COMMON *const cm = &cpi->common;
4941
  MACROBLOCK *const x = &td->mb;
4942
  ExtPartController *const ext_part_controller = &cpi->ext_part_controller;
4943
  struct aom_internal_error_info *error_info = x->e_mbd.error_info;
4944
  aom_partition_features_t features;
4945
  prepare_sb_features_before_search(cpi, td, tile_data, mi_row, mi_col, bsize,
4946
                                    &features);
4947
  features.mi_row = mi_row;
4948
  features.mi_col = mi_col;
4949
  features.frame_width = cpi->frame_info.frame_width;
4950
  features.frame_height = cpi->frame_info.frame_height;
4951
  features.block_size = bsize;
4952
  av1_ext_part_send_features(ext_part_controller, &features);
4953
4954
  // rd mode search (dry run) for a valid partition decision from the ml model.
4955
  aom_partition_decision_t partition_decision;
4956
  do {
4957
    const bool valid_decision = av1_ext_part_get_partition_decision(
4958
        ext_part_controller, &partition_decision);
4959
    if (!valid_decision) return false;
4960
4961
    // First, let's take the easy approach.
4962
    // We require the ML model to provide partition decisions for the
4963
    // whole superblock.
4964
    td->pc_root = av1_alloc_pc_tree_node(bsize);
4965
    if (!td->pc_root)
4966
      aom_internal_error(error_info, AOM_CODEC_MEM_ERROR,
4967
                         "Failed to allocate PC_TREE");
4968
    build_pc_tree_from_part_decision(&partition_decision, bsize, td->pc_root,
4969
                                     error_info);
4970
4971
    const RD_STATS this_rdcost = rd_search_for_fixed_partition(
4972
        cpi, td, tile_data, tp, sms_root, mi_row, mi_col, bsize, td->pc_root);
4973
    aom_partition_stats_t stats;
4974
    update_partition_stats(&this_rdcost, &stats);
4975
    av1_ext_part_send_partition_stats(ext_part_controller, &stats);
4976
    if (!partition_decision.is_final_decision) {
4977
      av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
4978
                                 cpi->sf.part_sf.partition_search_type);
4979
      td->pc_root = NULL;
4980
    }
4981
  } while (!partition_decision.is_final_decision);
4982
4983
  // Encode with the selected mode and partition.
4984
  set_cb_offsets(x->cb_offset, 0, 0);
4985
  encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
4986
            td->pc_root, NULL);
4987
  av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
4988
                             cpi->sf.part_sf.partition_search_type);
4989
  td->pc_root = NULL;
4990
4991
  return true;
4992
}
4993
4994
// Use a bitmask to represent the valid partition types for the current
4995
// block. "1" means the corresponding partition type is valid.
4996
// The least significant bit represents "PARTITION_NONE", the
4997
// most significant bit represents "PARTITION_VERT_4", following
4998
// the enum order for PARTITION_TYPE in "enums.h".
4999
static int get_valid_partition_types(
5000
    const AV1_COMP *const cpi,
5001
    const PartitionSearchState *const part_search_state,
5002
    const BLOCK_SIZE bsize) {
5003
  const PartitionCfg *const part_cfg = &cpi->oxcf.part_cfg;
5004
  const PartitionBlkParams blk_params = part_search_state->part_blk_params;
5005
  int valid_types = 0;
5006
  // PARTITION_NONE
5007
  valid_types |= (part_search_state->partition_none_allowed << 0);
5008
  // PARTITION_HORZ
5009
  valid_types |= (part_search_state->partition_rect_allowed[HORZ] << 1);
5010
  // PARTITION_VERT
5011
  valid_types |= (part_search_state->partition_rect_allowed[VERT] << 2);
5012
  // PARTITION_SPLIT
5013
  valid_types |= (part_search_state->do_square_split << 3);
5014
  // PARTITION_HORZ_A
5015
  const int ext_partition_allowed = part_search_state->do_rectangular_split &&
5016
                                    av1_blk_has_rows_and_cols(&blk_params);
5017
  const int horzab_partition_allowed =
5018
      ext_partition_allowed && part_cfg->enable_ab_partitions &&
5019
      part_search_state->partition_rect_allowed[HORZ];
5020
  valid_types |= (horzab_partition_allowed << 4);
5021
  // PARTITION_HORZ_B
5022
  valid_types |= (horzab_partition_allowed << 5);
5023
  // PARTITION_VERT_A
5024
  const int vertab_partition_allowed =
5025
      ext_partition_allowed && part_cfg->enable_ab_partitions &&
5026
      part_search_state->partition_rect_allowed[VERT];
5027
  valid_types |= (vertab_partition_allowed << 6);
5028
  // PARTITION_VERT_B
5029
  valid_types |= (vertab_partition_allowed << 7);
5030
  // PARTITION_HORZ_4
5031
  const int partition4_allowed = part_cfg->enable_1to4_partitions &&
5032
                                 ext_partition_allowed &&
5033
                                 bsize != BLOCK_128X128;
5034
  const int horz4_allowed =
5035
      partition4_allowed && part_search_state->partition_rect_allowed[HORZ] &&
5036
      get_plane_block_size(get_partition_subsize(bsize, PARTITION_HORZ_4),
5037
                           part_search_state->ss_x,
5038
                           part_search_state->ss_y) != BLOCK_INVALID;
5039
  valid_types |= (horz4_allowed << 8);
5040
  // PARTITION_VERT_4
5041
  const int vert4_allowed =
5042
      partition4_allowed && part_search_state->partition_rect_allowed[VERT] &&
5043
      get_plane_block_size(get_partition_subsize(bsize, PARTITION_VERT_4),
5044
                           part_search_state->ss_x,
5045
                           part_search_state->ss_y) != BLOCK_INVALID;
5046
  valid_types |= (vert4_allowed << 9);
5047
5048
  return valid_types;
5049
}
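
Because the bit positions follow the PARTITION_TYPE enum, a caller can test the returned mask directly; a minimal sketch (hypothetical helper, not part of the encoder):

static inline int is_partition_type_valid(int valid_types,
                                          PARTITION_TYPE partition) {
  /* bit 0 = PARTITION_NONE ... bit 9 = PARTITION_VERT_4 */
  return (valid_types >> partition) & 1;
}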
5050
5051
static void prepare_tpl_stats_block(const AV1_COMP *const cpi,
5052
                                    const BLOCK_SIZE bsize, const int mi_row,
5053
                                    const int mi_col, int64_t *intra_cost,
5054
                                    int64_t *inter_cost, int64_t *mc_dep_cost) {
5055
  const AV1_COMMON *const cm = &cpi->common;
5056
  GF_GROUP *gf_group = &cpi->ppi->gf_group;
5057
  if (gf_group->update_type[cpi->gf_frame_index] == INTNL_OVERLAY_UPDATE ||
5058
      gf_group->update_type[cpi->gf_frame_index] == OVERLAY_UPDATE) {
5059
    return;
5060
  }
5061
5062
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
5063
  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[cpi->gf_frame_index];
5064
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
5065
  // If tpl stats are not established, return early.
5066
  if (!tpl_data->ready || gf_group->max_layer_depth_allowed == 0) {
5067
    return;
5068
  }
5069
5070
  const int tpl_stride = tpl_frame->stride;
5071
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
5072
  const int mi_width =
5073
      AOMMIN(mi_size_wide[bsize], cm->mi_params.mi_cols - mi_col);
5074
  const int mi_height =
5075
      AOMMIN(mi_size_high[bsize], cm->mi_params.mi_rows - mi_row);
5076
5077
  int64_t sum_intra_cost = 0;
5078
  int64_t sum_inter_cost = 0;
5079
  int64_t sum_mc_dep_cost = 0;
5080
  for (int row = 0; row < mi_height; row += step) {
5081
    for (int col = 0; col < mi_width; col += step) {
5082
      TplDepStats *this_stats =
5083
          &tpl_stats[av1_tpl_ptr_pos(mi_row + row, mi_col + col, tpl_stride,
5084
                                     tpl_data->tpl_stats_block_mis_log2)];
5085
      sum_intra_cost += this_stats->intra_cost;
5086
      sum_inter_cost += this_stats->inter_cost;
5087
      const int64_t mc_dep_delta =
5088
          RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate,
5089
                 this_stats->mc_dep_dist);
5090
      sum_mc_dep_cost += mc_dep_delta;
5091
    }
5092
  }
5093
5094
  *intra_cost = sum_intra_cost;
5095
  *inter_cost = sum_inter_cost;
5096
  *mc_dep_cost = sum_mc_dep_cost;
5097
}
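
For scale, the accumulation granularity above is set by tpl_stats_block_mis_log2; taking the value 2 purely as an illustration (one TPL entry per 4x4 group of MIs, i.e. per 16x16 luma area):

/* Illustrative only: with tpl_stats_block_mis_log2 == 2, step == 4 MIs, so a
 * full 64x64 block (16x16 MIs) accumulates (16/4) * (16/4) == 16 TPL entries
 * and a 128x128 superblock accumulates 64; blocks clipped by the frame edge
 * visit fewer because mi_width/mi_height are clamped above. */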
5098
5099
static bool recursive_partition(AV1_COMP *const cpi, ThreadData *td,
5100
                                TileDataEnc *tile_data, TokenExtra **tp,
5101
                                SIMPLE_MOTION_DATA_TREE *sms_root,
5102
                                PC_TREE *pc_tree, int mi_row, int mi_col,
5103
                                const BLOCK_SIZE bsize, RD_STATS *this_rdcost) {
5104
  const AV1_COMMON *const cm = &cpi->common;
5105
  ExtPartController *const ext_part_controller = &cpi->ext_part_controller;
5106
  MACROBLOCK *const x = &td->mb;
5107
  MACROBLOCKD *const xd = &x->e_mbd;
5108
  if (mi_row >= cm->mi_params.mi_rows || mi_col >= cm->mi_params.mi_cols) {
5109
    return false;
5110
  }
5111
  aom_partition_decision_t partition_decision;
5112
  do {
5113
    PartitionSearchState part_search_state;
5114
    // Initialization of state variables used in partition search.
5115
    // TODO(chengchen): check if there are hidden conditions that don't allow
5116
    // all possible partition types.
5117
    init_partition_search_state_params(x, cpi, &part_search_state, mi_row,
5118
                                       mi_col, bsize);
5119
    // Override partition costs at the edges of the frame in the same
5120
    // way as in read_partition (see decodeframe.c).
5121
    PartitionBlkParams blk_params = part_search_state.part_blk_params;
5122
    if (!av1_blk_has_rows_and_cols(&blk_params))
5123
      set_partition_cost_for_edge_blk(cm, &part_search_state);
5124
    const int orig_rdmult = x->rdmult;
5125
    setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
5126
    const int valid_partition_types =
5127
        get_valid_partition_types(cpi, &part_search_state, bsize);
5128
    const FRAME_UPDATE_TYPE update_type =
5129
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
5130
    const int qindex = av1_get_qindex(&cm->seg, xd->mi[0]->segment_id,
5131
                                      cm->quant_params.base_qindex);
5132
    // RD multiplier
5133
    const int rdmult = x->rdmult;
5134
    // pyramid level
5135
    const int pyramid_level =
5136
        cpi->ppi->gf_group.layer_depth[cpi->gf_frame_index];
5137
    x->rdmult = orig_rdmult;
5138
    // Neighbor information
5139
    const int has_above = !!xd->above_mbmi;
5140
    const int has_left = !!xd->left_mbmi;
5141
    const BLOCK_SIZE above_bsize =
5142
        has_above ? xd->above_mbmi->bsize : BLOCK_INVALID;
5143
    const BLOCK_SIZE left_bsize =
5144
        has_left ? xd->left_mbmi->bsize : BLOCK_INVALID;
5145
    const int above_block_width =
5146
        above_bsize == BLOCK_INVALID ? -1 : block_size_wide[above_bsize];
5147
    const int above_block_height =
5148
        above_bsize == BLOCK_INVALID ? -1 : block_size_high[above_bsize];
5149
    const int left_block_width =
5150
        left_bsize == BLOCK_INVALID ? -1 : block_size_wide[left_bsize];
5151
    const int left_block_height =
5152
        left_bsize == BLOCK_INVALID ? -1 : block_size_high[left_bsize];
5153
    // Prepare simple motion search stats as features
5154
    unsigned int block_sse = -1;
5155
    unsigned int block_var = -1;
5156
    unsigned int sub_block_sse[4] = { -1, -1, -1, -1 };
5157
    unsigned int sub_block_var[4] = { -1, -1, -1, -1 };
5158
    unsigned int horz_block_sse[2] = { -1, -1 };
5159
    unsigned int horz_block_var[2] = { -1, -1 };
5160
    unsigned int vert_block_sse[2] = { -1, -1 };
5161
    unsigned int vert_block_var[2] = { -1, -1 };
5162
    av1_prepare_motion_search_features_block(
5163
        cpi, td, tile_data, mi_row, mi_col, bsize, valid_partition_types,
5164
        &block_sse, &block_var, sub_block_sse, sub_block_var, horz_block_sse,
5165
        horz_block_var, vert_block_sse, vert_block_var);
5166
    // Prepare tpl stats for the current block as features
5167
    int64_t tpl_intra_cost = -1;
5168
    int64_t tpl_inter_cost = -1;
5169
    int64_t tpl_mc_dep_cost = -1;
5170
    prepare_tpl_stats_block(cpi, bsize, mi_row, mi_col, &tpl_intra_cost,
5171
                            &tpl_inter_cost, &tpl_mc_dep_cost);
5172
5173
    aom_partition_features_t features;
5174
    features.mi_row = mi_row;
5175
    features.mi_col = mi_col;
5176
    features.frame_width = cpi->frame_info.frame_width;
5177
    features.frame_height = cpi->frame_info.frame_height;
5178
    features.block_size = bsize;
5179
    features.valid_partition_types = valid_partition_types;
5180
    features.update_type = update_type;
5181
    features.qindex = qindex;
5182
    features.rdmult = rdmult;
5183
    features.pyramid_level = pyramid_level;
5184
    features.has_above_block = has_above;
5185
    features.above_block_width = above_block_width;
5186
    features.above_block_height = above_block_height;
5187
    features.has_left_block = has_left;
5188
    features.left_block_width = left_block_width;
5189
    features.left_block_height = left_block_height;
5190
    features.block_sse = block_sse;
5191
    features.block_var = block_var;
5192
    for (int i = 0; i < 4; ++i) {
5193
      features.sub_block_sse[i] = sub_block_sse[i];
5194
      features.sub_block_var[i] = sub_block_var[i];
5195
    }
5196
    for (int i = 0; i < 2; ++i) {
5197
      features.horz_block_sse[i] = horz_block_sse[i];
5198
      features.horz_block_var[i] = horz_block_var[i];
5199
      features.vert_block_sse[i] = vert_block_sse[i];
5200
      features.vert_block_var[i] = vert_block_var[i];
5201
    }
5202
    features.tpl_intra_cost = tpl_intra_cost;
5203
    features.tpl_inter_cost = tpl_inter_cost;
5204
    features.tpl_mc_dep_cost = tpl_mc_dep_cost;
5205
    av1_ext_part_send_features(ext_part_controller, &features);
5206
    const bool valid_decision = av1_ext_part_get_partition_decision(
5207
        ext_part_controller, &partition_decision);
5208
    if (!valid_decision) return false;
5209
    pc_tree->partitioning = partition_decision.current_decision;
5210
5211
    av1_init_rd_stats(this_rdcost);
5212
    if (partition_decision.current_decision == PARTITION_SPLIT) {
5213
      assert(block_size_wide[bsize] >= 8 && block_size_high[bsize] >= 8);
5214
      const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
5215
      RD_STATS split_rdc[SUB_PARTITIONS_SPLIT];
5216
      for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
5217
        av1_init_rd_stats(&split_rdc[i]);
5218
        if (pc_tree->split[i] == NULL)
5219
          pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
5220
        if (!pc_tree->split[i])
5221
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
5222
                             "Failed to allocate PC_TREE");
5223
        pc_tree->split[i]->index = i;
5224
      }
5225
      const int orig_rdmult_tmp = x->rdmult;
5226
      setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
5227
      // TODO(chengchen): check boundary conditions
5228
      // top-left
5229
      recursive_partition(cpi, td, tile_data, tp, sms_root, pc_tree->split[0],
5230
                          mi_row, mi_col, subsize, &split_rdc[0]);
5231
      // top-right
5232
      recursive_partition(cpi, td, tile_data, tp, sms_root, pc_tree->split[1],
5233
                          mi_row, mi_col + mi_size_wide[subsize], subsize,
5234
                          &split_rdc[1]);
5235
      // bottom-left
5236
      recursive_partition(cpi, td, tile_data, tp, sms_root, pc_tree->split[2],
5237
                          mi_row + mi_size_high[subsize], mi_col, subsize,
5238
                          &split_rdc[2]);
5239
      // bottom_right
5240
      recursive_partition(cpi, td, tile_data, tp, sms_root, pc_tree->split[3],
5241
                          mi_row + mi_size_high[subsize],
5242
                          mi_col + mi_size_wide[subsize], subsize,
5243
                          &split_rdc[3]);
5244
      this_rdcost->rate += part_search_state.partition_cost[PARTITION_SPLIT];
5245
      // Known issue: the rdmult used here differs from the rdmult used in the sub-blocks.
5246
      for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
5247
        this_rdcost->rate += split_rdc[i].rate;
5248
        this_rdcost->dist += split_rdc[i].dist;
5249
        av1_rd_cost_update(x->rdmult, this_rdcost);
5250
      }
5251
      x->rdmult = orig_rdmult_tmp;
5252
    } else {
5253
      *this_rdcost = rd_search_for_fixed_partition(
5254
          cpi, td, tile_data, tp, sms_root, mi_row, mi_col, bsize, pc_tree);
5255
    }
5256
5257
    aom_partition_stats_t stats;
5258
    update_partition_stats(this_rdcost, &stats);
5259
    av1_ext_part_send_partition_stats(ext_part_controller, &stats);
5260
    if (!partition_decision.is_final_decision) {
5261
      if (partition_decision.current_decision == PARTITION_SPLIT) {
5262
        for (int i = 0; i < 4; ++i) {
5263
          if (pc_tree->split[i] != NULL) {
5264
            av1_free_pc_tree_recursive(pc_tree->split[i], av1_num_planes(cm), 0,
5265
                                       0,
5266
                                       cpi->sf.part_sf.partition_search_type);
5267
            pc_tree->split[i] = NULL;
5268
          }
5269
        }
5270
      }
5271
    }
5272
  } while (!partition_decision.is_final_decision);
5273
5274
  return true;
5275
}
5276
5277
// The ML model only needs to make decisions for the current block each time.
5278
static bool ml_partition_search_partial(AV1_COMP *const cpi, ThreadData *td,
5279
                                        TileDataEnc *tile_data, TokenExtra **tp,
5280
                                        SIMPLE_MOTION_DATA_TREE *sms_root,
5281
                                        int mi_row, int mi_col,
5282
                                        const BLOCK_SIZE bsize) {
5283
  AV1_COMMON *const cm = &cpi->common;
5284
  MACROBLOCK *const x = &td->mb;
5285
  ExtPartController *const ext_part_controller = &cpi->ext_part_controller;
5286
  aom_partition_features_t features;
5287
  prepare_sb_features_before_search(cpi, td, tile_data, mi_row, mi_col, bsize,
5288
                                    &features);
5289
  features.mi_row = mi_row;
5290
  features.mi_col = mi_col;
5291
  features.frame_width = cpi->frame_info.frame_width;
5292
  features.frame_height = cpi->frame_info.frame_height;
5293
  features.block_size = bsize;
5294
  av1_ext_part_send_features(ext_part_controller, &features);
5295
  td->pc_root = av1_alloc_pc_tree_node(bsize);
5296
  if (!td->pc_root)
5297
    aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
5298
                       "Failed to allocate PC_TREE");
5299
5300
  RD_STATS rdcost;
5301
  const bool valid_partition =
5302
      recursive_partition(cpi, td, tile_data, tp, sms_root, td->pc_root, mi_row,
5303
                          mi_col, bsize, &rdcost);
5304
  if (!valid_partition) {
5305
    return false;
5306
  }
5307
5308
  // Encode with the selected mode and partition.
5309
  set_cb_offsets(x->cb_offset, 0, 0);
5310
  encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
5311
            td->pc_root, NULL);
5312
  av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
5313
                             cpi->sf.part_sf.partition_search_type);
5314
  td->pc_root = NULL;
5315
5316
  return true;
5317
}
5318
5319
bool av1_rd_partition_search(AV1_COMP *const cpi, ThreadData *td,
5320
                             TileDataEnc *tile_data, TokenExtra **tp,
5321
                             SIMPLE_MOTION_DATA_TREE *sms_root, int mi_row,
5322
                             int mi_col, const BLOCK_SIZE bsize,
5323
                             RD_STATS *best_rd_cost) {
5324
  AV1_COMMON *const cm = &cpi->common;
5325
  if (cpi->ext_part_controller.ready) {
5326
    bool valid_search = true;
5327
    const aom_ext_part_decision_mode_t decision_mode =
5328
        av1_get_ext_part_decision_mode(&cpi->ext_part_controller);
5329
    if (decision_mode == AOM_EXT_PART_WHOLE_TREE) {
5330
      valid_search = ml_partition_search_whole_tree(
5331
          cpi, td, tile_data, tp, sms_root, mi_row, mi_col, bsize);
5332
    } else if (decision_mode == AOM_EXT_PART_RECURSIVE) {
5333
      valid_search = ml_partition_search_partial(
5334
          cpi, td, tile_data, tp, sms_root, mi_row, mi_col, bsize);
5335
    } else {
5336
      assert(0 && "Unknown decision mode.");
5337
      return false;
5338
    }
5339
    if (!valid_search) {
5340
      aom_internal_error(
5341
          cm->error, AOM_CODEC_ERROR,
5342
          "Invalid search from ML model, partition search failed");
5343
    }
5344
    return true;
5345
  }
5346
5347
  MACROBLOCK *const x = &td->mb;
5348
  MACROBLOCKD *const xd = &x->e_mbd;
5349
  int best_idx = 0;
5350
  int64_t min_rdcost = INT64_MAX;
5351
  int num_configs;
5352
  int i = 0;
5353
  do {
5354
    td->pc_root = av1_alloc_pc_tree_node(bsize);
5355
    if (!td->pc_root)
5356
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
5357
                         "Failed to allocate PC_TREE");
5358
    num_configs = read_partition_tree(cpi, td->pc_root, xd->error_info, i);
5359
    if (num_configs <= 0) {
5360
      av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
5361
                                 cpi->sf.part_sf.partition_search_type);
5362
      td->pc_root = NULL;
5363
      aom_internal_error(xd->error_info, AOM_CODEC_ERROR, "Invalid configs.");
5364
    }
5365
    verify_write_partition_tree(cpi, td->pc_root, bsize, i, mi_row, mi_col);
5366
    if (i == 0) {
5367
      AOM_CHECK_MEM_ERROR(xd->error_info, x->rdcost,
5368
                          aom_calloc(num_configs, sizeof(*x->rdcost)));
5369
    }
5370
    // Encode the block with the given partition tree. Get rdcost and encoding
5371
    // time.
5372
    x->rdcost[i] = rd_search_for_fixed_partition(
5373
        cpi, td, tile_data, tp, sms_root, mi_row, mi_col, bsize, td->pc_root);
5374
5375
    if (x->rdcost[i].rdcost < min_rdcost) {
5376
      min_rdcost = x->rdcost[i].rdcost;
5377
      best_idx = i;
5378
      *best_rd_cost = x->rdcost[i];
5379
    }
5380
    av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
5381
                               cpi->sf.part_sf.partition_search_type);
5382
    td->pc_root = NULL;
5383
    ++i;
5384
  } while (i < num_configs);
5385
5386
  aom_free(x->rdcost);
5387
  x->rdcost = NULL;
5388
  // Encode with the partition configuration that has the smallest rdcost.
5389
  td->pc_root = av1_alloc_pc_tree_node(bsize);
5390
  if (!td->pc_root)
5391
    aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
5392
                       "Failed to allocate PC_TREE");
5393
  read_partition_tree(cpi, td->pc_root, xd->error_info, best_idx);
5394
  rd_search_for_fixed_partition(cpi, td, tile_data, tp, sms_root, mi_row,
5395
                                mi_col, bsize, td->pc_root);
5396
  set_cb_offsets(x->cb_offset, 0, 0);
5397
  encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
5398
            td->pc_root, NULL);
5399
  av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
5400
                             cpi->sf.part_sf.partition_search_type);
5401
  td->pc_root = NULL;
5402
  ++cpi->sb_counter;
5403
5404
  return true;
5405
}
5406
#endif  // CONFIG_PARTITION_SEARCH_ORDER
5407
5408
static inline bool should_do_dry_run_encode_for_current_block(
5409
    BLOCK_SIZE sb_size, BLOCK_SIZE max_partition_size, int curr_block_index,
5410
7.42M
    BLOCK_SIZE bsize) {
5411
7.42M
  if (bsize > max_partition_size) return false;
5412
5413
  // Enable the reconstruction with dry-run for the 4th sub-block only if its
5414
  // parent block's reconstruction with dry-run is skipped. If
5415
  // max_partition_size equals the immediate split of the superblock, then avoid
5416
  // reconstruction of the 4th sub-block, as this data is not consumed.
5417
7.42M
  if (curr_block_index != 3) return true;
5418
5419
781k
  const BLOCK_SIZE sub_sb_size =
5420
781k
      get_partition_subsize(sb_size, PARTITION_SPLIT);
5421
781k
  return bsize == max_partition_size && sub_sb_size != max_partition_size;
5422
7.42M
}
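
A few worked evaluations of this rule, assuming a 128x128 superblock so that sub_sb_size == BLOCK_64X64:

/* Illustrative only (sb_size == BLOCK_128X128, so sub_sb_size == BLOCK_64X64):
 *   bsize BLOCK_32X32, max BLOCK_64X64, idx 1 -> true  (bsize <= max, idx != 3)
 *   bsize BLOCK_64X64, max BLOCK_64X64, idx 3 -> false (sub_sb_size == max)
 *   bsize BLOCK_32X32, max BLOCK_32X32, idx 3 -> true  (parent dry run skipped)
 */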
5423
5424
static void log_sub_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
5425
6.12M
                              double *var_min, double *var_max) {
5426
  // This function returns the minimum and maximum log variances for 4x4
5427
  // sub-blocks in the current block.
5428
5429
6.12M
  const MACROBLOCKD *const xd = &x->e_mbd;
5430
6.12M
  const int is_hbd = is_cur_buf_hbd(xd);
5431
6.12M
  const int right_overflow =
5432
6.12M
      (xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0;
5433
6.12M
  const int bottom_overflow =
5434
6.12M
      (xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0;
5435
6.12M
  const int bw = MI_SIZE * mi_size_wide[bs] - right_overflow;
5436
6.12M
  const int bh = MI_SIZE * mi_size_high[bs] - bottom_overflow;
5437
5438
  // Initialize minimum variance to a large value and maximum variance to 0.
5439
6.12M
  double min_var_4x4 = (double)INT_MAX;
5440
6.12M
  double max_var_4x4 = 0.0;
5441
5442
6.12M
  aom_variance_fn_t vf = cpi->ppi->fn_ptr[BLOCK_4X4].vf;
5443
18.4M
  for (int i = 0; i < bh; i += MI_SIZE) {
5444
61.8M
    for (int j = 0; j < bw; j += MI_SIZE) {
5445
49.5M
      int var;
5446
      // Calculate the 4x4 sub-block variance.
5447
49.5M
      var = av1_calc_normalized_variance(
5448
49.5M
          vf, x->plane[0].src.buf + (i * x->plane[0].src.stride) + j,
5449
49.5M
          x->plane[0].src.stride, is_hbd);
5450
5451
      // Record min and max for over-arching block
5452
49.5M
      min_var_4x4 = AOMMIN(min_var_4x4, var);
5453
49.5M
      max_var_4x4 = AOMMAX(max_var_4x4, var);
5454
49.5M
    }
5455
12.2M
  }
5456
6.12M
  *var_min = log1p(min_var_4x4 / 16.0);
5457
6.12M
  *var_max = log1p(max_var_4x4 / 16.0);
5458
6.12M
}
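
To relate the log1p(var / 16.0) outputs to raw 4x4 variances and to the thresholds used by the callers later in this file, a few worked values:

/* Worked values of log1p(var / 16.0):
 *   var ~5   -> ~0.272  (the "var_min < 0.272" split-forcing threshold)
 *   var 100  -> ~1.98   (just under the "var_min < 2.0" sb rdmult test)
 *   var ~860 -> ~4.0    (the "var_max > 4.0" sb rdmult test)
 * So those checks roughly ask whether the flattest 4x4 sub-block has raw
 * variance below ~5 and the busiest one above ~860. */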
5459
5460
static inline void set_sms_tree_partitioning(SIMPLE_MOTION_DATA_TREE *sms_tree,
5461
17.3M
                                             PARTITION_TYPE partition) {
5462
17.3M
  if (sms_tree == NULL) return;
5463
5.25M
  sms_tree->partitioning = partition;
5464
5.25M
}
5465
5466
/*!\brief AV1 block partition search (full search).
5467
*
5468
* \ingroup partition_search
5469
* \callgraph
5470
* Searches for the best partition pattern for a block based on the
5471
* rate-distortion cost, and returns a bool value to indicate whether a valid
5472
* partition pattern is found. The partition can recursively go down to the
5473
* smallest block size.
5474
*
5475
* \param[in]    cpi                Top-level encoder structure
5476
* \param[in]    td                 Pointer to thread data
5477
* \param[in]    tile_data          Pointer to struct holding adaptive
5478
*                                  data/contexts/models for the tile during
5479
*                                  encoding
5480
* \param[in]    tp                 Pointer to the starting token
5481
* \param[in]    mi_row             Row coordinate of the block in a step size
5482
*                                  of MI_SIZE
5483
* \param[in]    mi_col             Column coordinate of the block in a step
5484
*                                  size of MI_SIZE
5485
* \param[in]    bsize              Current block size
5486
* \param[in]    rd_cost            Pointer to the final rd cost of the block
5487
* \param[in]    best_rdc           Upper bound of rd cost of a valid partition
5488
* \param[in]    pc_tree            Pointer to the PC_TREE node storing the
5489
*                                  picked partitions and mode info for the
5490
*                                  current block
5491
* \param[in]    sms_tree           Pointer to struct holding simple motion
5492
*                                  search data for the current block
5493
* \param[in]    none_rd            Pointer to the rd cost in the case of not
5494
*                                  splitting the current block
5495
* \param[in]    multi_pass_mode    SB_SINGLE_PASS/SB_DRY_PASS/SB_WET_PASS
5496
* \param[in]    rect_part_win_info Pointer to struct storing whether horz/vert
5497
*                                  partition outperforms previously tested
5498
*                                  partitions
5499
*
5500
* \return A bool value is returned indicating if a valid partition is found.
5501
* The pc_tree struct is modified to store the picked partition and modes.
5502
* The rd_cost struct is also updated with the RD stats corresponding to the
5503
* best partition found.
5504
*/
5505
bool av1_rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
5506
                           TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
5507
                           int mi_col, BLOCK_SIZE bsize, RD_STATS *rd_cost,
5508
                           RD_STATS best_rdc, PC_TREE *pc_tree,
5509
                           SIMPLE_MOTION_DATA_TREE *sms_tree, int64_t *none_rd,
5510
                           SB_MULTI_PASS_MODE multi_pass_mode,
5511
8.65M
                           RD_RECT_PART_WIN_INFO *rect_part_win_info) {
5512
8.65M
  const AV1_COMMON *const cm = &cpi->common;
5513
8.65M
  const int num_planes = av1_num_planes(cm);
5514
8.65M
  TileInfo *const tile_info = &tile_data->tile_info;
5515
8.65M
  MACROBLOCK *const x = &td->mb;
5516
8.65M
  MACROBLOCKD *const xd = &x->e_mbd;
5517
8.65M
  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
5518
8.65M
  const TokenExtra *const tp_orig = *tp;
5519
8.65M
  PartitionSearchState part_search_state;
5520
5521
  // Initialization of state variables used in partition search.
5522
8.65M
  init_partition_search_state_params(x, cpi, &part_search_state, mi_row, mi_col,
5523
8.65M
                                     bsize);
5524
8.65M
  PartitionBlkParams blk_params = part_search_state.part_blk_params;
5525
5526
8.65M
  set_sms_tree_partitioning(sms_tree, PARTITION_NONE);
5527
8.65M
  if (best_rdc.rdcost < 0) {
5528
0
    av1_invalid_rd_stats(rd_cost);
5529
0
    return part_search_state.found_best_partition;
5530
0
  }
5531
8.65M
  if (bsize == cm->seq_params->sb_size) x->must_find_valid_partition = 0;
5532
5533
  // Override skipping rectangular partition operations for edge blocks.
5534
8.65M
  if (none_rd) *none_rd = 0;
5535
8.65M
  (void)*tp_orig;
5536
5537
#if CONFIG_COLLECT_PARTITION_STATS
5538
  // Stats at the current quad tree
5539
  PartitionTimingStats *part_timing_stats =
5540
      &part_search_state.part_timing_stats;
5541
  // Stats aggregated at frame level
5542
  FramePartitionTimingStats *fr_part_timing_stats = &cpi->partition_stats;
5543
#endif  // CONFIG_COLLECT_PARTITION_STATS
5544
5545
  // Override partition costs at the edges of the frame in the same
5546
  // way as in read_partition (see decodeframe.c).
5547
8.65M
  if (!av1_blk_has_rows_and_cols(&blk_params))
5548
807k
    set_partition_cost_for_edge_blk(cm, &part_search_state);
5549
5550
  // Disable rectangular partitions for inner blocks when the current block is
5551
  // forced to only use square partitions.
5552
8.65M
  if (bsize > cpi->sf.part_sf.use_square_partition_only_threshold) {
5553
696k
    part_search_state.partition_rect_allowed[HORZ] &= !blk_params.has_rows;
5554
696k
    part_search_state.partition_rect_allowed[VERT] &= !blk_params.has_cols;
5555
696k
  }
5556
5557
#ifndef NDEBUG
5558
  // Nothing should rely on the default value of this array (which is just
5559
  // leftover from encoding the previous block). Set it to a fixed pattern
5560
  // when debugging.
5561
  // bit 0, 1, 2 are blk_skip of each plane
5562
  // bit 4, 5, 6 are initialization checking of each plane
5563
  memset(x->txfm_search_info.blk_skip, 0x77,
5564
         sizeof(x->txfm_search_info.blk_skip));
5565
#endif  // NDEBUG
5566
5567
8.65M
  assert(mi_size_wide[bsize] == mi_size_high[bsize]);
5568
5569
  // Set buffers and offsets.
5570
8.65M
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
5571
5572
8.65M
  if (cpi->oxcf.mode == ALLINTRA) {
5573
6.02M
    if (bsize == cm->seq_params->sb_size) {
5574
98.6k
      double var_min, var_max;
5575
98.6k
      log_sub_block_var(cpi, x, bsize, &var_min, &var_max);
5576
5577
98.6k
      x->intra_sb_rdmult_modifier = 128;
5578
98.6k
      if ((var_min < 2.0) && (var_max > 4.0)) {
5579
22.1k
        if ((var_max - var_min) > 8.0) {
5580
21.0k
          x->intra_sb_rdmult_modifier -= 48;
5581
21.0k
        } else {
5582
1.09k
          x->intra_sb_rdmult_modifier -= (int)((var_max - var_min) * 6);
5583
1.09k
        }
5584
22.1k
      }
5585
98.6k
    }
5586
6.02M
  }
5587
5588
  // Save rdmult before it might be changed, so it can be restored later.
5589
8.65M
  const int orig_rdmult = x->rdmult;
5590
8.65M
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
5591
5592
  // Apply simple motion search for the entire super block with fixed block
5593
  // size, e.g., 16x16, to collect features and write to files for the
5594
  // external ML model.
5595
  // TODO(chengchen): reduce motion search. This function is similar to
5596
  // av1_get_max_min_partition_features().
5597
8.65M
  if (COLLECT_MOTION_SEARCH_FEATURE_SB && !frame_is_intra_only(cm) &&
5598
0
      bsize == cm->seq_params->sb_size) {
5599
0
    av1_collect_motion_search_features_sb(cpi, td, tile_data, mi_row, mi_col,
5600
0
                                          bsize, /*features=*/NULL);
5601
0
    collect_tpl_stats_sb(cpi, bsize, mi_row, mi_col, /*features=*/NULL);
5602
0
  }
5603
5604
  // Update rd cost of the bound using the current multiplier.
5605
8.65M
  av1_rd_cost_update(x->rdmult, &best_rdc);
5606
5607
8.65M
  if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
5608
0
    x->mb_energy = av1_log_block_var(cpi, x, bsize);
5609
5610
  // Set the context.
5611
8.65M
  xd->above_txfm_context =
5612
8.65M
      cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
5613
8.65M
  xd->left_txfm_context =
5614
8.65M
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
5615
8.65M
  av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
5616
5617
#if CONFIG_COLLECT_COMPONENT_TIMING
5618
  start_timing(cpi, av1_prune_partitions_time);
5619
#endif
5620
  // Pruning: before searching any partition type, use source and simple
5621
  // motion search results to prune out unlikely partitions.
5622
8.65M
  av1_prune_partitions_before_search(cpi, x, sms_tree, &part_search_state);
5623
5624
  // Pruning: eliminate partition types leading to coding block sizes outside
5626
  // the min and max bsize limitations set by the encoder.
5626
8.65M
  av1_prune_partitions_by_max_min_bsize(&x->sb_enc, &part_search_state);
5627
#if CONFIG_COLLECT_COMPONENT_TIMING
5628
  end_timing(cpi, av1_prune_partitions_time);
5629
#endif
5630
5631
  // Partition search
5632
8.65M
BEGIN_PARTITION_SEARCH:
5633
  // If a valid partition is required, usually when the first round cannot find
5634
  // a valid one under the cost limit after pruning, reset the limitations on
5635
  // partition types and intra cnn output.
5636
8.65M
  if (x->must_find_valid_partition) {
5637
0
    reset_part_limitations(cpi, &part_search_state);
5638
0
    av1_prune_partitions_by_max_min_bsize(&x->sb_enc, &part_search_state);
5639
    // Invalidate intra cnn output for key frames.
5640
0
    if (frame_is_intra_only(cm) && bsize == BLOCK_64X64) {
5641
0
      part_search_state.intra_part_info->quad_tree_idx = 0;
5642
0
      part_search_state.intra_part_info->cnn_output_valid = 0;
5643
0
    }
5644
0
  }
5645
  // Partition block source pixel variance.
5646
8.65M
  unsigned int pb_source_variance = UINT_MAX;
5647
5648
#if CONFIG_COLLECT_COMPONENT_TIMING
5649
  start_timing(cpi, none_partition_search_time);
5650
#endif
5651
5652
8.65M
  if (cpi->oxcf.mode == ALLINTRA) {
5653
6.02M
    const bool bsize_at_least_16x16 = (bsize >= BLOCK_16X16);
5654
6.02M
    const bool prune_rect_part_using_4x4_var_deviation =
5655
6.02M
        (cpi->sf.part_sf.prune_rect_part_using_4x4_var_deviation &&
5656
6.02M
         !x->must_find_valid_partition);
5657
5658
6.02M
    if (bsize_at_least_16x16 || prune_rect_part_using_4x4_var_deviation) {
5659
6.02M
      double var_min, var_max;
5660
6.02M
      log_sub_block_var(cpi, x, bsize, &var_min, &var_max);
5661
5662
      // Further pruning or in some cases reverse pruning when allintra is set.
5663
      // This code helps visual quality, and in some cases metrics, where the
5664
      // current block comprises at least one very low variance sub-block and at
5665
      // least one where the variance is much higher.
5666
      //
5667
      // The idea is that in such cases there is danger of ringing and other
5668
      // visual artifacts from a high variance feature such as an edge into a
5669
      // very low variance region.
5670
      //
5671
      // The approach taken is to force break down / split to a smaller block
5672
      // size to try and separate out the low variance and well predicted blocks
5673
      // from the more complex ones and to prevent propagation of ringing over a
5674
      // large region.
5675
6.02M
      if (bsize_at_least_16x16 && (var_min < 0.272) &&
5676
77.8k
          ((var_max - var_min) > 3.0)) {
5677
71.0k
        part_search_state.partition_none_allowed = 0;
5678
71.0k
        part_search_state.terminate_partition_search = 0;
5679
71.0k
        part_search_state.do_square_split = 1;
5680
5.95M
      } else if (prune_rect_part_using_4x4_var_deviation &&
5681
5.95M
                 (var_max - var_min < 3.0)) {
5682
        // Prune rectangular partitions if the variance deviation of 4x4
5683
        // sub-blocks within the block is less than a threshold (derived
5684
        // empirically).
5685
5.85M
        part_search_state.do_rectangular_split = 0;
5686
5.85M
      }
5687
6.02M
    }
5688
6.02M
  }
5689
5690
  // PARTITION_NONE search stage.
5691
8.65M
  int64_t part_none_rd = INT64_MAX;
5692
8.65M
  none_partition_search(cpi, td, tile_data, x, pc_tree, sms_tree, &x_ctx,
5693
8.65M
                        &part_search_state, &best_rdc, &pb_source_variance,
5694
8.65M
                        none_rd, &part_none_rd);
5695
5696
#if CONFIG_COLLECT_COMPONENT_TIMING
5697
  end_timing(cpi, none_partition_search_time);
5698
#endif
5699
#if CONFIG_COLLECT_COMPONENT_TIMING
5700
  start_timing(cpi, split_partition_search_time);
5701
#endif
5702
  // PARTITION_SPLIT search stage.
5703
8.65M
  int64_t part_split_rd = INT64_MAX;
5704
8.65M
  split_partition_search(cpi, td, tile_data, tp, x, pc_tree, sms_tree, &x_ctx,
5705
8.65M
                         &part_search_state, &best_rdc, multi_pass_mode,
5706
8.65M
                         &part_split_rd);
5707
#if CONFIG_COLLECT_COMPONENT_TIMING
5708
  end_timing(cpi, split_partition_search_time);
5709
#endif
5710
  // Terminate partition search for child partition,
5711
  // when NONE and SPLIT partition rd_costs are INT64_MAX.
5712
8.65M
  if (cpi->sf.part_sf.early_term_after_none_split &&
5713
8.65M
      part_none_rd == INT64_MAX && part_split_rd == INT64_MAX &&
5714
690k
      !x->must_find_valid_partition && (bsize != cm->seq_params->sb_size)) {
5715
690k
    part_search_state.terminate_partition_search = 1;
5716
690k
  }
5717
5718
  // Do not evaluate non-square partitions if NONE partition did not choose a
5719
  // newmv mode and is skippable.
5720
8.65M
  if ((cpi->sf.part_sf.skip_non_sq_part_based_on_none >= 2) &&
5721
0
      (pc_tree->none != NULL)) {
5722
0
    if (x->qindex <= 200 && is_inter_mode(pc_tree->none->mic.mode) &&
5723
0
        !have_newmv_in_inter_mode(pc_tree->none->mic.mode) &&
5724
0
        pc_tree->none->skippable && !x->must_find_valid_partition &&
5725
0
        bsize >= BLOCK_16X16)
5726
0
      part_search_state.do_rectangular_split = 0;
5727
0
  }
5728
5729
  // Prune partitions based on PARTITION_NONE and PARTITION_SPLIT.
5730
8.65M
  prune_partitions_after_split(cpi, x, sms_tree, &part_search_state, &best_rdc,
5731
8.65M
                               part_none_rd, part_split_rd);
5732
#if CONFIG_COLLECT_COMPONENT_TIMING
5733
  start_timing(cpi, rectangular_partition_search_time);
5734
#endif
5735
  // Rectangular partitions search stage.
5736
8.65M
  rectangular_partition_search(cpi, td, tile_data, tp, x, pc_tree, &x_ctx,
5737
8.65M
                               &part_search_state, &best_rdc,
5738
8.65M
                               rect_part_win_info, HORZ, VERT);
5739
#if CONFIG_COLLECT_COMPONENT_TIMING
5740
  end_timing(cpi, rectangular_partition_search_time);
5741
#endif
5742
5743
8.65M
  if (pb_source_variance == UINT_MAX) {
5744
1.17M
    av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
5745
1.17M
    pb_source_variance = av1_get_perpixel_variance_facade(
5746
1.17M
        cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y);
5747
1.17M
  }
5748
5749
8.65M
  assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions,
5750
8.65M
                 !part_search_state.do_rectangular_split));
5751
5752
8.65M
  const int prune_ext_part_state = prune_ext_part_none_skippable(
5753
8.65M
      pc_tree->none, x->must_find_valid_partition,
5754
8.65M
      cpi->sf.part_sf.skip_non_sq_part_based_on_none, bsize);
5755
5756
8.65M
  const int ab_partition_allowed = allow_ab_partition_search(
5757
8.65M
      &part_search_state, &cpi->sf.part_sf, pc_tree->partitioning,
5758
8.65M
      x->must_find_valid_partition, prune_ext_part_state, best_rdc.rdcost);
5759
5760
#if CONFIG_COLLECT_COMPONENT_TIMING
5761
  start_timing(cpi, ab_partitions_search_time);
5762
#endif
5763
  // AB partitions search stage.
5764
8.65M
  ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
5765
8.65M
                       &part_search_state, &best_rdc, rect_part_win_info,
5766
8.65M
                       pb_source_variance, ab_partition_allowed, HORZ_A,
5767
8.65M
                       VERT_B);
5768
#if CONFIG_COLLECT_COMPONENT_TIMING
5769
  end_timing(cpi, ab_partitions_search_time);
5770
#endif
5771
5772
  // 4-way partitions search stage.
5773
8.65M
  int part4_search_allowed[NUM_PART4_TYPES] = { 1, 1 };
5774
  // Prune 4-way partition search.
5775
8.65M
  prune_4_way_partition_search(cpi, x, pc_tree, &part_search_state, &best_rdc,
5776
8.65M
                               pb_source_variance, prune_ext_part_state,
5777
8.65M
                               part4_search_allowed);
5778
5779
#if CONFIG_COLLECT_COMPONENT_TIMING
5780
  start_timing(cpi, rd_pick_4partition_time);
5781
#endif
5782
  // PARTITION_HORZ_4
5783
8.65M
  assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions,
5784
8.65M
                 !part4_search_allowed[HORZ4]));
5785
8.65M
  if (!part_search_state.terminate_partition_search &&
5786
7.96M
      part4_search_allowed[HORZ4]) {
5787
0
    const int inc_step[NUM_PART4_TYPES] = { mi_size_high[blk_params.bsize] / 4,
5788
0
                                            0 };
5789
    // Evaluation of Horz4 partition type.
5790
0
    rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
5791
0
                       pc_tree->horizontal4, &part_search_state, &best_rdc,
5792
0
                       inc_step, PARTITION_HORZ_4);
5793
0
  }
5794
5795
  // PARTITION_VERT_4
5796
8.65M
  assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions,
5797
8.65M
                 !part4_search_allowed[VERT4]));
5798
8.65M
  if (!part_search_state.terminate_partition_search &&
5799
7.96M
      part4_search_allowed[VERT4] && blk_params.has_cols) {
5800
0
    const int inc_step[NUM_PART4_TYPES] = { 0, mi_size_wide[blk_params.bsize] /
5801
0
                                                   4 };
5802
    // Evaluation of Vert4 partition type.
5803
0
    rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
5804
0
                       pc_tree->vertical4, &part_search_state, &best_rdc,
5805
0
                       inc_step, PARTITION_VERT_4);
5806
0
  }
5807
#if CONFIG_COLLECT_COMPONENT_TIMING
5808
  end_timing(cpi, rd_pick_4partition_time);
5809
#endif
5810
5811
8.65M
  if (bsize == cm->seq_params->sb_size &&
5812
201k
      !part_search_state.found_best_partition) {
5813
    // Did not find a valid partition; go back and search again, with fewer
5814
    // constraints on which partition types to search.
5815
0
    x->must_find_valid_partition = 1;
5816
#if CONFIG_COLLECT_PARTITION_STATS
5817
    fr_part_timing_stats->partition_redo += 1;
5818
#endif  // CONFIG_COLLECT_PARTITION_STATS
5819
0
    goto BEGIN_PARTITION_SEARCH;
5820
0
  }
5821
5822
  // Store the final rd cost
5823
8.65M
  *rd_cost = best_rdc;
5824
5825
  // Also record the best partition in the simple motion data tree because it
5826
  // is needed by the related speed features.
5827
8.65M
  set_sms_tree_partitioning(sms_tree, pc_tree->partitioning);
5828
5829
#if CONFIG_COLLECT_PARTITION_STATS
5830
  if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX) {
5831
    part_timing_stats->partition_decisions[pc_tree->partitioning] += 1;
5832
  }
5833
5834
  // If CONFIG_COLLECT_PARTITION_STATS is 1, then print out the stats for each
5835
  // prediction block.
5836
  print_partition_timing_stats_with_rdcost(
5837
      part_timing_stats, mi_row, mi_col, bsize,
5838
      cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
5839
      cm->current_frame.frame_number, &best_rdc, "part_timing.csv");
5840
  const bool print_timing_stats = false;
5841
  if (print_timing_stats) {
5842
    print_partition_timing_stats(part_timing_stats, cm->show_frame,
5843
                                 frame_is_intra_only(cm), bsize,
5844
                                 "part_timing_data.csv");
5845
  }
5846
  // If CONFIG_COLLECT_PARTITION_STATS is 2, then we print out the stats for
5847
  // the whole clip, so we need to pass the information upstream to the encoder.
5848
  accumulate_partition_timing_stats(fr_part_timing_stats, part_timing_stats,
5849
                                    bsize);
5850
#endif  // CONFIG_COLLECT_PARTITION_STATS
5851
5852
  // Reset the PC_TREE deallocation flag.
5853
8.65M
  int pc_tree_dealloc = 0;
5854
5855
#if CONFIG_COLLECT_COMPONENT_TIMING
5856
  start_timing(cpi, encode_sb_time);
5857
#endif
5858
8.65M
  if (part_search_state.found_best_partition) {
5859
7.62M
    if (bsize == cm->seq_params->sb_size) {
5860
      // Encode the superblock.
5861
201k
      const int emit_output = multi_pass_mode != SB_DRY_PASS;
5862
201k
      const RUN_TYPE run_type = emit_output ? OUTPUT_ENABLED : DRY_RUN_NORMAL;
5863
5864
      // Write partition tree to file. Not used by default.
5865
201k
      if (COLLECT_MOTION_SEARCH_FEATURE_SB) {
5866
0
        write_partition_tree(cpi, pc_tree, bsize, mi_row, mi_col);
5867
0
        ++cpi->sb_counter;
5868
0
      }
5869
5870
201k
      set_cb_offsets(x->cb_offset, 0, 0);
5871
201k
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, run_type, bsize,
5872
201k
                pc_tree, NULL);
5873
201k
      assert(pc_tree == td->pc_root);
5874
      // Dealloc the whole PC_TREE after a superblock is done.
5875
201k
      av1_free_pc_tree_recursive(pc_tree, num_planes, 0, 0,
5876
201k
                                 cpi->sf.part_sf.partition_search_type);
5877
201k
      pc_tree = NULL;
5878
201k
      td->pc_root = NULL;
5879
201k
      pc_tree_dealloc = 1;
5880
7.42M
    } else if (should_do_dry_run_encode_for_current_block(
5881
7.42M
                   cm->seq_params->sb_size, x->sb_enc.max_partition_size,
5882
7.42M
                   pc_tree->index, bsize)) {
5883
      // Encode the smaller blocks in DRY_RUN mode.
5884
6.64M
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
5885
6.64M
                pc_tree, NULL);
5886
6.64M
    }
5887
7.62M
  }
5888
#if CONFIG_COLLECT_COMPONENT_TIMING
5889
  end_timing(cpi, encode_sb_time);
5890
#endif
5891
5892
  // If the tree still exists (non-superblock), dealloc most nodes, keeping
5893
  // only the nodes for the best partition and PARTITION_NONE.
5894
8.65M
  if (pc_tree_dealloc == 0)
5895
8.45M
    av1_free_pc_tree_recursive(pc_tree, num_planes, 1, 1,
5896
8.45M
                               cpi->sf.part_sf.partition_search_type);
5897
5898
8.65M
  if (bsize == cm->seq_params->sb_size) {
5899
201k
    assert(best_rdc.rate < INT_MAX);
5900
201k
    assert(best_rdc.dist < INT64_MAX);
5901
8.45M
  } else {
5902
8.45M
    assert(tp_orig == *tp);
5903
8.45M
  }
5904
5905
  // Restore the rd multiplier.
5906
8.65M
  x->rdmult = orig_rdmult;
5907
8.65M
  return part_search_state.found_best_partition;
5908
8.65M
}
5909
#endif  // !CONFIG_REALTIME_ONLY
5910
5911
#undef COLLECT_MOTION_SEARCH_FEATURE_SB
5912
5913
#if CONFIG_RT_ML_PARTITIONING
5914
#define FEATURES 6
5915
#define LABELS 2
5916
static int ml_predict_var_partitioning(AV1_COMP *cpi, MACROBLOCK *x,
5917
                                       BLOCK_SIZE bsize, int mi_row,
5918
                                       int mi_col) {
5919
  AV1_COMMON *const cm = &cpi->common;
5920
  const NN_CONFIG *nn_config = NULL;
5921
  const float *means = NULL;
5922
  const float *vars = NULL;
5923
  switch (bsize) {
5924
    case BLOCK_64X64:
5925
      nn_config = &av1_var_part_nnconfig_64;
5926
      means = av1_var_part_means_64;
5927
      vars = av1_var_part_vars_64;
5928
      break;
5929
    case BLOCK_32X32:
5930
      nn_config = &av1_var_part_nnconfig_32;
5931
      means = av1_var_part_means_32;
5932
      vars = av1_var_part_vars_32;
5933
      break;
5934
    case BLOCK_16X16:
5935
      nn_config = &av1_var_part_nnconfig_16;
5936
      means = av1_var_part_means_16;
5937
      vars = av1_var_part_vars_16;
5938
      break;
5939
    case BLOCK_8X8:
5940
    default: assert(0 && "Unexpected block size."); return -1;
5941
  }
5942
5943
  if (!nn_config) return -1;
5944
5945
  {
5946
    const float thresh = cpi->oxcf.speed <= 5 ? 1.25f : 0.0f;
5947
    float features[FEATURES] = { 0.0f };
5948
    const int dc_q = av1_dc_quant_QTX(cm->quant_params.base_qindex, 0,
5949
                                      cm->seq_params->bit_depth);
5950
    int feature_idx = 0;
5951
    float score[LABELS];
5952
5953
    features[feature_idx] =
5954
        (log1pf((float)(dc_q * dc_q) / 256.0f) - means[feature_idx]) /
5955
        sqrtf(vars[feature_idx]);
5956
    feature_idx++;
5957
    av1_setup_src_planes(x, cpi->source, mi_row, mi_col, 1, bsize);
5958
    {
5959
      const int bs = block_size_wide[bsize];
5960
      const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
5961
      const int sb_offset_row = 4 * (mi_row & 15);
5962
      const int sb_offset_col = 4 * (mi_col & 15);
5963
      const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col;
5964
      const uint8_t *src = x->plane[0].src.buf;
5965
      const int src_stride = x->plane[0].src.stride;
5966
      const int pred_stride = 64;
5967
      unsigned int sse;
5968
      int i;
5969
      // Variance of whole block.
5970
      const unsigned int var =
5971
          cpi->ppi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
5972
      const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
5973
5974
      features[feature_idx] =
5975
          (log1pf((float)var) - means[feature_idx]) / sqrtf(vars[feature_idx]);
5976
      feature_idx++;
5977
      for (i = 0; i < 4; ++i) {
5978
        const int x_idx = (i & 1) * bs / 2;
5979
        const int y_idx = (i >> 1) * bs / 2;
5980
        const int src_offset = y_idx * src_stride + x_idx;
5981
        const int pred_offset = y_idx * pred_stride + x_idx;
5982
        // Variance of quarter block.
5983
        const unsigned int sub_var =
5984
            cpi->ppi->fn_ptr[subsize].vf(src + src_offset, src_stride,
5985
                                         pred + pred_offset, pred_stride, &sse);
5986
        const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
5987
        features[feature_idx] =
5988
            (var_ratio - means[feature_idx]) / sqrtf(vars[feature_idx]);
5989
        feature_idx++;
5990
      }
5991
    }
5992
    //    for (int i = 0; i<FEATURES; i++)
5993
    //      printf("F_%d, %f; ", i, features[i]);
5994
    assert(feature_idx == FEATURES);
5995
    av1_nn_predict(features, nn_config, 1, score);
5996
    //    printf("Score %f, thr %f ", (float)score[0], thresh);
5997
    if (score[0] > thresh) return PARTITION_SPLIT;
5998
    if (score[0] < -thresh) return PARTITION_NONE;
5999
    return -1;
6000
  }
6001
}
6002
#undef FEATURES
6003
#undef LABELS
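
Each feature fed to av1_nn_predict() above is z-scored against per-block-size
training statistics (the `means` and `vars` tables selected in the switch)
before inference. A minimal sketch of that normalization follows;
normalize_feature is a hypothetical helper, not part of the source:

#include <math.h>

/* Hypothetical illustration only -- z-score one raw feature value against the
 * per-block-size mean/variance tables before it is passed to the network. */
static float normalize_feature(float raw, float mean, float var) {
  return (raw - mean) / sqrtf(var);
}
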
6004
6005
// Uncomment for collecting data for ML-based partitioning
6006
// #define _COLLECT_GROUND_TRUTH_
6007
6008
#ifdef _COLLECT_GROUND_TRUTH_
6009
static int store_partition_data(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
6010
                                int mi_row, int mi_col, PARTITION_TYPE part) {
6011
  AV1_COMMON *const cm = &cpi->common;
6012
  char fname[128];
6013
  switch (bsize) {
6014
    case BLOCK_64X64: sprintf(fname, "data_64x64.txt"); break;
6015
    case BLOCK_32X32: sprintf(fname, "data_32x32.txt"); break;
6016
    case BLOCK_16X16: sprintf(fname, "data_16x16.txt"); break;
6017
    case BLOCK_8X8: sprintf(fname, "data_8x8.txt"); break;
6018
    default: assert(0 && "Unexpected block size."); return -1;
6019
  }
6020
6021
  float features[6];  // DC_Q, VAR, VAR_RATIO-0..3
6022
6023
  FILE *f = fopen(fname, "a");
6024
6025
  {
6026
    const int dc_q = av1_dc_quant_QTX(cm->quant_params.base_qindex, 0,
6027
                                      cm->seq_params->bit_depth);
6028
    int feature_idx = 0;
6029
6030
    features[feature_idx++] = log1pf((float)(dc_q * dc_q) / 256.0f);
6031
    av1_setup_src_planes(x, cpi->source, mi_row, mi_col, 1, bsize);
6032
    {
6033
      const int bs = block_size_wide[bsize];
6034
      const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
6035
      const int sb_offset_row = 4 * (mi_row & 15);
6036
      const int sb_offset_col = 4 * (mi_col & 15);
6037
      const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col;
6038
      const uint8_t *src = x->plane[0].src.buf;
6039
      const int src_stride = x->plane[0].src.stride;
6040
      const int pred_stride = 64;
6041
      unsigned int sse;
6042
      int i;
6043
      // Variance of whole block.
6044
      /*
6045
                if (bs == 8)
6046
                {
6047
                  int r, c;
6048
                  printf("%d %d\n", mi_row, mi_col);
6049
                  for (r = 0; r < bs; ++r) {
6050
                    for (c = 0; c < bs; ++c) {
6051
                      printf("%3d ",
6052
                             src[r * src_stride + c] - pred[64 * r + c]);
6053
                    }
6054
                    printf("\n");
6055
                  }
6056
                  printf("\n");
6057
                }
6058
      */
6059
      const unsigned int var =
6060
          cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
6061
      const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
6062
6063
      features[feature_idx++] = log1pf((float)var);
6064
6065
      fprintf(f, "%f,%f,", features[0], features[1]);
6066
      for (i = 0; i < 4; ++i) {
6067
        const int x_idx = (i & 1) * bs / 2;
6068
        const int y_idx = (i >> 1) * bs / 2;
6069
        const int src_offset = y_idx * src_stride + x_idx;
6070
        const int pred_offset = y_idx * pred_stride + x_idx;
6071
        // Variance of quarter block.
6072
        const unsigned int sub_var =
6073
            cpi->fn_ptr[subsize].vf(src + src_offset, src_stride,
6074
                                    pred + pred_offset, pred_stride, &sse);
6075
        const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
6076
        features[feature_idx++] = var_ratio;
6077
        fprintf(f, "%f,", var_ratio);
6078
      }
6079
6080
      fprintf(f, "%d\n", part == PARTITION_NONE ? 0 : 1);
6081
    }
6082
6083
    fclose(f);
6084
    return -1;
6085
  }
6086
}
6087
#endif
6088
6089
static void duplicate_mode_info_in_sb(AV1_COMMON *cm, MACROBLOCKD *xd,
6090
                                      int mi_row, int mi_col,
6091
                                      BLOCK_SIZE bsize) {
6092
  const int block_width =
6093
      AOMMIN(mi_size_wide[bsize], cm->mi_params.mi_cols - mi_col);
6094
  const int block_height =
6095
      AOMMIN(mi_size_high[bsize], cm->mi_params.mi_rows - mi_row);
6096
  const int mi_stride = xd->mi_stride;
6097
  MB_MODE_INFO *const src_mi = xd->mi[0];
6098
  int i, j;
6099
6100
  for (j = 0; j < block_height; ++j)
6101
    for (i = 0; i < block_width; ++i) xd->mi[j * mi_stride + i] = src_mi;
6102
}
6103
6104
static inline void copy_mbmi_ext_frame_to_mbmi_ext(
6105
    MB_MODE_INFO_EXT *const mbmi_ext,
6106
    const MB_MODE_INFO_EXT_FRAME *mbmi_ext_best, uint8_t ref_frame_type) {
6107
  memcpy(mbmi_ext->ref_mv_stack[ref_frame_type], mbmi_ext_best->ref_mv_stack,
6108
         sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
6109
  memcpy(mbmi_ext->weight[ref_frame_type], mbmi_ext_best->weight,
6110
         sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
6111
  mbmi_ext->mode_context[ref_frame_type] = mbmi_ext_best->mode_context;
6112
  mbmi_ext->ref_mv_count[ref_frame_type] = mbmi_ext_best->ref_mv_count;
6113
  memcpy(mbmi_ext->global_mvs, mbmi_ext_best->global_mvs,
6114
         sizeof(mbmi_ext->global_mvs));
6115
}
6116
6117
static void fill_mode_info_sb(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
6118
                              int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
6119
  AV1_COMMON *const cm = &cpi->common;
6120
  MACROBLOCKD *xd = &x->e_mbd;
6121
  int hbs = mi_size_wide[bsize] >> 1;
6122
  PARTITION_TYPE partition = pc_tree->partitioning;
6123
  BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
6124
6125
  assert(bsize >= BLOCK_8X8);
6126
6127
  if (mi_row >= cm->mi_params.mi_rows || mi_col >= cm->mi_params.mi_cols)
6128
    return;
6129
6130
  switch (partition) {
6131
    case PARTITION_NONE:
6132
      set_mode_info_offsets(&cm->mi_params, &cpi->mbmi_ext_info, x, xd, mi_row,
6133
                            mi_col);
6134
      *(xd->mi[0]) = pc_tree->none->mic;
6135
      copy_mbmi_ext_frame_to_mbmi_ext(
6136
          &x->mbmi_ext, &pc_tree->none->mbmi_ext_best, LAST_FRAME);
6137
      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
6138
      break;
6139
    case PARTITION_SPLIT: {
6140
      fill_mode_info_sb(cpi, x, mi_row, mi_col, subsize, pc_tree->split[0]);
6141
      fill_mode_info_sb(cpi, x, mi_row, mi_col + hbs, subsize,
6142
                        pc_tree->split[1]);
6143
      fill_mode_info_sb(cpi, x, mi_row + hbs, mi_col, subsize,
6144
                        pc_tree->split[2]);
6145
      fill_mode_info_sb(cpi, x, mi_row + hbs, mi_col + hbs, subsize,
6146
                        pc_tree->split[3]);
6147
      break;
6148
    }
6149
    default: break;
6150
  }
6151
}
6152
6153
void av1_nonrd_pick_partition(AV1_COMP *cpi, ThreadData *td,
6154
                              TileDataEnc *tile_data, TokenExtra **tp,
6155
                              int mi_row, int mi_col, BLOCK_SIZE bsize,
6156
                              RD_STATS *rd_cost, int do_recon, int64_t best_rd,
6157
                              PC_TREE *pc_tree) {
6158
  AV1_COMMON *const cm = &cpi->common;
6159
  TileInfo *const tile_info = &tile_data->tile_info;
6160
  MACROBLOCK *const x = &td->mb;
6161
  MACROBLOCKD *const xd = &x->e_mbd;
6162
  const int hbs = mi_size_wide[bsize] >> 1;
6163
  TokenExtra *tp_orig = *tp;
6164
  const ModeCosts *mode_costs = &x->mode_costs;
6165
  RD_STATS this_rdc, best_rdc;
6166
  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
6167
  int do_split = bsize > BLOCK_8X8;
6168
  // Override skipping rectangular partition operations for edge blocks
6169
  const int force_horz_split = (mi_row + 2 * hbs > cm->mi_params.mi_rows);
6170
  const int force_vert_split = (mi_col + 2 * hbs > cm->mi_params.mi_cols);
6171
6172
  int partition_none_allowed = !force_horz_split && !force_vert_split;
6173
6174
  assert(mi_size_wide[bsize] == mi_size_high[bsize]);  // Square partition only
6175
  assert(cm->seq_params->sb_size == BLOCK_64X64);      // Small SB so far
6176
6177
  (void)*tp_orig;
6178
6179
  av1_invalid_rd_stats(&best_rdc);
6180
  best_rdc.rdcost = best_rd;
6181
#ifndef _COLLECT_GROUND_TRUTH_
6182
  if (partition_none_allowed && do_split) {
6183
    const int ml_predicted_partition =
6184
        ml_predict_var_partitioning(cpi, x, bsize, mi_row, mi_col);
6185
    if (ml_predicted_partition == PARTITION_NONE) do_split = 0;
6186
    if (ml_predicted_partition == PARTITION_SPLIT) partition_none_allowed = 0;
6187
  }
6188
#endif
6189
6190
  xd->above_txfm_context =
6191
      cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
6192
  xd->left_txfm_context =
6193
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
6194
  av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
6195
6196
  // PARTITION_NONE
6197
  if (partition_none_allowed) {
6198
    pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf);
6199
    if (!pc_tree->none)
6200
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
6201
                         "Failed to allocate PICK_MODE_CONTEXT");
6202
    PICK_MODE_CONTEXT *ctx = pc_tree->none;
6203
6204
// Flip for RDO based pick mode
6205
#if 0
6206
    RD_STATS dummy;
6207
    av1_invalid_rd_stats(&dummy);
6208
    pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
6209
                  PARTITION_NONE, bsize, ctx, dummy);
6210
#else
6211
    pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize,
6212
                        ctx);
6213
#endif
6214
    if (this_rdc.rate != INT_MAX) {
6215
      const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
6216
6217
      this_rdc.rate += mode_costs->partition_cost[pl][PARTITION_NONE];
6218
      this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
6219
      if (this_rdc.rdcost < best_rdc.rdcost) {
6220
        best_rdc = this_rdc;
6221
        if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
6222
      }
6223
    }
6224
  }
6225
6226
  // PARTITION_SPLIT
6227
  if (do_split) {
6228
    RD_STATS sum_rdc;
6229
    const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
6230
6231
    av1_init_rd_stats(&sum_rdc);
6232
6233
    for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
6234
      pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
6235
      if (!pc_tree->split[i])
6236
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
6237
                           "Failed to allocate PC_TREE");
6238
      pc_tree->split[i]->index = i;
6239
    }
6240
6241
    int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
6242
    sum_rdc.rate += mode_costs->partition_cost[pl][PARTITION_SPLIT];
6243
    sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
6244
    for (int i = 0;
6245
         i < SUB_PARTITIONS_SPLIT && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
6246
      const int x_idx = (i & 1) * hbs;
6247
      const int y_idx = (i >> 1) * hbs;
6248
6249
      if (mi_row + y_idx >= cm->mi_params.mi_rows ||
6250
          mi_col + x_idx >= cm->mi_params.mi_cols)
6251
        continue;
6252
      av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
6253
                               mi_col + x_idx, subsize, &this_rdc, i < 3,
6254
                               best_rdc.rdcost - sum_rdc.rdcost,
6255
                               pc_tree->split[i]);
6256
6257
      if (this_rdc.rate == INT_MAX) {
6258
        av1_invalid_rd_stats(&sum_rdc);
6259
      } else {
6260
        sum_rdc.rate += this_rdc.rate;
6261
        sum_rdc.dist += this_rdc.dist;
6262
        sum_rdc.rdcost += this_rdc.rdcost;
6263
      }
6264
    }
6265
    if (sum_rdc.rdcost < best_rdc.rdcost) {
6266
      best_rdc = sum_rdc;
6267
      pc_tree->partitioning = PARTITION_SPLIT;
6268
    }
6269
  }
6270
6271
#ifdef _COLLECT_GROUND_TRUTH_
6272
  store_partition_data(cpi, x, bsize, mi_row, mi_col, pc_tree->partitioning);
6273
#endif
6274
6275
  *rd_cost = best_rdc;
6276
6277
  av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
6278
6279
  if (best_rdc.rate == INT_MAX) {
6280
    av1_invalid_rd_stats(rd_cost);
6281
    return;
6282
  }
6283
6284
  // update mode info array
6285
  fill_mode_info_sb(cpi, x, mi_row, mi_col, bsize, pc_tree);
6286
6287
  if (do_recon) {
6288
    if (bsize == cm->seq_params->sb_size) {
6289
      // NOTE: To get estimate for rate due to the tokens, use:
6290
      // int rate_coeffs = 0;
6291
      // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
6292
      //           bsize, pc_tree, &rate_coeffs);
6293
      set_cb_offsets(x->cb_offset, 0, 0);
6294
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
6295
                pc_tree, NULL);
6296
    } else {
6297
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
6298
                pc_tree, NULL);
6299
    }
6300
  }
6301
6302
  if (bsize == BLOCK_64X64 && do_recon) {
6303
    assert(best_rdc.rate < INT_MAX);
6304
    assert(best_rdc.dist < INT64_MAX);
6305
  } else {
6306
    assert(tp_orig == *tp);
6307
  }
6308
}
6309
#endif  // CONFIG_RT_ML_PARTITIONING
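
The PARTITION_SPLIT branch of av1_nonrd_pick_partition above accumulates rate,
distortion, and rdcost over the four sub-blocks (plus the partition signalling
cost) and adopts PARTITION_SPLIT only if the sum undercuts the best cost so
far. A compact sketch of that comparison follows; split_beats_best_sketch and
its parameters are hypothetical, while RD_STATS, RDCOST, and
av1_init_rd_stats are the real types and macros used in the source:

#include <limits.h>

/* Hypothetical illustration only -- not part of partition_search.c. */
static int split_beats_best_sketch(const RD_STATS *sub, int n_sub,
                                   int split_cost_bits, int rdmult,
                                   const RD_STATS *best) {
  RD_STATS sum;
  av1_init_rd_stats(&sum);
  sum.rate = split_cost_bits;  // cost of signalling PARTITION_SPLIT
  sum.rdcost = RDCOST(rdmult, sum.rate, sum.dist);
  for (int i = 0; i < n_sub; ++i) {
    if (sub[i].rate == INT_MAX) return 0;  // a sub-block failed to encode
    sum.rate += sub[i].rate;
    sum.dist += sub[i].dist;
    sum.rdcost += sub[i].rdcost;
  }
  return sum.rdcost < best->rdcost;
}
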