Coverage Report

Created: 2025-11-11 06:29

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/aom/av1/encoder/rdopt.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <math.h>
14
#include <stdbool.h>
15
#include <stdint.h>
16
#include <string.h>
17
18
#include "config/aom_config.h"
19
#include "config/aom_dsp_rtcd.h"
20
#include "config/av1_rtcd.h"
21
22
#include "aom_dsp/aom_dsp_common.h"
23
#include "aom_dsp/blend.h"
24
#include "aom_mem/aom_mem.h"
25
#include "aom_ports/aom_timer.h"
26
#include "aom_ports/mem.h"
27
28
#include "av1/common/av1_common_int.h"
29
#include "av1/common/cfl.h"
30
#include "av1/common/blockd.h"
31
#include "av1/common/common.h"
32
#include "av1/common/common_data.h"
33
#include "av1/common/entropy.h"
34
#include "av1/common/entropymode.h"
35
#include "av1/common/enums.h"
36
#include "av1/common/idct.h"
37
#include "av1/common/mvref_common.h"
38
#include "av1/common/obmc.h"
39
#include "av1/common/pred_common.h"
40
#include "av1/common/quant_common.h"
41
#include "av1/common/reconinter.h"
42
#include "av1/common/reconintra.h"
43
#include "av1/common/scan.h"
44
#include "av1/common/seg_common.h"
45
#include "av1/common/txb_common.h"
46
#include "av1/common/warped_motion.h"
47
48
#include "av1/encoder/aq_variance.h"
49
#include "av1/encoder/av1_quantize.h"
50
#include "av1/encoder/block.h"
51
#include "av1/encoder/cost.h"
52
#include "av1/encoder/compound_type.h"
53
#include "av1/encoder/encodemb.h"
54
#include "av1/encoder/encodemv.h"
55
#include "av1/encoder/encoder.h"
56
#include "av1/encoder/encodetxb.h"
57
#include "av1/encoder/hybrid_fwd_txfm.h"
58
#include "av1/encoder/interp_search.h"
59
#include "av1/encoder/intra_mode_search.h"
60
#include "av1/encoder/intra_mode_search_utils.h"
61
#include "av1/encoder/mcomp.h"
62
#include "av1/encoder/ml.h"
63
#include "av1/encoder/mode_prune_model_weights.h"
64
#include "av1/encoder/model_rd.h"
65
#include "av1/encoder/motion_search_facade.h"
66
#include "av1/encoder/palette.h"
67
#include "av1/encoder/pustats.h"
68
#include "av1/encoder/random.h"
69
#include "av1/encoder/ratectrl.h"
70
#include "av1/encoder/rd.h"
71
#include "av1/encoder/rdopt.h"
72
#include "av1/encoder/reconinter_enc.h"
73
#include "av1/encoder/tokenize.h"
74
#include "av1/encoder/tpl_model.h"
75
#include "av1/encoder/tx_search.h"
76
#include "av1/encoder/var_based_part.h"
77
78
8.38M
#define LAST_NEW_MV_INDEX 6
79
80
// Mode_threshold multiplication factor table for prune_inter_modes_if_skippable
81
// The values are kept in Q12 format and equation used to derive is
82
// (2.5 - ((float)x->qindex / MAXQ) * 1.5)
83
8.18M
#define MODE_THRESH_QBITS 12
84
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
85
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
86
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
87
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
88
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
89
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
90
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
91
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
92
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
93
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
94
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
95
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
96
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
97
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
98
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
99
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
100
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
101
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
102
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
103
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
104
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
105
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
106
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
107
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
108
  4144,  4120,  4096
109
};
110
111
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
112
  THR_NEARESTMV,
113
  THR_NEARESTL2,
114
  THR_NEARESTL3,
115
  THR_NEARESTB,
116
  THR_NEARESTA2,
117
  THR_NEARESTA,
118
  THR_NEARESTG,
119
120
  THR_NEWMV,
121
  THR_NEWL2,
122
  THR_NEWL3,
123
  THR_NEWB,
124
  THR_NEWA2,
125
  THR_NEWA,
126
  THR_NEWG,
127
128
  THR_NEARMV,
129
  THR_NEARL2,
130
  THR_NEARL3,
131
  THR_NEARB,
132
  THR_NEARA2,
133
  THR_NEARA,
134
  THR_NEARG,
135
136
  THR_GLOBALMV,
137
  THR_GLOBALL2,
138
  THR_GLOBALL3,
139
  THR_GLOBALB,
140
  THR_GLOBALA2,
141
  THR_GLOBALA,
142
  THR_GLOBALG,
143
144
  THR_COMP_NEAREST_NEARESTLA,
145
  THR_COMP_NEAREST_NEARESTL2A,
146
  THR_COMP_NEAREST_NEARESTL3A,
147
  THR_COMP_NEAREST_NEARESTGA,
148
  THR_COMP_NEAREST_NEARESTLB,
149
  THR_COMP_NEAREST_NEARESTL2B,
150
  THR_COMP_NEAREST_NEARESTL3B,
151
  THR_COMP_NEAREST_NEARESTGB,
152
  THR_COMP_NEAREST_NEARESTLA2,
153
  THR_COMP_NEAREST_NEARESTL2A2,
154
  THR_COMP_NEAREST_NEARESTL3A2,
155
  THR_COMP_NEAREST_NEARESTGA2,
156
  THR_COMP_NEAREST_NEARESTLL2,
157
  THR_COMP_NEAREST_NEARESTLL3,
158
  THR_COMP_NEAREST_NEARESTLG,
159
  THR_COMP_NEAREST_NEARESTBA,
160
161
  THR_COMP_NEAR_NEARLB,
162
  THR_COMP_NEW_NEWLB,
163
  THR_COMP_NEW_NEARESTLB,
164
  THR_COMP_NEAREST_NEWLB,
165
  THR_COMP_NEW_NEARLB,
166
  THR_COMP_NEAR_NEWLB,
167
  THR_COMP_GLOBAL_GLOBALLB,
168
169
  THR_COMP_NEAR_NEARLA,
170
  THR_COMP_NEW_NEWLA,
171
  THR_COMP_NEW_NEARESTLA,
172
  THR_COMP_NEAREST_NEWLA,
173
  THR_COMP_NEW_NEARLA,
174
  THR_COMP_NEAR_NEWLA,
175
  THR_COMP_GLOBAL_GLOBALLA,
176
177
  THR_COMP_NEAR_NEARL2A,
178
  THR_COMP_NEW_NEWL2A,
179
  THR_COMP_NEW_NEARESTL2A,
180
  THR_COMP_NEAREST_NEWL2A,
181
  THR_COMP_NEW_NEARL2A,
182
  THR_COMP_NEAR_NEWL2A,
183
  THR_COMP_GLOBAL_GLOBALL2A,
184
185
  THR_COMP_NEAR_NEARL3A,
186
  THR_COMP_NEW_NEWL3A,
187
  THR_COMP_NEW_NEARESTL3A,
188
  THR_COMP_NEAREST_NEWL3A,
189
  THR_COMP_NEW_NEARL3A,
190
  THR_COMP_NEAR_NEWL3A,
191
  THR_COMP_GLOBAL_GLOBALL3A,
192
193
  THR_COMP_NEAR_NEARGA,
194
  THR_COMP_NEW_NEWGA,
195
  THR_COMP_NEW_NEARESTGA,
196
  THR_COMP_NEAREST_NEWGA,
197
  THR_COMP_NEW_NEARGA,
198
  THR_COMP_NEAR_NEWGA,
199
  THR_COMP_GLOBAL_GLOBALGA,
200
201
  THR_COMP_NEAR_NEARL2B,
202
  THR_COMP_NEW_NEWL2B,
203
  THR_COMP_NEW_NEARESTL2B,
204
  THR_COMP_NEAREST_NEWL2B,
205
  THR_COMP_NEW_NEARL2B,
206
  THR_COMP_NEAR_NEWL2B,
207
  THR_COMP_GLOBAL_GLOBALL2B,
208
209
  THR_COMP_NEAR_NEARL3B,
210
  THR_COMP_NEW_NEWL3B,
211
  THR_COMP_NEW_NEARESTL3B,
212
  THR_COMP_NEAREST_NEWL3B,
213
  THR_COMP_NEW_NEARL3B,
214
  THR_COMP_NEAR_NEWL3B,
215
  THR_COMP_GLOBAL_GLOBALL3B,
216
217
  THR_COMP_NEAR_NEARGB,
218
  THR_COMP_NEW_NEWGB,
219
  THR_COMP_NEW_NEARESTGB,
220
  THR_COMP_NEAREST_NEWGB,
221
  THR_COMP_NEW_NEARGB,
222
  THR_COMP_NEAR_NEWGB,
223
  THR_COMP_GLOBAL_GLOBALGB,
224
225
  THR_COMP_NEAR_NEARLA2,
226
  THR_COMP_NEW_NEWLA2,
227
  THR_COMP_NEW_NEARESTLA2,
228
  THR_COMP_NEAREST_NEWLA2,
229
  THR_COMP_NEW_NEARLA2,
230
  THR_COMP_NEAR_NEWLA2,
231
  THR_COMP_GLOBAL_GLOBALLA2,
232
233
  THR_COMP_NEAR_NEARL2A2,
234
  THR_COMP_NEW_NEWL2A2,
235
  THR_COMP_NEW_NEARESTL2A2,
236
  THR_COMP_NEAREST_NEWL2A2,
237
  THR_COMP_NEW_NEARL2A2,
238
  THR_COMP_NEAR_NEWL2A2,
239
  THR_COMP_GLOBAL_GLOBALL2A2,
240
241
  THR_COMP_NEAR_NEARL3A2,
242
  THR_COMP_NEW_NEWL3A2,
243
  THR_COMP_NEW_NEARESTL3A2,
244
  THR_COMP_NEAREST_NEWL3A2,
245
  THR_COMP_NEW_NEARL3A2,
246
  THR_COMP_NEAR_NEWL3A2,
247
  THR_COMP_GLOBAL_GLOBALL3A2,
248
249
  THR_COMP_NEAR_NEARGA2,
250
  THR_COMP_NEW_NEWGA2,
251
  THR_COMP_NEW_NEARESTGA2,
252
  THR_COMP_NEAREST_NEWGA2,
253
  THR_COMP_NEW_NEARGA2,
254
  THR_COMP_NEAR_NEWGA2,
255
  THR_COMP_GLOBAL_GLOBALGA2,
256
257
  THR_COMP_NEAR_NEARLL2,
258
  THR_COMP_NEW_NEWLL2,
259
  THR_COMP_NEW_NEARESTLL2,
260
  THR_COMP_NEAREST_NEWLL2,
261
  THR_COMP_NEW_NEARLL2,
262
  THR_COMP_NEAR_NEWLL2,
263
  THR_COMP_GLOBAL_GLOBALLL2,
264
265
  THR_COMP_NEAR_NEARLL3,
266
  THR_COMP_NEW_NEWLL3,
267
  THR_COMP_NEW_NEARESTLL3,
268
  THR_COMP_NEAREST_NEWLL3,
269
  THR_COMP_NEW_NEARLL3,
270
  THR_COMP_NEAR_NEWLL3,
271
  THR_COMP_GLOBAL_GLOBALLL3,
272
273
  THR_COMP_NEAR_NEARLG,
274
  THR_COMP_NEW_NEWLG,
275
  THR_COMP_NEW_NEARESTLG,
276
  THR_COMP_NEAREST_NEWLG,
277
  THR_COMP_NEW_NEARLG,
278
  THR_COMP_NEAR_NEWLG,
279
  THR_COMP_GLOBAL_GLOBALLG,
280
281
  THR_COMP_NEAR_NEARBA,
282
  THR_COMP_NEW_NEWBA,
283
  THR_COMP_NEW_NEARESTBA,
284
  THR_COMP_NEAREST_NEWBA,
285
  THR_COMP_NEW_NEARBA,
286
  THR_COMP_NEAR_NEWBA,
287
  THR_COMP_GLOBAL_GLOBALBA,
288
289
  THR_DC,
290
  THR_PAETH,
291
  THR_SMOOTH,
292
  THR_SMOOTH_V,
293
  THR_SMOOTH_H,
294
  THR_H_PRED,
295
  THR_V_PRED,
296
  THR_D135_PRED,
297
  THR_D203_PRED,
298
  THR_D157_PRED,
299
  THR_D67_PRED,
300
  THR_D113_PRED,
301
  THR_D45_PRED,
302
};
303
304
/*!\cond */
305
typedef struct SingleInterModeState {
306
  int64_t rd;
307
  MV_REFERENCE_FRAME ref_frame;
308
  int valid;
309
} SingleInterModeState;
310
311
typedef struct InterModeSearchState {
312
  int64_t best_rd;
313
  int64_t best_skip_rd[2];
314
  MB_MODE_INFO best_mbmode;
315
  int best_rate_y;
316
  int best_rate_uv;
317
  int best_mode_skippable;
318
  int best_skip2;
319
  THR_MODES best_mode_index;
320
  int num_available_refs;
321
  int64_t dist_refs[REF_FRAMES];
322
  int dist_order_refs[REF_FRAMES];
323
  int64_t mode_threshold[MAX_MODES];
324
  int64_t best_intra_rd;
325
  unsigned int best_pred_sse;
326
327
  /*!
328
   * \brief Keep track of best intra rd for use in compound mode.
329
   */
330
  int64_t best_pred_rd[REFERENCE_MODES];
331
  // Save a set of single_newmv for each checked ref_mv.
332
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
333
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
334
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
335
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
336
  // The rd of simple translation in single inter modes
337
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
338
  int64_t best_single_rd[REF_FRAMES];
339
  PREDICTION_MODE best_single_mode[REF_FRAMES];
340
341
  // Single search results by [directions][modes][reference frames]
342
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
343
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
344
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
345
                                            [FWD_REFS];
346
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
347
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
348
  IntraModeSearchState intra_search_state;
349
  RD_STATS best_y_rdcost;
350
} InterModeSearchState;
351
/*!\endcond */
352
353
234k
// Clears the ready flag, the sample count and the running sums of every
// per-block-size inter mode RD model of the tile. The fitted parameters and
// the means are not touched here.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  InterModeRdModel *model = &tile_data->inter_mode_rd_models[0];
  for (int remaining = BLOCK_SIZES_ALL; remaining > 0; --remaining, ++model) {
    model->ready = 0;
    model->num = 0;
    model->dist_sum = 0;
    model->ld_sum = 0;
    model->sse_sum = 0;
    model->sse_sse_sum = 0;
    model->sse_ld_sum = 0;
  }
}
365
366
// Estimates the residue cost and the distortion of a block from its SSE using
// the tile's RD model for `bsize`.
//
// Returns 0 and leaves the outputs untouched when the model is not ready.
// Otherwise returns 1 after writing *est_residue_cost (always within
// [0, INT_MAX / 2]) and *est_dist.
//
// The estimated cost is clamped while it is still a double. Converting a NaN
// or an out-of-range double to an integer is undefined behaviour, and a
// degenerate model (e.g. a non-finite slope) can produce such values.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
  if (!md->ready) return 0;

  if (sse < md->dist_mean) {
    // SSE below the mean distortion: assume no residue needs to be coded.
    *est_residue_cost = 0;
    *est_dist = sse;
    return 1;
  }

  const int max_cost = INT_MAX / 2;
  *est_dist = (int64_t)round(md->dist_mean);
  const double est_ld = md->a * sse + md->b;
  // Clamp estimated rate cost by INT_MAX / 2.
  // TODO(angiebird@google.com): find better solution than clamping.
  if (fabs(est_ld) < 1e-2) {
    *est_residue_cost = max_cost;
  } else {
    const double est_residue_cost_dbl = (sse - md->dist_mean) / est_ld;
    if (!(est_residue_cost_dbl >= 0)) {
      // Negative or NaN estimate: treat as no residue cost.
      *est_residue_cost = 0;
    } else if (est_residue_cost_dbl >= (double)max_cost) {
      *est_residue_cost = max_cost;
    } else {
      // 0 <= value < INT_MAX / 2 here, so the conversion is well defined.
      *est_residue_cost = (int)round(est_residue_cost_dbl);
    }
  }

  if (*est_residue_cost <= 0) {
    // No residue cost predicted: the distortion estimate is the SSE itself.
    *est_residue_cost = 0;
    *est_dist = sse;
  }
  return 1;
}
399
400
21.8k
// Refreshes the per-block-size inter mode RD models of a tile from the
// samples accumulated since the last fit.
//
// A model is skipped when inter_mode_data_block_idx() returns -1 for its
// block size, or when too few samples were collected (fewer than 200 before
// the first fit, fewer than 64 afterwards). On the first fit the means are
// the batch averages; later fits blend the previous mean (weight 3) with the
// new batch average (weight 1). The linear parameters a and b are then
// recomputed from the means and the accumulators are cleared.
// `rdmult` is unused.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  (void)rdmult;
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const int block_idx = inter_mode_data_block_idx(bsize);
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
    if (block_idx == -1) continue;

    const int too_few_for_first_fit = md->ready == 0 && md->num < 200;
    const int too_few_for_refit = md->ready == 1 && md->num < 64;
    if (too_few_for_first_fit || too_few_for_refit) continue;

    // Averages of the batch collected since the previous fit.
    const double dist_avg = md->dist_sum / md->num;
    const double ld_avg = md->ld_sum / md->num;
    const double sse_avg = md->sse_sum / md->num;
    const double sse_sse_avg = md->sse_sse_sum / md->num;
    const double sse_ld_avg = md->sse_ld_sum / md->num;

    if (md->ready == 0) {
      md->dist_mean = dist_avg;
      md->ld_mean = ld_avg;
      md->sse_mean = sse_avg;
      md->sse_sse_mean = sse_sse_avg;
      md->sse_ld_mean = sse_ld_avg;
    } else {
      // Weight of the previous mean relative to the new batch average.
      const double factor = 3;
      md->dist_mean = (md->dist_mean * factor + dist_avg) / (factor + 1);
      md->ld_mean = (md->ld_mean * factor + ld_avg) / (factor + 1);
      md->sse_mean = (md->sse_mean * factor + sse_avg) / (factor + 1);
      md->sse_sse_mean =
          (md->sse_sse_mean * factor + sse_sse_avg) / (factor + 1);
      md->sse_ld_mean = (md->sse_ld_mean * factor + sse_ld_avg) / (factor + 1);
    }

    // Line fit of ld against sse from the first and second moments.
    const double ld_mean = md->ld_mean;
    const double sse_mean = md->sse_mean;
    const double sse_rms = sqrt(md->sse_sse_mean);
    const double sse_ld_mean = md->sse_ld_mean;

    md->a = (sse_ld_mean - sse_mean * ld_mean) /
            (sse_rms * sse_rms - sse_mean * sse_mean);
    md->b = ld_mean - md->a * sse_mean;
    md->ready = 1;

    // Start a new accumulation window.
    md->num = 0;
    md->dist_sum = 0;
    md->ld_sum = 0;
    md->sse_sum = 0;
    md->sse_sse_sum = 0;
    md->sse_ld_sum = 0;
  }
}
449
450
static inline void inter_mode_data_push(TileDataEnc *tile_data,
451
                                        BLOCK_SIZE bsize, int64_t sse,
452
523k
                                        int64_t dist, int residue_cost) {
453
523k
  if (residue_cost == 0 || sse == dist) return;
454
417k
  const int block_idx = inter_mode_data_block_idx(bsize);
455
417k
  if (block_idx == -1) return;
456
417k
  InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
457
417k
  if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
458
417k
    const double ld = (sse - dist) * 1. / residue_cost;
459
417k
    ++rd_model->num;
460
417k
    rd_model->dist_sum += dist;
461
417k
    rd_model->ld_sum += ld;
462
417k
    rd_model->sse_sum += sse;
463
417k
    rd_model->sse_sse_sum += (double)sse * (double)sse;
464
417k
    rd_model->sse_ld_sum += sse * ld;
465
417k
  }
466
417k
}
467
468
static inline void inter_modes_info_push(InterModesInfo *inter_modes_info,
469
                                         int mode_rate, int64_t sse, int64_t rd,
470
                                         RD_STATS *rd_cost, RD_STATS *rd_cost_y,
471
                                         RD_STATS *rd_cost_uv,
472
178k
                                         const MB_MODE_INFO *mbmi) {
473
178k
  const int num = inter_modes_info->num;
474
178k
  assert(num < MAX_INTER_MODES);
475
178k
  inter_modes_info->mbmi_arr[num] = *mbmi;
476
178k
  inter_modes_info->mode_rate_arr[num] = mode_rate;
477
178k
  inter_modes_info->sse_arr[num] = sse;
478
178k
  inter_modes_info->est_rd_arr[num] = rd;
479
178k
  inter_modes_info->rd_cost_arr[num] = *rd_cost;
480
178k
  inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
481
178k
  inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
482
178k
  ++inter_modes_info->num;
483
178k
}
484
485
234k
static int compare_rd_idx_pair(const void *a, const void *b) {
486
234k
  if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
487
    // To avoid inconsistency in qsort() ordering when two elements are equal,
488
    // using idx as tie breaker. Refer aomedia:2928
489
12
    if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
490
0
      return 0;
491
12
    else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
492
0
      return 1;
493
12
    else
494
12
      return -1;
495
234k
  } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
496
131k
    return 1;
497
131k
  } else {
498
103k
    return -1;
499
103k
  }
500
234k
}
501
502
static inline void inter_modes_info_sort(const InterModesInfo *inter_modes_info,
503
44.5k
                                         RdIdxPair *rd_idx_pair_arr) {
504
44.5k
  if (inter_modes_info->num == 0) {
505
2
    return;
506
2
  }
507
222k
  for (int i = 0; i < inter_modes_info->num; ++i) {
508
178k
    rd_idx_pair_arr[i].idx = i;
509
178k
    rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
510
178k
  }
511
44.5k
  qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
512
44.5k
        compare_rd_idx_pair);
513
44.5k
}
514
515
// Similar to get_horver_correlation, but also takes into account first
516
// row/column, when computing horizontal/vertical correlation.
517
void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
518
                                       int width, int height, float *hcorr,
519
0
                                       float *vcorr) {
520
  // The following notation is used:
521
  // x - current pixel
522
  // y - left neighbor pixel
523
  // z - top neighbor pixel
524
0
  int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
525
0
  int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
526
0
  int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
527
528
  // First, process horizontal correlation on just the first row
529
0
  x_sum += diff[0];
530
0
  x2_sum += diff[0] * diff[0];
531
0
  x_firstrow += diff[0];
532
0
  x2_firstrow += diff[0] * diff[0];
533
0
  for (int j = 1; j < width; ++j) {
534
0
    const int16_t x = diff[j];
535
0
    const int16_t y = diff[j - 1];
536
0
    x_sum += x;
537
0
    x_firstrow += x;
538
0
    x2_sum += x * x;
539
0
    x2_firstrow += x * x;
540
0
    xy_sum += x * y;
541
0
  }
542
543
  // Process vertical correlation in the first column
544
0
  x_firstcol += diff[0];
545
0
  x2_firstcol += diff[0] * diff[0];
546
0
  for (int i = 1; i < height; ++i) {
547
0
    const int16_t x = diff[i * stride];
548
0
    const int16_t z = diff[(i - 1) * stride];
549
0
    x_sum += x;
550
0
    x_firstcol += x;
551
0
    x2_sum += x * x;
552
0
    x2_firstcol += x * x;
553
0
    xz_sum += x * z;
554
0
  }
555
556
  // Now process horiz and vert correlation through the rest unit
557
0
  for (int i = 1; i < height; ++i) {
558
0
    for (int j = 1; j < width; ++j) {
559
0
      const int16_t x = diff[i * stride + j];
560
0
      const int16_t y = diff[i * stride + j - 1];
561
0
      const int16_t z = diff[(i - 1) * stride + j];
562
0
      x_sum += x;
563
0
      x2_sum += x * x;
564
0
      xy_sum += x * y;
565
0
      xz_sum += x * z;
566
0
    }
567
0
  }
568
569
0
  for (int j = 0; j < width; ++j) {
570
0
    x_finalrow += diff[(height - 1) * stride + j];
571
0
    x2_finalrow +=
572
0
        diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
573
0
  }
574
0
  for (int i = 0; i < height; ++i) {
575
0
    x_finalcol += diff[i * stride + width - 1];
576
0
    x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
577
0
  }
578
579
0
  int64_t xhor_sum = x_sum - x_finalcol;
580
0
  int64_t xver_sum = x_sum - x_finalrow;
581
0
  int64_t y_sum = x_sum - x_firstcol;
582
0
  int64_t z_sum = x_sum - x_firstrow;
583
0
  int64_t x2hor_sum = x2_sum - x2_finalcol;
584
0
  int64_t x2ver_sum = x2_sum - x2_finalrow;
585
0
  int64_t y2_sum = x2_sum - x2_firstcol;
586
0
  int64_t z2_sum = x2_sum - x2_firstrow;
587
588
0
  const float num_hor = (float)(height * (width - 1));
589
0
  const float num_ver = (float)((height - 1) * width);
590
591
0
  const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
592
0
  const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
593
594
0
  const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
595
0
  const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
596
597
0
  const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
598
0
  const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
599
600
0
  if (xhor_var_n > 0 && y_var_n > 0) {
601
0
    *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
602
0
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
603
0
  } else {
604
0
    *hcorr = 1.0;
605
0
  }
606
0
  if (xver_var_n > 0 && z_var_n > 0) {
607
0
    *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
608
0
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
609
0
  } else {
610
0
    *vcorr = 1.0;
611
0
  }
612
0
}
613
614
static void get_variance_stats_hbd(const MACROBLOCK *x, int64_t *src_var,
615
0
                                   int64_t *rec_var) {
616
0
  const MACROBLOCKD *xd = &x->e_mbd;
617
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
618
0
  const struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
619
0
  const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
620
621
0
  BLOCK_SIZE bsize = mbmi->bsize;
622
0
  int bw = block_size_wide[bsize];
623
0
  int bh = block_size_high[bsize];
624
625
0
  static const int gau_filter[3][3] = {
626
0
    { 1, 2, 1 },
627
0
    { 2, 4, 2 },
628
0
    { 1, 2, 1 },
629
0
  };
630
631
0
  DECLARE_ALIGNED(16, uint16_t, dclevel[(MAX_SB_SIZE + 2) * (MAX_SB_SIZE + 2)]);
632
633
0
  uint16_t *pred_ptr = &dclevel[bw + 1];
634
0
  int pred_stride = xd->plane[0].dst.stride;
635
636
0
  for (int idy = -1; idy < bh + 1; ++idy) {
637
0
    for (int idx = -1; idx < bw + 1; ++idx) {
638
0
      int offset_idy = idy;
639
0
      int offset_idx = idx;
640
0
      if (idy == -1) offset_idy = 0;
641
0
      if (idy == bh) offset_idy = bh - 1;
642
0
      if (idx == -1) offset_idx = 0;
643
0
      if (idx == bw) offset_idx = bw - 1;
644
645
0
      int offset = offset_idy * pred_stride + offset_idx;
646
0
      pred_ptr[idy * bw + idx] = CONVERT_TO_SHORTPTR(pd->dst.buf)[offset];
647
0
    }
648
0
  }
649
650
0
  *rec_var = 0;
651
0
  for (int idy = 0; idy < bh; ++idy) {
652
0
    for (int idx = 0; idx < bw; ++idx) {
653
0
      int sum = 0;
654
0
      for (int iy = 0; iy < 3; ++iy)
655
0
        for (int ix = 0; ix < 3; ++ix)
656
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
657
0
                 gau_filter[iy][ix];
658
659
0
      sum = sum >> 4;
660
661
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
662
0
      *rec_var += diff * diff;
663
0
    }
664
0
  }
665
0
  *rec_var <<= 4;
666
667
0
  int src_stride = p->src.stride;
668
0
  for (int idy = -1; idy < bh + 1; ++idy) {
669
0
    for (int idx = -1; idx < bw + 1; ++idx) {
670
0
      int offset_idy = idy;
671
0
      int offset_idx = idx;
672
0
      if (idy == -1) offset_idy = 0;
673
0
      if (idy == bh) offset_idy = bh - 1;
674
0
      if (idx == -1) offset_idx = 0;
675
0
      if (idx == bw) offset_idx = bw - 1;
676
677
0
      int offset = offset_idy * src_stride + offset_idx;
678
0
      pred_ptr[idy * bw + idx] = CONVERT_TO_SHORTPTR(p->src.buf)[offset];
679
0
    }
680
0
  }
681
682
0
  *src_var = 0;
683
0
  for (int idy = 0; idy < bh; ++idy) {
684
0
    for (int idx = 0; idx < bw; ++idx) {
685
0
      int sum = 0;
686
0
      for (int iy = 0; iy < 3; ++iy)
687
0
        for (int ix = 0; ix < 3; ++ix)
688
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
689
0
                 gau_filter[iy][ix];
690
691
0
      sum = sum >> 4;
692
693
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
694
0
      *src_var += diff * diff;
695
0
    }
696
0
  }
697
0
  *src_var <<= 4;
698
0
}
699
700
static void get_variance_stats(const MACROBLOCK *x, int64_t *src_var,
701
0
                               int64_t *rec_var) {
702
0
  const MACROBLOCKD *xd = &x->e_mbd;
703
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
704
0
  const struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
705
0
  const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
706
707
0
  BLOCK_SIZE bsize = mbmi->bsize;
708
0
  int bw = block_size_wide[bsize];
709
0
  int bh = block_size_high[bsize];
710
711
0
  static const int gau_filter[3][3] = {
712
0
    { 1, 2, 1 },
713
0
    { 2, 4, 2 },
714
0
    { 1, 2, 1 },
715
0
  };
716
717
0
  DECLARE_ALIGNED(16, uint8_t, dclevel[(MAX_SB_SIZE + 2) * (MAX_SB_SIZE + 2)]);
718
719
0
  uint8_t *pred_ptr = &dclevel[bw + 1];
720
0
  int pred_stride = xd->plane[0].dst.stride;
721
722
0
  for (int idy = -1; idy < bh + 1; ++idy) {
723
0
    for (int idx = -1; idx < bw + 1; ++idx) {
724
0
      int offset_idy = idy;
725
0
      int offset_idx = idx;
726
0
      if (idy == -1) offset_idy = 0;
727
0
      if (idy == bh) offset_idy = bh - 1;
728
0
      if (idx == -1) offset_idx = 0;
729
0
      if (idx == bw) offset_idx = bw - 1;
730
731
0
      int offset = offset_idy * pred_stride + offset_idx;
732
0
      pred_ptr[idy * bw + idx] = pd->dst.buf[offset];
733
0
    }
734
0
  }
735
736
0
  *rec_var = 0;
737
0
  for (int idy = 0; idy < bh; ++idy) {
738
0
    for (int idx = 0; idx < bw; ++idx) {
739
0
      int sum = 0;
740
0
      for (int iy = 0; iy < 3; ++iy)
741
0
        for (int ix = 0; ix < 3; ++ix)
742
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
743
0
                 gau_filter[iy][ix];
744
745
0
      sum = sum >> 4;
746
747
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
748
0
      *rec_var += diff * diff;
749
0
    }
750
0
  }
751
0
  *rec_var <<= 4;
752
753
0
  int src_stride = p->src.stride;
754
0
  for (int idy = -1; idy < bh + 1; ++idy) {
755
0
    for (int idx = -1; idx < bw + 1; ++idx) {
756
0
      int offset_idy = idy;
757
0
      int offset_idx = idx;
758
0
      if (idy == -1) offset_idy = 0;
759
0
      if (idy == bh) offset_idy = bh - 1;
760
0
      if (idx == -1) offset_idx = 0;
761
0
      if (idx == bw) offset_idx = bw - 1;
762
763
0
      int offset = offset_idy * src_stride + offset_idx;
764
0
      pred_ptr[idy * bw + idx] = p->src.buf[offset];
765
0
    }
766
0
  }
767
768
0
  *src_var = 0;
769
0
  for (int idy = 0; idy < bh; ++idy) {
770
0
    for (int idx = 0; idx < bw; ++idx) {
771
0
      int sum = 0;
772
0
      for (int iy = 0; iy < 3; ++iy)
773
0
        for (int ix = 0; ix < 3; ++ix)
774
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
775
0
                 gau_filter[iy][ix];
776
777
0
      sum = sum >> 4;
778
779
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
780
0
      *src_var += diff * diff;
781
0
    }
782
0
  }
783
0
  *src_var <<= 4;
784
0
}
785
786
static void adjust_rdcost(const AV1_COMP *cpi, const MACROBLOCK *x,
787
6.91M
                          RD_STATS *rd_cost) {
788
6.91M
  if (cpi->oxcf.algo_cfg.sharpness != 3) return;
789
790
110
  if (frame_is_kf_gf_arf(cpi)) return;
791
792
110
  int64_t src_var, rec_var;
793
794
110
  const bool is_hbd = is_cur_buf_hbd(&x->e_mbd);
795
110
  if (is_hbd)
796
0
    get_variance_stats_hbd(x, &src_var, &rec_var);
797
110
  else
798
110
    get_variance_stats(x, &src_var, &rec_var);
799
800
110
  if (src_var <= rec_var) return;
801
802
110
  int64_t var_offset = src_var - rec_var;
803
804
110
  rd_cost->dist += var_offset;
805
806
110
  rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
807
110
}
808
809
static void adjust_cost(const AV1_COMP *cpi, const MACROBLOCK *x,
810
5.33M
                        int64_t *rd_cost) {
811
5.33M
  if (cpi->oxcf.algo_cfg.sharpness != 3) return;
812
813
59
  if (frame_is_kf_gf_arf(cpi)) return;
814
815
59
  int64_t src_var, rec_var;
816
59
  const bool is_hbd = is_cur_buf_hbd(&x->e_mbd);
817
818
59
  if (is_hbd)
819
0
    get_variance_stats_hbd(x, &src_var, &rec_var);
820
59
  else
821
59
    get_variance_stats(x, &src_var, &rec_var);
822
823
59
  if (src_var <= rec_var) return;
824
825
59
  int64_t var_offset = src_var - rec_var;
826
827
59
  *rd_cost += RDCOST(x->rdmult, 0, var_offset);
828
59
}
829
830
static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
831
1.55M
                       int64_t *sse_y) {
832
1.55M
  const AV1_COMMON *cm = &cpi->common;
833
1.55M
  const int num_planes = av1_num_planes(cm);
834
1.55M
  const MACROBLOCKD *xd = &x->e_mbd;
835
1.55M
  const MB_MODE_INFO *mbmi = xd->mi[0];
836
1.55M
  int64_t total_sse = 0;
837
4.79M
  for (int plane = 0; plane < num_planes; ++plane) {
838
3.24M
    if (plane && !xd->is_chroma_ref) break;
839
3.24M
    const struct macroblock_plane *const p = &x->plane[plane];
840
3.24M
    const struct macroblockd_plane *const pd = &xd->plane[plane];
841
3.24M
    const BLOCK_SIZE bs =
842
3.24M
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
843
3.24M
    unsigned int sse;
844
845
3.24M
    cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
846
3.24M
                            pd->dst.stride, &sse);
847
3.24M
    total_sse += sse;
848
3.24M
    if (!plane && sse_y) *sse_y = sse;
849
3.24M
  }
850
1.55M
  total_sse <<= 4;
851
1.55M
  return total_sse;
852
1.55M
}
853
854
// Returns the sum of squared differences between coeff[] and dqcoeff[] over
// block_size entries, and stores the sum of squared coeff[] values in *ssz.
//
// All products are formed in 64 bits, so large coefficient values cannot
// overflow an int before they are accumulated.
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t error = 0, sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = sqcoeff;
  return error;
}
868
869
// Returns the sum of squared differences between the 16-bit arrays coeff[]
// and dqcoeff[] over block_size entries (0 when block_size is not positive).
//
// The difference of two int16_t values can be as large as 65535 in magnitude,
// and its square does not fit in a 32-bit int, so the square is formed in
// 64 bits.
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t error = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
880
881
#if CONFIG_AV1_HIGHBITDEPTH
882
/* High bit-depth variant of the block error computation.
 *
 * \param[in]  coeff       Original coefficients.
 * \param[in]  dqcoeff     Coefficients to compare against.
 * \param[in]  block_size  Number of entries to process.
 * \param[out] ssz         Receives the rounded, down-shifted sum of squares of
 *                         coeff.
 * \param[in]  bd          Bit depth; the results are shifted right by
 *                         2 * (bd - 8) with rounding.
 * \return The rounded, down-shifted sum of squared differences.
 */
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  // A bit depth below 8 would produce a negative shift count.
  assert(bd >= 8);
  const int shift = 2 * (bd - 8);
  const int rounding = (1 << shift) >> 1;
  int64_t error = 0, sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    // Widen before subtracting so the difference itself is formed in 64 bits.
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = (sqcoeff + rounding) >> shift;
  return (error + rounding) >> shift;
}
901
#endif
902
903
/* Returns 1 when the directional intra mode `mode` should be skipped given the
 * best intra mode found so far, and 0 otherwise.
 *
 * Each of the four directional modes handled here is kept only when
 * best_intra_mode is one of the two modes paired with it below.
 */
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
919
920
/* Returns the cost of signalling inter prediction mode `mode` under
 * `mode_context`.
 *
 * Compound modes are looked up directly in inter_compound_mode_cost. Single
 * reference modes accumulate up to three table entries (newmv, zeromv, refmv),
 * each indexed by its own sub-field of mode_context, stopping as soon as the
 * mode is identified.
 */
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  // First decision: NEWMV or not.
  const int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[newmv_ctx][0];

  int cost = mode_costs->newmv_mode_cost[newmv_ctx][1];

  // Second decision: GLOBALMV or not.
  const int16_t globalmv_ctx =
      (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV) {
    cost += mode_costs->zeromv_mode_cost[globalmv_ctx][0];
    return cost;
  }
  cost += mode_costs->zeromv_mode_cost[globalmv_ctx][1];

  // Last decision: NEARESTMV versus any other remaining mode.
  const int16_t refmv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  cost += mode_costs->refmv_mode_cost[refmv_ctx][mode != NEARESTMV];
  return cost;
}
950
951
/* Maps `this_mode` to the mode of one of its references: compound_ref1_mode()
 * when ref_idx is non-zero, compound_ref0_mode() otherwise. */
static inline PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
                                              int ref_idx) {
  if (ref_idx) return compound_ref1_mode(this_mode);
  return compound_ref0_mode(this_mode);
}
956
957
static inline void estimate_ref_frame_costs(
958
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
959
    int segment_id, unsigned int *ref_costs_single,
960
932k
    unsigned int (*ref_costs_comp)[REF_FRAMES]) {
961
932k
  int seg_ref_active =
962
932k
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
963
932k
  if (seg_ref_active) {
964
0
    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
965
0
    int ref_frame;
966
0
    for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
967
0
      memset(ref_costs_comp[ref_frame], 0,
968
0
             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
969
932k
  } else {
970
932k
    int intra_inter_ctx = av1_get_intra_inter_context(xd);
971
932k
    ref_costs_single[INTRA_FRAME] =
972
932k
        mode_costs->intra_inter_cost[intra_inter_ctx][0];
973
932k
    unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];
974
975
7.45M
    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
976
6.52M
      ref_costs_single[i] = base_cost;
977
978
932k
    const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
979
932k
    const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
980
932k
    const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
981
932k
    const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
982
932k
    const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
983
932k
    const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);
984
985
    // Determine cost of a single ref frame, where frame types are represented
986
    // by a tree:
987
    // Level 0: add cost whether this ref is a forward or backward ref
988
932k
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
989
932k
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
990
932k
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
991
932k
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
992
932k
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
993
932k
    ref_costs_single[ALTREF2_FRAME] +=
994
932k
        mode_costs->single_ref_cost[ctx_p1][0][1];
995
932k
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
996
997
    // Level 1: if this ref is forward ref,
998
    // add cost whether it is last/last2 or last3/golden
999
932k
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
1000
932k
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
1001
932k
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
1002
932k
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
1003
1004
    // Level 1: if this ref is backward ref
1005
    // then add cost whether this ref is altref or backward ref
1006
932k
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
1007
932k
    ref_costs_single[ALTREF2_FRAME] +=
1008
932k
        mode_costs->single_ref_cost[ctx_p2][1][0];
1009
932k
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];
1010
1011
    // Level 2: further add cost whether this ref is last or last2
1012
932k
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
1013
932k
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];
1014
1015
    // Level 2: last3 or golden
1016
932k
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
1017
932k
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];
1018
1019
    // Level 2: bwdref or altref2
1020
932k
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
1021
932k
    ref_costs_single[ALTREF2_FRAME] +=
1022
932k
        mode_costs->single_ref_cost[ctx_p6][5][1];
1023
1024
932k
    if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
1025
      // Similar to single ref, determine cost of compound ref frames.
1026
      // cost_compound_refs = cost_first_ref + cost_second_ref
1027
932k
      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
1028
932k
      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
1029
932k
      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
1030
932k
      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
1031
932k
      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);
1032
1033
932k
      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
1034
932k
      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };
1035
1036
932k
      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
1037
932k
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
1038
932k
              base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
1039
932k
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
1040
932k
      ref_bicomp_costs[ALTREF_FRAME] = 0;
1041
1042
      // cost of first ref frame
1043
932k
      ref_bicomp_costs[LAST_FRAME] +=
1044
932k
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
1045
932k
      ref_bicomp_costs[LAST2_FRAME] +=
1046
932k
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
1047
932k
      ref_bicomp_costs[LAST3_FRAME] +=
1048
932k
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
1049
932k
      ref_bicomp_costs[GOLDEN_FRAME] +=
1050
932k
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
1051
1052
932k
      ref_bicomp_costs[LAST_FRAME] +=
1053
932k
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
1054
932k
      ref_bicomp_costs[LAST2_FRAME] +=
1055
932k
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];
1056
1057
932k
      ref_bicomp_costs[LAST3_FRAME] +=
1058
932k
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
1059
932k
      ref_bicomp_costs[GOLDEN_FRAME] +=
1060
932k
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];
1061
1062
      // cost of second ref frame
1063
932k
      ref_bicomp_costs[BWDREF_FRAME] +=
1064
932k
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
1065
932k
      ref_bicomp_costs[ALTREF2_FRAME] +=
1066
932k
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
1067
932k
      ref_bicomp_costs[ALTREF_FRAME] +=
1068
932k
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
1069
1070
932k
      ref_bicomp_costs[BWDREF_FRAME] +=
1071
932k
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
1072
932k
      ref_bicomp_costs[ALTREF2_FRAME] +=
1073
932k
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
1074
1075
      // cost: if one ref frame is forward ref, the other ref is backward ref
1076
932k
      int ref0, ref1;
1077
4.66M
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
1078
14.9M
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
1079
11.1M
          ref_costs_comp[ref0][ref1] =
1080
11.1M
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
1081
11.1M
        }
1082
3.72M
      }
1083
1084
      // cost: if both ref frames are the same side.
1085
932k
      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
1086
932k
      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
1087
932k
      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
1088
932k
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
1089
932k
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
1090
932k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
1091
932k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
1092
932k
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
1093
932k
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
1094
932k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
1095
932k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
1096
932k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
1097
932k
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
1098
932k
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
1099
932k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
1100
932k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
1101
932k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
1102
932k
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
1103
932k
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
1104
932k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
1105
18.4E
    } else {
1106
18.4E
      int ref0, ref1;
1107
18.4E
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
1108
0
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
1109
0
          ref_costs_comp[ref0][ref1] = 512;
1110
0
      }
1111
18.4E
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
1112
18.4E
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
1113
18.4E
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
1114
18.4E
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
1115
18.4E
    }
1116
932k
  }
1117
932k
}
1118
1119
static inline void store_coding_context(
1120
#if CONFIG_INTERNAL_STATS
1121
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
1122
#else
1123
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
1124
#endif  // CONFIG_INTERNAL_STATS
1125
927k
    int skippable) {
1126
927k
  MACROBLOCKD *const xd = &x->e_mbd;
1127
1128
  // Take a snapshot of the coding context so it can be
1129
  // restored if we decide to encode this way
1130
927k
  ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
1131
927k
  ctx->skippable = skippable;
1132
#if CONFIG_INTERNAL_STATS
1133
  ctx->best_mode_index = mode_index;
1134
#endif  // CONFIG_INTERNAL_STATS
1135
927k
  ctx->mic = *xd->mi[0];
1136
927k
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
1137
927k
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
1138
927k
}
1139
1140
static inline void setup_buffer_ref_mvs_inter(
1141
    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
1142
1.51M
    BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
1143
1.51M
  const AV1_COMMON *cm = &cpi->common;
1144
1.51M
  const int num_planes = av1_num_planes(cm);
1145
1.51M
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
1146
1.51M
      av1_get_scaled_ref_frame(cpi, ref_frame);
1147
1.51M
  MACROBLOCKD *const xd = &x->e_mbd;
1148
1.51M
  MB_MODE_INFO *const mbmi = xd->mi[0];
1149
1.51M
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1150
1.51M
  const struct scale_factors *const sf =
1151
1.51M
      get_ref_scale_factors_const(cm, ref_frame);
1152
1.51M
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
1153
1.51M
  assert(yv12 != NULL);
1154
1155
1.51M
  if (scaled_ref_frame) {
1156
    // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
1157
    // support scaling.
1158
0
    av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
1159
0
                         num_planes);
1160
1.51M
  } else {
1161
1.51M
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
1162
1.51M
  }
1163
1164
  // Gets an initial list of candidate vectors from neighbours and orders them
1165
1.51M
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
1166
1.51M
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
1167
1.51M
                   mbmi_ext->mode_context);
1168
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
1169
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
1170
1.51M
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
1171
  // Further refinement that is encode side only to test the top few candidates
1172
  // in full and choose the best as the center point for subsequent searches.
1173
  // The current implementation doesn't support scaling.
1174
1.51M
  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
1175
1.51M
              ref_frame, block_size);
1176
1177
  // Go back to unscaled reference.
1178
1.51M
  if (scaled_ref_frame) {
1179
    // We had temporarily setup pred block based on scaled reference above. Go
1180
    // back to unscaled reference now, for subsequent use.
1181
0
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
1182
0
  }
1183
1.51M
}
1184
1185
5.75M
#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
1186
5.75M
#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
1187
1188
// TODO(jingning): this mv clamping function should be block size dependent.
1189
2.87M
static inline void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
1190
2.87M
  const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
1191
2.87M
                                     xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
1192
2.87M
                                     xd->mb_to_top_edge - LEFT_TOP_MARGIN,
1193
2.87M
                                     xd->mb_to_bottom_edge +
1194
2.87M
                                         RIGHT_BOTTOM_MARGIN };
1195
2.87M
  clamp_mv(mv, &mv_limits);
1196
2.87M
}
1197
1198
/* If the current mode shares the same mv with other modes with higher cost,
1199
 * skip this mode. */
1200
static int skip_repeated_mv(const AV1_COMMON *const cm,
1201
                            const MACROBLOCK *const x,
1202
                            PREDICTION_MODE this_mode,
1203
                            const MV_REFERENCE_FRAME ref_frames[2],
1204
5.70M
                            InterModeSearchState *search_state) {
1205
5.70M
  const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
1206
5.70M
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
1207
5.70M
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1208
5.70M
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1209
5.70M
  PREDICTION_MODE compare_mode = MB_MODE_COUNT;
1210
5.70M
  if (!is_comp_pred) {
1211
5.70M
    if (this_mode == NEARMV) {
1212
1.42M
      if (ref_mv_count == 0) {
1213
        // NEARMV has the same motion vector as NEARESTMV
1214
619k
        compare_mode = NEARESTMV;
1215
619k
      }
1216
1.42M
      if (ref_mv_count == 1 &&
1217
462k
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1218
        // NEARMV has the same motion vector as GLOBALMV
1219
462k
        compare_mode = GLOBALMV;
1220
462k
      }
1221
1.42M
    }
1222
5.70M
    if (this_mode == GLOBALMV) {
1223
1.42M
      if (ref_mv_count == 0 &&
1224
619k
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1225
        // GLOBALMV has the same motion vector as NEARESTMV
1226
619k
        compare_mode = NEARESTMV;
1227
619k
      }
1228
1.42M
      if (ref_mv_count == 1) {
1229
        // GLOBALMV has the same motion vector as NEARMV
1230
462k
        compare_mode = NEARMV;
1231
462k
      }
1232
1.42M
    }
1233
1234
5.70M
    if (compare_mode != MB_MODE_COUNT) {
1235
      // Use modelled_rd to check whether compare mode was searched
1236
2.16M
      if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
1237
2.16M
          INT64_MAX) {
1238
971k
        const int16_t mode_ctx =
1239
971k
            av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
1240
971k
        const int compare_cost =
1241
971k
            cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
1242
971k
        const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);
1243
1244
        // Only skip if the mode cost is larger than compare mode cost
1245
971k
        if (this_cost > compare_cost) {
1246
971k
          search_state->modelled_rd[this_mode][0][ref_frames[0]] =
1247
971k
              search_state->modelled_rd[compare_mode][0][ref_frames[0]];
1248
971k
          return 1;
1249
971k
        }
1250
971k
      }
1251
2.16M
    }
1252
5.70M
  }
1253
4.73M
  return 0;
1254
5.70M
}
1255
1256
static inline int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1257
                                     const AV1_COMMON *cm,
1258
2.87M
                                     const MACROBLOCK *x) {
1259
2.87M
  const MACROBLOCKD *const xd = &x->e_mbd;
1260
2.87M
  *out_mv = in_mv;
1261
2.87M
  lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1262
2.87M
                     cm->features.cur_frame_force_integer_mv);
1263
2.87M
  clamp_mv2(&out_mv->as_mv, xd);
1264
2.87M
  return av1_is_fullmv_in_range(&x->mv_limits,
1265
2.87M
                                get_fullmv_from_mv(&out_mv->as_mv));
1266
2.87M
}
1267
1268
// To use single newmv directly for compound modes, need to clamp the mv to the
1269
// valid mv range. Without this, encoder would generate out of range mv, and
1270
// this is seen in 8k encoding.
1271
static inline void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1272
0
                                     int ref_idx) {
1273
0
  const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1274
0
  SubpelMvLimits mv_limits;
1275
1276
0
  av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1277
0
  clamp_mv(&mv->as_mv, &mv_limits);
1278
0
}
1279
1280
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
1281
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
1282
                            int *const rate_mv, HandleInterModeArgs *const args,
1283
1.84M
                            inter_mode_info *mode_info) {
1284
1.84M
  MACROBLOCKD *const xd = &x->e_mbd;
1285
1.84M
  MB_MODE_INFO *const mbmi = xd->mi[0];
1286
1.84M
  const int is_comp_pred = has_second_ref(mbmi);
1287
1.84M
  const PREDICTION_MODE this_mode = mbmi->mode;
1288
1.84M
  const int refs[2] = { mbmi->ref_frame[0],
1289
18.4E
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
1290
1.84M
  const int ref_mv_idx = mbmi->ref_mv_idx;
1291
1292
1.84M
  if (is_comp_pred) {
1293
0
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
1294
0
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
1295
0
    if (this_mode == NEW_NEWMV) {
1296
0
      if (valid_mv0) {
1297
0
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
1298
0
        clamp_mv_in_range(x, &cur_mv[0], 0);
1299
0
      }
1300
0
      if (valid_mv1) {
1301
0
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
1302
0
        clamp_mv_in_range(x, &cur_mv[1], 1);
1303
0
      }
1304
0
      *rate_mv = 0;
1305
0
      for (int i = 0; i < 2; ++i) {
1306
0
        const int_mv ref_mv = av1_get_ref_mv(x, i);
1307
0
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
1308
0
                                    x->mv_costs->nmv_joint_cost,
1309
0
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1310
0
      }
1311
0
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
1312
0
      if (valid_mv1) {
1313
0
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
1314
0
        clamp_mv_in_range(x, &cur_mv[1], 1);
1315
0
      }
1316
0
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
1317
0
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
1318
0
                                 x->mv_costs->nmv_joint_cost,
1319
0
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1320
0
    } else {
1321
0
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
1322
0
      if (valid_mv0) {
1323
0
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
1324
0
        clamp_mv_in_range(x, &cur_mv[0], 0);
1325
0
      }
1326
0
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
1327
0
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
1328
0
                                 x->mv_costs->nmv_joint_cost,
1329
0
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1330
0
    }
1331
1.84M
  } else {
1332
    // Single ref case.
1333
1.84M
    const int ref_idx = 0;
1334
1.84M
    int search_range = INT_MAX;
1335
1336
1.84M
    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
1337
428k
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
1338
428k
      int min_mv_diff = INT_MAX;
1339
428k
      int best_match = -1;
1340
428k
      MV prev_ref_mv[2] = { { 0 } };
1341
962k
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
1342
534k
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
1343
534k
                                                     idx, &x->mbmi_ext)
1344
534k
                               .as_mv;
1345
534k
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
1346
534k
                                       abs(ref_mv.col - prev_ref_mv[idx].col));
1347
1348
534k
        if (min_mv_diff > ref_mv_diff) {
1349
478k
          min_mv_diff = ref_mv_diff;
1350
478k
          best_match = idx;
1351
478k
        }
1352
534k
      }
1353
1354
428k
      if (min_mv_diff < (16 << 3)) {
1355
291k
        if (args->single_newmv_valid[best_match][refs[0]]) {
1356
273k
          search_range = min_mv_diff;
1357
273k
          search_range +=
1358
273k
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
1359
273k
                         prev_ref_mv[best_match].row),
1360
273k
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
1361
273k
                         prev_ref_mv[best_match].col));
1362
          // Get full pixel search range.
1363
273k
          search_range = (search_range + 4) >> 3;
1364
273k
        }
1365
291k
      }
1366
428k
    }
1367
1368
1.84M
    int_mv best_mv;
1369
1.84M
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
1370
1.84M
                             mode_info, &best_mv, args);
1371
1.84M
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;
1372
1373
1.61M
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
1374
1.61M
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
1375
1.61M
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
1376
1.61M
    cur_mv[0].as_int = best_mv.as_int;
1377
1378
    // Return after single_newmv is set.
1379
1.61M
    if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
1380
1.61M
  }
1381
1382
1.61M
  return 0;
1383
1.84M
}
1384
1385
static inline void update_mode_start_end_index(
1386
    const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
1387
    int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
1388
4.30M
    int interintra_allowed, int eval_motion_mode) {
1389
4.30M
  *mode_index_start = (int)SIMPLE_TRANSLATION;
1390
4.30M
  *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1391
4.30M
  if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1392
2.05M
    if (!eval_motion_mode) {
1393
1.35M
      *mode_index_end = (int)SIMPLE_TRANSLATION;
1394
1.35M
    } else {
1395
      // Set the start index appropriately to process motion modes other than
1396
      // simple translation
1397
704k
      *mode_index_start = 1;
1398
704k
    }
1399
2.05M
  }
1400
4.30M
  if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
1401
0
    *mode_index_end = SIMPLE_TRANSLATION;
1402
4.30M
}
1403
1404
// Increase rd cost of warp mode for low complexity decoding.
1405
static inline void increase_warp_mode_rd(const MB_MODE_INFO *const best_mbmi,
1406
                                         const MB_MODE_INFO *const this_mbmi,
1407
                                         int64_t *const best_scaled_rd,
1408
                                         int64_t *const this_scaled_rd,
1409
319k
                                         int rd_bias_scale_pct) {
1410
  // Check rd bias percentage is non-zero.
1411
319k
  if (!rd_bias_scale_pct) return;
1412
3
  if (*best_scaled_rd == INT64_MAX || *this_scaled_rd == INT64_MAX) return;
1413
1414
  // Experiments have been performed with increasing the RD cost of warp mode at
1415
  // the below locations of inter mode evaluation.
1416
  // (1). Inter mode evaluation loop in av1_rd_pick_inter_mode().
1417
  // (2). Motion mode evaluation during handle_inter_mode() call.
1418
  // (3). Motion mode evaluation for winner motion modes.
1419
  // (4). Tx search for best inter candidates.
1420
  // Based on the speed quality trade-off results of this speed feature, the rd
1421
  // bias logic is enabled only at (2), (3) and (4).
1422
3
  const double rd_bias_scale = rd_bias_scale_pct / 100.0;
1423
3
  if (best_mbmi->motion_mode == WARPED_CAUSAL)
1424
0
    *best_scaled_rd += (int64_t)(rd_bias_scale * *best_scaled_rd);
1425
3
  if (this_mbmi->motion_mode == WARPED_CAUSAL)
1426
0
    *this_scaled_rd += (int64_t)(rd_bias_scale * *this_scaled_rd);
1427
3
}
1428
1429
/*!\brief AV1 motion mode search
1430
 *
1431
 * \ingroup inter_mode_search
1432
 * Function to search over and determine the motion mode. It will update
1433
 * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1434
 * WARPED_CAUSAL and determine any necessary side information for the selected
1435
 * motion mode. It will also perform the full transform search, unless the
1436
 * input parameter do_tx_search indicates to do an estimation of the RD rather
1437
 * than an RD corresponding to a full transform search. It will return the
1438
 * RD for the final motion_mode.
1439
 * Do the RD search for a given inter mode and compute all information relevant
1440
 * to the input mode. It will compute the best MV,
1441
 * compound parameters (if the mode is a compound mode) and interpolation filter
1442
 * parameters.
1443
 *
1444
 * \param[in]     cpi               Top-level encoder structure.
1445
 * \param[in]     tile_data         Pointer to struct holding adaptive
1446
 *                                  data/contexts/models for the tile during
1447
 *                                  encoding.
1448
 * \param[in]     x                 Pointer to struct holding all the data for
1449
 *                                  the current macroblock.
1450
 * \param[in]     bsize             Current block size.
1451
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
1452
 *                                  information.
1453
 * \param[in,out] rd_stats_y        Struct to keep track of the RD information
1454
 *                                  for only the Y plane.
1455
 * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
1456
 *                                  for only the UV planes.
1457
 * \param[in]     args              HandleInterModeArgs struct holding
1458
 *                                  miscellaneous arguments for inter mode
1459
 *                                  search. See the documentation for this
1460
 *                                  struct for a description of each member.
1461
 * \param[in]     ref_best_rd       Best RD found so far for this block.
1462
 *                                  It is used for early termination of this
1463
 *                                  search if the RD exceeds this value.
1464
 * \param[in,out] ref_skip_rd       A length 2 array, where skip_rd[0] is the
1465
 *                                  best total RD for a skip mode so far, and
1466
 *                                  skip_rd[1] is the best RD for a skip mode so
1467
 *                                  far in luma. This is used as a speed feature
1468
 *                                  to skip the transform search if the computed
1469
 *                                  skip RD for the current mode is not better
1470
 *                                  than the best skip_rd so far.
1471
 * \param[in,out] rate_mv           The rate associated with the motion vectors.
1472
 *                                  This will be modified if a motion search is
1473
 *                                  done in the motion mode search.
1474
 * \param[in,out] orig_dst          A prediction buffer to hold a computed
1475
 *                                  prediction. This will eventually hold the
1476
 *                                  final prediction, and the tmp_dst info will
1477
 *                                  be copied here.
1478
 * \param[in,out] best_est_rd       Estimated RD for motion mode search if
1479
 *                                  do_tx_search (see below) is 0.
1480
 * \param[in]     do_tx_search      Parameter to indicate whether or not to do
1481
 *                                  a full transform search. This will compute
1482
 *                                  an estimated RD for the modes without the
1483
 *                                  transform search and later perform the full
1484
 *                                  transform search on the best candidates.
1485
 * \param[in]     inter_modes_info  InterModesInfo struct to hold inter mode
1486
 *                                  information to perform a full transform
1487
 *                                  search only on winning candidates searched
1488
 *                                  with an estimate for transform coding RD.
1489
 * \param[in]     eval_motion_mode  Boolean whether or not to evaluate motion
1490
 *                                  modes other than SIMPLE_TRANSLATION.
1491
 * \param[out]    yrd               Stores the rdcost corresponding to encoding
1492
 *                                  the luma plane.
1493
 * \return Returns INT64_MAX if the determined motion mode is invalid and the
1494
 * current motion mode being tested should be skipped. It returns 0 if the
1495
 * motion mode search is a success.
1496
 */
1497
static int64_t motion_mode_rd(
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
    int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
    int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
    int eval_motion_mode, int64_t *yrd) {
  // Overview: iterate over the candidate motion modes in
  // [mode_index_start, mode_index_end], build the prediction each one needs,
  // obtain either an estimated or a fully searched RD cost, and finally copy
  // the winner back into *mbmi, *rd_stats, *rd_stats_y, *rd_stats_uv,
  // *rate_mv and *yrd.
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // Rate on entry; every candidate starts from this value (see tmp_rate2).
  const int rate2_nocoeff = rd_stats->rate;
  int best_xskip_txfm = 0;
  // Snapshots of the best candidate found so far; copied back on success.
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  // MV rate on entry; used to swap in a new MV rate when a candidate
  // re-searches the motion vector.
  const int rate_mv0 = *rate_mv;
  const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
                                 is_interintra_allowed(mbmi) &&
                                 mbmi->compound_idx;
  // Warp samples are cached per reference frame in x->warp_sample_info.
  WARP_SAMPLE_INFO *const warp_sample_info =
      &x->warp_sample_info[mbmi->ref_frame[0]];
  int *pts0 = warp_sample_info->pts;
  int *pts_inref0 = warp_sample_info->pts_inref;

  assert(mbmi->ref_frame[1] != INTRA_FRAME);
  // Saved so that ref_frame[1] can be put back before leaving/continuing.
  const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
  av1_invalid_rd_stats(&best_rd_stats);
  mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
  *yrd = INT64_MAX;
  if (features->switchable_motion_mode) {
    // Determine which motion modes to search if more than SIMPLE_TRANSLATION
    // is allowed.
    last_motion_mode_allowed = motion_mode_allowed(
        xd->global_motion, xd, mbmi, features->allow_warped_motion);
  }

  if (last_motion_mode_allowed == WARPED_CAUSAL) {
    // Collect projection samples used in least squares approximation of
    // the warped motion parameters if WARPED_CAUSAL is going to be searched.
    // A negative cached count means the samples have not been gathered yet.
    if (warp_sample_info->num < 0) {
      warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
    }
    mbmi->num_proj_ref = warp_sample_info->num;
  }
  const int total_samples = mbmi->num_proj_ref;
  if (total_samples == 0) {
    // Do not search WARPED_CAUSAL if there are no samples to use to determine
    // warped parameters.
    last_motion_mode_allowed = OBMC_CAUSAL;
  }

  // Every candidate is derived from this copy of the mode info.
  const MB_MODE_INFO base_mbmi = *mbmi;
  MB_MODE_INFO best_mbmi;
  const int interp_filter = features->interp_filter;
  // Result of av1_get_switchable_rate() when av1_is_interp_needed() is true,
  // otherwise 0. Added to the rate of every non-WARPED_CAUSAL candidate.
  const int switchable_rate =
      av1_is_interp_needed(xd)
          ? av1_get_switchable_rate(x, xd, interp_filter,
                                    cm->seq_params->enable_dual_filter)
          : 0;
  // INT64_MAX here means "no candidate accepted yet"; checked after the loop.
  int64_t best_rd = INT64_MAX;
  int best_rate_mv = rate_mv0;
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int mode_index_start, mode_index_end;
  const int txfm_rd_gate_level =
      get_txfm_rd_gate_level(cm->seq_params->enable_masked_compound,
                             cpi->sf.inter_sf.txfm_rd_gate_level, bsize,
                             TX_SEARCH_MOTION_MODE, eval_motion_mode);

  // Modify the start and end index according to speed features. For example,
  // if SIMPLE_TRANSLATION has already been searched according to
  // the motion_mode_for_winner_cand speed feature, update the mode_index_start
  // to avoid searching it again.
  update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
                              last_motion_mode_allowed, interintra_allowed,
                              eval_motion_mode);
  // Main function loop. This loops over all of the possible motion modes and
  // computes RD to determine the best one. This process includes computing
  // any necessary side information for the motion mode and performing the
  // transform search.
  for (int mode_index = mode_index_start; mode_index <= mode_index_end;
       mode_index++) {
    // When skip_motion_mode is set only index 0 is evaluated.
    if (args->skip_motion_mode && mode_index) continue;
    int tmp_rate2 = rate2_nocoeff;
    // Indices beyond the last allowed motion mode denote the interintra
    // candidate.
    const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
    int tmp_rate_mv = rate_mv0;

    // Discard any changes a previous candidate made to the mode info.
    *mbmi = base_mbmi;
    if (is_interintra_mode) {
      // Only use SIMPLE_TRANSLATION for interintra
      mbmi->motion_mode = SIMPLE_TRANSLATION;
    } else {
      mbmi->motion_mode = (MOTION_MODE)mode_index;
      assert(mbmi->ref_frame[1] != INTRA_FRAME);
    }

    // With sharpness == 3 neither OBMC_CAUSAL nor WARPED_CAUSAL is evaluated.
    if (cpi->oxcf.algo_cfg.sharpness == 3 &&
        (mbmi->motion_mode == OBMC_CAUSAL ||
         mbmi->motion_mode == WARPED_CAUSAL))
      continue;

    // Do not search OBMC if the probability of selecting it is below a
    // predetermined threshold for this update_type and block size.
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    int use_actual_frame_probs = 1;
    // Assigned in exactly one of the two branches below.
    int prune_obmc;
#if CONFIG_FPMT_TEST
    // In PARALLEL_SIMULATION_ENCODE the temporary frame probabilities are
    // consulted instead of the actual ones.
    use_actual_frame_probs =
        (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
    if (!use_actual_frame_probs) {
      prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
#endif
    if (use_actual_frame_probs) {
      prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
    if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
        mbmi->motion_mode == OBMC_CAUSAL)
      continue;

    if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
      // SIMPLE_TRANSLATION mode: no need to recalculate.
      // The prediction is calculated before motion_mode_rd() is called in
      // handle_inter_mode()
    } else if (mbmi->motion_mode == OBMC_CAUSAL) {
      // MV before any OBMC-specific motion search; used to detect a change.
      const uint32_t cur_mv = mbmi->mv[0].as_int;
      // OBMC_CAUSAL not allowed for compound prediction
      assert(!is_comp_pred);
      if (have_newmv_in_inter_mode(this_mode)) {
        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
                                 &mbmi->mv[0], NULL);
        // Replace the original MV rate with the rate of the new MV.
        tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
      }
      if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
        // Build the predictor according to the current motion vector if it has
        // not already been built
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                      0, av1_num_planes(cm) - 1);
      }
      // Build the inter predictor by blending the predictor corresponding to
      // this MV, and the neighboring blocks using the OBMC model
      av1_build_obmc_inter_prediction(
          cm, xd, args->above_pred_buf, args->above_pred_stride,
          args->left_pred_buf, args->left_pred_stride);
      // The WARPED_CAUSAL branch below is compiled only when
      // CONFIG_REALTIME_ONLY is 0.
#if !CONFIG_REALTIME_ONLY
    } else if (mbmi->motion_mode == WARPED_CAUSAL) {
      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
      mbmi->motion_mode = WARPED_CAUSAL;
      mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
      mbmi->interp_filters =
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));

      // Work on local copies of the samples; the cached pts0/pts_inref0 are
      // later handed to av1_refine_warped_mv() together with total_samples.
      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
      // Select the samples according to motion vector difference
      if (mbmi->num_proj_ref > 1) {
        mbmi->num_proj_ref = av1_selectSamples(
            &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
      }

      // Compute the warped motion parameters with a least squares fit
      //  using the collected samples
      // A zero return is treated as success; a non-zero return skips this
      // candidate (see the else branch).
      if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
                               mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
                               &mbmi->wm_params, mi_row, mi_col)) {
        assert(!is_comp_pred);
        if (have_newmv_in_inter_mode(this_mode)) {
          // Refine MV for NEWMV mode
          // Keep the pre-refinement state so it can be restored below.
          const int_mv mv0 = mbmi->mv[0];
          const WarpedMotionParams wm_params0 = mbmi->wm_params;
          const int num_proj_ref0 = mbmi->num_proj_ref;

          const int_mv ref_mv = av1_get_ref_mv(x, 0);
          SUBPEL_MOTION_SEARCH_PARAMS ms_params;
          av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
                                            &ref_mv.as_mv, NULL);

          // Refine MV in a small range.
          av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
                               total_samples, cpi->sf.mv_sf.warp_search_method,
                               cpi->sf.mv_sf.warp_search_iters);

          if (mv0.as_int != mbmi->mv[0].as_int) {
            // Keep the refined MV and WM parameters.
            tmp_rate_mv = av1_mv_bit_cost(
                &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
                x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
          } else {
            // Restore the old MV and WM parameters.
            mbmi->mv[0] = mv0;
            mbmi->wm_params = wm_params0;
            mbmi->num_proj_ref = num_proj_ref0;
          }
        }

        // Build the warped predictor
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                      av1_num_planes(cm) - 1);
      } else {
        continue;
      }
#endif  // !CONFIG_REALTIME_ONLY
    } else if (is_interintra_mode) {
      // A negative return from av1_handle_inter_intra_mode() skips this
      // candidate.
      const int ret =
          av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
                                      &tmp_rate_mv, &tmp_rate2, orig_dst);
      if (ret < 0) continue;
    }

    // If we are searching newmv and the mv is the same as refmv, skip the
    // current mode
    if (!av1_check_newmv_joint_nonzero(cm, x)) continue;

    // Update rd_stats for the current motion mode
    txfm_info->skip_txfm = 0;
    rd_stats->dist = 0;
    rd_stats->sse = 0;
    rd_stats->skip_txfm = 1;
    rd_stats->rate = tmp_rate2;
    const ModeCosts *mode_costs = &x->mode_costs;
    // WARPED_CAUSAL does not add the switchable interpolation filter rate.
    if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
    if (interintra_allowed) {
      // Cost of the interintra flag: index 1 when ref_frame[1] is INTRA_FRAME.
      rd_stats->rate +=
          mode_costs->interintra_cost[size_group_lookup[bsize]]
                                     [mbmi->ref_frame[1] == INTRA_FRAME];
    }
    if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
        (mbmi->ref_frame[1] != INTRA_FRAME)) {
      // The motion mode cost table depends on whether WARPED_CAUSAL is among
      // the allowed modes.
      if (last_motion_mode_allowed == WARPED_CAUSAL) {
        rd_stats->rate +=
            mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
      } else {
        rd_stats->rate +=
            mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
      }
    }

    // Stays INT64_MAX on the estimation path; set only by the full search.
    int64_t this_yrd = INT64_MAX;

    if (!do_tx_search) {
      // Avoid doing a transform search here to speed up the overall mode
      // search. It will be done later in the mode search if the current
      // motion mode seems promising.
      // -1 marks "not computed"; the asserts below rely on this.
      int64_t curr_sse = -1;
      int64_t sse_y = -1;
      int est_residue_cost = 0;
      int64_t est_dist = 0;
      int64_t est_rd = 0;
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        curr_sse = get_sse(cpi, x, &sse_y);
        const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
                                                 &est_residue_cost, &est_dist);
        (void)has_est_rd;
        assert(has_est_rd);
      } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
                 cpi->sf.rt_sf.use_nonrd_pick_mode) {
        model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
            cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
            NULL, &curr_sse, NULL, NULL, NULL);
        sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
      }
      est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
      // Drop the candidate if even 80% of its estimated RD exceeds the best
      // estimate so far.
      if (est_rd * 0.80 > *best_est_rd) {
        mbmi->ref_frame[1] = ref_frame_1;
        continue;
      }
      // Rate before the estimated residue cost is added.
      const int mode_rate = rd_stats->rate;
      rd_stats->rate += est_residue_cost;
      rd_stats->dist = est_dist;
      rd_stats->rdcost = est_rd;
      if (rd_stats->rdcost < *best_est_rd) {
        *best_est_rd = rd_stats->rdcost;
        assert(sse_y >= 0);
        // Luma-only skip RD reference; left at INT64_MAX when gating is off.
        ref_skip_rd[1] = txfm_rd_gate_level
                             ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
                             : INT64_MAX;
      }
      // Record the candidate for the later full transform search. Under
      // SINGLE_REFERENCE only non-compound candidates are recorded.
      if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
        if (!is_comp_pred) {
          assert(curr_sse >= 0);
          inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                                rd_stats->rdcost, rd_stats, rd_stats_y,
                                rd_stats_uv, mbmi);
        }
      } else {
        assert(curr_sse >= 0);
        inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                              rd_stats->rdcost, rd_stats, rd_stats_y,
                              rd_stats_uv, mbmi);
      }
      mbmi->skip_txfm = 0;
    } else {
      // Perform full transform search
      int64_t skip_rd = INT64_MAX;
      int64_t skip_rdy = INT64_MAX;
      if (txfm_rd_gate_level) {
        // Check if the mode is good enough based on skip RD
        int64_t sse_y = INT64_MAX;
        int64_t curr_sse = get_sse(cpi, x, &sse_y);
        skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
        skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
        int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
                                        txfm_rd_gate_level, 0);
        if (!eval_txfm) continue;
      }

      // Do transform search
      const int mode_rate = rd_stats->rate;
      if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
                           rd_stats->rate, ref_best_rd)) {
        // Abort the whole search when the first candidate (index 0) fails
        // with an INT_MAX luma rate; otherwise just skip this candidate.
        if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
          return INT64_MAX;
        }
        continue;
      }
      const int skip_ctx = av1_get_skip_txfm_context(xd);
      // Luma rate including the skip flag cost for the chosen skip decision.
      const int y_rate =
          rd_stats->skip_txfm
              ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);

      const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      if (curr_rd < ref_best_rd) {
        // Tighten the reference RD (local copy) and publish the matching
        // skip RD values to the caller.
        ref_best_rd = curr_rd;
        ref_skip_rd[0] = skip_rd;
        ref_skip_rd[1] = skip_rdy;
      }
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
            rd_stats_y->rate + rd_stats_uv->rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }

    // For non-translational global motion the interpolation filters are
    // replaced by the broadcast unswitchable filter.
    if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
      if (is_nontrans_global_motion(xd, xd->mi[0])) {
        mbmi->interp_filters =
            av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
      }
    }

    adjust_cost(cpi, x, &this_yrd);
    adjust_rdcost(cpi, x, rd_stats);
    adjust_rdcost(cpi, x, rd_stats_y);

    const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
    if (mode_index == 0) {
      // Remember the RD of the index-0 candidate for this mode/ref_mv_idx/ref.
      args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
    }
    // The comparison uses scaled copies; the stored best_rd stays unscaled.
    int64_t best_scaled_rd = best_rd;
    int64_t this_scaled_rd = tmp_rd;
    if (mode_index != 0)
      increase_warp_mode_rd(&best_mbmi, mbmi, &best_scaled_rd, &this_scaled_rd,
                            cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct);

    // The index-0 candidate is always accepted once it reaches this point.
    if (mode_index == 0 || this_scaled_rd < best_scaled_rd) {
      // Update best_rd data if this is the best motion mode so far
      best_mbmi = *mbmi;
      best_rd = tmp_rd;
      best_rd_stats = *rd_stats;
      best_rd_stats_y = *rd_stats_y;
      best_rate_mv = tmp_rate_mv;
      *yrd = this_yrd;
      if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
      best_xskip_txfm = mbmi->skip_txfm;
    }
  }
  // Update RD and mbmi stats for selected motion mode
  mbmi->ref_frame[1] = ref_frame_1;
  *rate_mv = best_rate_mv;
  // Fail if no candidate was accepted, or if the mode info left by the last
  // iteration does not pass av1_check_newmv_joint_nonzero().
  if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
    av1_invalid_rd_stats(rd_stats);
    restore_dst_buf(xd, *orig_dst, num_planes);
    return INT64_MAX;
  }
  // Copy the winning candidate back into the caller-visible state.
  *mbmi = best_mbmi;
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
  txfm_info->skip_txfm = best_xskip_txfm;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1899
1900
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
1901
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
1902
0
                            const BUFFER_SET *const orig_dst, int64_t best_rd) {
1903
0
  assert(bsize < BLOCK_SIZES_ALL);
1904
0
  const AV1_COMMON *cm = &cpi->common;
1905
0
  const int num_planes = av1_num_planes(cm);
1906
0
  MACROBLOCKD *const xd = &x->e_mbd;
1907
0
  const int mi_row = xd->mi_row;
1908
0
  const int mi_col = xd->mi_col;
1909
0
  int64_t total_sse = 0;
1910
0
  int64_t this_rd = INT64_MAX;
1911
0
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
1912
0
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];
1913
1914
0
  for (int plane = 0; plane < num_planes; ++plane) {
1915
    // Call av1_enc_build_inter_predictor() for one plane at a time.
1916
0
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1917
0
                                  plane, plane);
1918
0
    const struct macroblockd_plane *const pd = &xd->plane[plane];
1919
0
    const BLOCK_SIZE plane_bsize =
1920
0
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
1921
1922
0
    av1_subtract_plane(x, plane_bsize, plane);
1923
1924
0
    int64_t sse =
1925
0
        av1_pixel_diff_dist(x, plane, 0, 0, plane_bsize, plane_bsize, NULL);
1926
0
    if (is_cur_buf_hbd(xd)) sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
1927
0
    sse <<= 4;
1928
0
    total_sse += sse;
1929
    // When current rd cost is more than the best rd, skip evaluation of
1930
    // remaining planes.
1931
0
    this_rd = RDCOST(x->rdmult, rd_stats->rate, total_sse);
1932
0
    if (this_rd > best_rd) break;
1933
0
  }
1934
1935
0
  rd_stats->dist = rd_stats->sse = total_sse;
1936
0
  rd_stats->rdcost = this_rd;
1937
1938
0
  restore_dst_buf(xd, *orig_dst, num_planes);
1939
0
  return 0;
1940
0
}
1941
1942
// Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the relevant
1943
// mode
1944
// Note(rachelbarker): This speed feature currently does not interact correctly
1945
// with global motion. The issue is that, when global motion is used, GLOBALMV
1946
// produces a different prediction to NEARESTMV/NEARMV even if the motion
1947
// vectors are the same. Thus GLOBALMV should not be pruned in this case.
1948
static inline int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
1949
                                      int ref_idx,
1950
                                      const MV_REFERENCE_FRAME *ref_frame,
1951
0
                                      PREDICTION_MODE single_mode) {
1952
0
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1953
0
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1954
0
  assert(single_mode != NEWMV);
1955
0
  if (single_mode == NEARESTMV) {
1956
0
    return 0;
1957
0
  } else if (single_mode == NEARMV) {
1958
    // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
1959
    // when ref_mv_count = 1, NEARMV is same as GLOBALMV
1960
0
    if (ref_mv_count < 2) return 1;
1961
0
  } else if (single_mode == GLOBALMV) {
1962
    // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
1963
0
    if (ref_mv_count == 0) return 1;
1964
    // when ref_mv_count == 1, NEARMV is same as GLOBALMV
1965
0
    else if (ref_mv_count == 1)
1966
0
      return 0;
1967
1968
0
    int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
1969
    // Check GLOBALMV is matching with any mv in ref_mv_stack
1970
0
    for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
1971
0
      int_mv this_mv;
1972
1973
0
      if (ref_idx == 0)
1974
0
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
1975
0
      else
1976
0
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
1977
1978
0
      if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
1979
0
        return 1;
1980
0
    }
1981
0
  }
1982
0
  return 0;
1983
0
}
1984
1985
static inline int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
1986
                              int ref_idx, int ref_mv_idx,
1987
                              int skip_repeated_ref_mv,
1988
                              const MV_REFERENCE_FRAME *ref_frame,
1989
4.72M
                              const MB_MODE_INFO_EXT *mbmi_ext) {
1990
4.72M
  const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1991
4.72M
  assert(is_inter_singleref_mode(single_mode));
1992
4.72M
  if (single_mode == NEWMV) {
1993
1.84M
    this_mv->as_int = INVALID_MV;
1994
2.87M
  } else if (single_mode == GLOBALMV) {
1995
768k
    if (skip_repeated_ref_mv &&
1996
0
        check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1997
0
      return 0;
1998
768k
    *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1999
2.10M
  } else {
2000
2.10M
    assert(single_mode == NEARMV || single_mode == NEARESTMV);
2001
2.10M
    const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
2002
2.10M
    const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
2003
2.10M
    if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
2004
1.12M
      assert(ref_mv_offset >= 0);
2005
1.12M
      if (ref_idx == 0) {
2006
1.12M
        *this_mv =
2007
1.12M
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
2008
18.4E
      } else {
2009
18.4E
        *this_mv =
2010
18.4E
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
2011
18.4E
      }
2012
1.12M
    } else {
2013
976k
      if (skip_repeated_ref_mv &&
2014
0
          check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
2015
0
        return 0;
2016
976k
      *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
2017
976k
    }
2018
2.10M
  }
2019
4.72M
  return 1;
2020
4.72M
}
2021
2022
// Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
2023
// population
2024
static inline int skip_nearest_near_mv_using_refmv_weight(
2025
    const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
2026
1.75M
    const int8_t ref_frame_type, PREDICTION_MODE best_mode) {
2027
1.75M
  if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
2028
  // Do not skip the mode if the current block has not yet obtained a valid
2029
  // inter mode.
2030
788k
  if (!is_inter_mode(best_mode)) return 0;
2031
2032
415k
  const MACROBLOCKD *xd = &x->e_mbd;
2033
  // Do not skip the mode if both the top and left neighboring blocks are not
2034
  // available.
2035
415k
  if (!xd->left_available || !xd->up_available) return 0;
2036
198k
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2037
198k
  const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
2038
198k
  const int ref_mv_count =
2039
198k
      AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
2040
2041
198k
  if (ref_mv_count == 0) return 0;
2042
  // If ref mv list has at least one nearest candidate do not prune NEARESTMV
2043
140k
  if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;
2044
2045
  // Count number of ref mvs populated from nearest candidates
2046
127k
  int nearest_refmv_count = 0;
2047
350k
  for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
2048
222k
    if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
2049
222k
  }
2050
2051
  // nearest_refmv_count indicates the closeness of block motion characteristics
2052
  // with respect to its spatial neighbor. Smaller value of nearest_refmv_count
2053
  // w.r.t to ref_mv_count means less correlation with its spatial neighbors.
2054
  // Hence less possibility for NEARESTMV and NEARMV modes becoming the best
2055
  // mode since these modes work well for blocks that shares similar motion
2056
  // characteristics with its neighbor. Thus, NEARMV mode is pruned when
2057
  // nearest_refmv_count is relatively smaller than ref_mv_count and NEARESTMV
2058
  // mode is pruned if none of the ref mvs are populated from nearest candidate.
2059
127k
  const int prune_thresh = 1 + (ref_mv_count >= 2);
2060
127k
  if (nearest_refmv_count < prune_thresh) return 1;
2061
59.8k
  return 0;
2062
127k
}
2063
2064
// This function updates the non-new mv for the current prediction mode
2065
static inline int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
2066
                               const AV1_COMMON *cm, const MACROBLOCK *x,
2067
4.72M
                               int skip_repeated_ref_mv) {
2068
4.72M
  const MACROBLOCKD *xd = &x->e_mbd;
2069
4.72M
  const MB_MODE_INFO *mbmi = xd->mi[0];
2070
4.72M
  const int is_comp_pred = has_second_ref(mbmi);
2071
2072
4.72M
  int ret = 1;
2073
9.44M
  for (int i = 0; i < is_comp_pred + 1; ++i) {
2074
4.72M
    int_mv this_mv;
2075
4.72M
    this_mv.as_int = INVALID_MV;
2076
4.72M
    ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
2077
4.72M
                      skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
2078
4.72M
    if (!ret) return 0;
2079
4.72M
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
2080
4.72M
    if (single_mode == NEWMV) {
2081
1.84M
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2082
1.84M
      cur_mv[i] =
2083
1.84M
          (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
2084
1.84M
                         .this_mv
2085
1.84M
                   : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
2086
12
                         .comp_mv;
2087
2.87M
    } else {
2088
2.87M
      ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
2089
2.87M
    }
2090
4.72M
  }
2091
4.72M
  return ret;
2092
4.72M
}
2093
2094
static inline int get_drl_cost(const MB_MODE_INFO *mbmi,
2095
                               const MB_MODE_INFO_EXT *mbmi_ext,
2096
                               const int (*const drl_mode_cost0)[2],
2097
5.80M
                               int8_t ref_frame_type) {
2098
5.80M
  int cost = 0;
2099
5.80M
  if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
2100
6.35M
    for (int idx = 0; idx < 2; ++idx) {
2101
4.61M
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
2102
2.00M
        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
2103
2.00M
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
2104
2.00M
        if (mbmi->ref_mv_idx == idx) return cost;
2105
2.00M
      }
2106
4.61M
    }
2107
1.74M
    return cost;
2108
2.64M
  }
2109
2110
3.15M
  if (have_nearmv_in_inter_mode(mbmi->mode)) {
2111
2.33M
    for (int idx = 1; idx < 3; ++idx) {
2112
1.66M
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
2113
561k
        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
2114
561k
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
2115
561k
        if (mbmi->ref_mv_idx == (idx - 1)) return cost;
2116
561k
      }
2117
1.66M
    }
2118
672k
    return cost;
2119
916k
  }
2120
2.24M
  return cost;
2121
3.15M
}
2122
2123
static inline int is_single_newmv_valid(const HandleInterModeArgs *const args,
2124
                                        const MB_MODE_INFO *const mbmi,
2125
0
                                        PREDICTION_MODE this_mode) {
2126
0
  for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
2127
0
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
2128
0
    const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
2129
0
    if (single_mode == NEWMV &&
2130
0
        args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
2131
0
      return 0;
2132
0
    }
2133
0
  }
2134
0
  return 1;
2135
0
}
2136
2137
static int get_drl_refmv_count(const MACROBLOCK *const x,
2138
                               const MV_REFERENCE_FRAME *ref_frame,
2139
6.81M
                               PREDICTION_MODE mode) {
2140
6.81M
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2141
6.81M
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
2142
6.81M
  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
2143
6.81M
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
2144
6.81M
  const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);
2145
6.81M
  const int has_drl =
2146
6.81M
      (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1);
2147
6.81M
  const int ref_set =
2148
6.81M
      has_drl ? AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv) : 1;
2149
2150
6.81M
  return ref_set;
2151
6.81M
}
2152
2153
// Checks if particular ref_mv_idx should be pruned.
2154
static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
2155
                                         const int qindex,
2156
22.0k
                                         const int ref_mv_idx) {
2157
22.0k
  if (reduce_inter_modes >= 3) return 1;
2158
  // Q-index logic based pruning is enabled only for
2159
  // reduce_inter_modes = 2.
2160
22.0k
  assert(reduce_inter_modes == 2);
2161
  // When reduce_inter_modes=2, pruning happens as below based on q index.
2162
  // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
2163
  // For q index range between 86 and 170: prune if ref_mv_idx == 2.
2164
  // For q index range between 171 and 255: no pruning.
2165
1
  const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
2166
1
  return (ref_mv_idx >= min_prune_ref_mv_idx);
2167
22.0k
}
2168
2169
// Whether this reference motion vector can be skipped, based on initial
2170
// heuristics.
2171
static bool ref_mv_idx_early_breakout(
2172
    const SPEED_FEATURES *const sf,
2173
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
2174
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
2175
1.00M
    int ref_mv_idx) {
2176
1.00M
  MACROBLOCKD *xd = &x->e_mbd;
2177
1.00M
  MB_MODE_INFO *mbmi = xd->mi[0];
2178
1.00M
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2179
1.00M
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2180
1.00M
  const int is_comp_pred = has_second_ref(mbmi);
2181
1.00M
  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
2182
572k
    if (mbmi->ref_frame[0] == LAST2_FRAME ||
2183
572k
        mbmi->ref_frame[0] == LAST3_FRAME ||
2184
572k
        mbmi->ref_frame[1] == LAST2_FRAME ||
2185
572k
        mbmi->ref_frame[1] == LAST3_FRAME) {
2186
0
      const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
2187
0
      if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
2188
0
          REF_CAT_LEVEL) {
2189
0
        return true;
2190
0
      }
2191
0
    }
2192
    // TODO(any): Experiment with reduce_inter_modes for compound prediction
2193
572k
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
2194
229k
        have_newmv_in_inter_mode(mbmi->mode)) {
2195
169k
      if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
2196
22.0k
          mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
2197
22.0k
        const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
2198
22.0k
        const int do_prune = prune_ref_mv_idx_using_qindex(
2199
22.0k
            sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
2200
22.0k
        if (do_prune &&
2201
22.0k
            (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
2202
22.0k
             REF_CAT_LEVEL)) {
2203
21.1k
          return true;
2204
21.1k
        }
2205
22.0k
      }
2206
169k
    }
2207
572k
  }
2208
2209
979k
  mbmi->ref_mv_idx = ref_mv_idx;
2210
979k
  if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
2211
0
    return true;
2212
0
  }
2213
979k
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
2214
979k
  const int drl_cost = get_drl_cost(
2215
979k
      mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
2216
979k
  est_rd_rate += drl_cost;
2217
979k
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
2218
39
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
2219
39
    return true;
2220
39
  }
2221
979k
  return false;
2222
979k
}
2223
2224
// Compute the estimated RD cost for the motion vector with simple translation.
2225
static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
2226
                                          RD_STATS *rd_stats,
2227
                                          HandleInterModeArgs *args,
2228
                                          int ref_mv_idx, int64_t ref_best_rd,
2229
7.31k
                                          BLOCK_SIZE bsize) {
2230
7.31k
  MACROBLOCKD *xd = &x->e_mbd;
2231
7.31k
  MB_MODE_INFO *mbmi = xd->mi[0];
2232
7.31k
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2233
7.31k
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2234
7.31k
  const AV1_COMMON *cm = &cpi->common;
2235
7.31k
  const int is_comp_pred = has_second_ref(mbmi);
2236
7.31k
  const ModeCosts *mode_costs = &x->mode_costs;
2237
2238
7.31k
  struct macroblockd_plane *p = xd->plane;
2239
7.31k
  const BUFFER_SET orig_dst = {
2240
7.31k
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
2241
7.31k
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
2242
7.31k
  };
2243
7.31k
  av1_init_rd_stats(rd_stats);
2244
2245
7.31k
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2246
7.31k
  mbmi->comp_group_idx = 0;
2247
7.31k
  mbmi->compound_idx = 1;
2248
7.31k
  if (mbmi->ref_frame[1] == INTRA_FRAME) {
2249
0
    mbmi->ref_frame[1] = NONE_FRAME;
2250
0
  }
2251
7.31k
  int16_t mode_ctx =
2252
7.31k
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
2253
2254
7.31k
  mbmi->num_proj_ref = 0;
2255
7.31k
  mbmi->motion_mode = SIMPLE_TRANSLATION;
2256
7.31k
  mbmi->ref_mv_idx = ref_mv_idx;
2257
2258
7.31k
  rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
2259
7.31k
  const int drl_cost =
2260
7.31k
      get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
2261
7.31k
  rd_stats->rate += drl_cost;
2262
2263
7.31k
  int_mv cur_mv[2];
2264
7.31k
  if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
2265
158
    return INT64_MAX;
2266
158
  }
2267
7.31k
  assert(have_nearmv_in_inter_mode(mbmi->mode));
2268
14.3k
  for (int i = 0; i < is_comp_pred + 1; ++i) {
2269
7.15k
    mbmi->mv[i].as_int = cur_mv[i].as_int;
2270
7.15k
  }
2271
7.15k
  const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
2272
7.15k
  rd_stats->rate += ref_mv_cost;
2273
2274
7.15k
  if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
2275
0
    return INT64_MAX;
2276
0
  }
2277
2278
7.15k
  mbmi->motion_mode = SIMPLE_TRANSLATION;
2279
7.15k
  mbmi->num_proj_ref = 0;
2280
7.15k
  if (is_comp_pred) {
2281
    // Only compound_average
2282
0
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2283
0
    mbmi->comp_group_idx = 0;
2284
0
    mbmi->compound_idx = 1;
2285
0
  }
2286
7.15k
  set_default_interp_filters(mbmi, cm->features.interp_filter);
2287
2288
7.15k
  const int mi_row = xd->mi_row;
2289
7.15k
  const int mi_col = xd->mi_col;
2290
7.15k
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
2291
7.15k
                                AOM_PLANE_Y, AOM_PLANE_Y);
2292
7.15k
  int est_rate;
2293
7.15k
  int64_t est_dist;
2294
7.15k
  model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
2295
7.15k
                                  NULL, NULL, NULL, NULL, NULL);
2296
7.15k
  return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
2297
7.15k
}
2298
2299
// Represents a set of integers, from 0 to sizeof(int) * 8 - 1, as bits in
2300
// an integer. 0 for the i-th bit means that integer is excluded, 1 means
2301
// it is included.
2302
982k
// Adds `index` to the bit set stored in `*mask`.
//
// `index` must lie in [0, sizeof(int) * 8 - 1]. The bit is built and OR-ed in
// unsigned arithmetic so that selecting the top bit does not left-shift a 1
// into the sign bit of a signed int, which is undefined behaviour in C.
static inline void mask_set_bit(int *mask, int index) {
  *mask = (int)((unsigned int)*mask | (1u << index));
}
2303
2304
4.83M
// Returns true when bit `index` of `mask` is set, i.e. when `index` is a
// member of the set represented by `mask`.
static inline bool mask_check_bit(int mask, int index) {
  const int shifted = mask >> index;
  return (shifted & 0x1) != 0;
}
2307
2308
// Before performing the full MV search in handle_inter_mode, do a simple
2309
// translation search and see if we can eliminate any motion vectors.
2310
// Returns an integer where, if the i-th bit is set, it means that the i-th
2311
// motion vector should be searched. This is only set for NEAR_MV.
2312
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
2313
                                RD_STATS *rd_stats,
2314
                                HandleInterModeArgs *const args,
2315
                                int64_t ref_best_rd, BLOCK_SIZE bsize,
2316
4.25M
                                const int ref_set) {
2317
  // If the number of ref mv count is equal to 1, do not prune the same. It
2318
  // is better to evaluate the same than to prune it.
2319
4.25M
  if (ref_set == 1) return 1;
2320
427k
  AV1_COMMON *const cm = &cpi->common;
2321
427k
  const MACROBLOCKD *const xd = &x->e_mbd;
2322
427k
  const MB_MODE_INFO *const mbmi = xd->mi[0];
2323
427k
  const PREDICTION_MODE this_mode = mbmi->mode;
2324
2325
  // Only search indices if they have some chance of being good.
2326
427k
  int good_indices = 0;
2327
1.42M
  for (int i = 0; i < ref_set; ++i) {
2328
1.00M
    if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
2329
1.00M
                                  ref_best_rd, i)) {
2330
21.1k
      continue;
2331
21.1k
    }
2332
979k
    mask_set_bit(&good_indices, i);
2333
979k
  }
2334
2335
  // Only prune in NEARMV mode, if the speed feature is set, and the block size
2336
  // is large enough. If these conditions are not met, return all good indices
2337
  // found so far.
2338
427k
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
2339
0
    return good_indices;
2340
427k
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
2341
82.5k
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
2342
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
2343
  // so b/2384 can be resolved.
2344
1.85k
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
2345
2.94k
      (mbmi->ref_frame[1] > 0 &&
2346
0
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
2347
0
    return good_indices;
2348
0
  }
2349
2350
  // Calculate the RD cost for the motion vectors using simple translation.
2351
1.85k
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
2352
9.16k
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
2353
    // If this index is bad, ignore it.
2354
7.31k
    if (!mask_check_bit(good_indices, ref_mv_idx)) {
2355
0
      continue;
2356
0
    }
2357
7.31k
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
2358
7.31k
        cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize);
2359
7.31k
  }
2360
  // Find the index with the best RD cost.
2361
1.85k
  int best_idx = 0;
2362
7.75k
  for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
2363
5.89k
    if (idx_rdcost[i] < idx_rdcost[best_idx]) {
2364
1.83k
      best_idx = i;
2365
1.83k
    }
2366
5.89k
  }
2367
  // Only include indices that are good and within a % of the best.
2368
1.85k
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
2369
  // If the simple translation cost is not within this multiple of the
2370
  // best RD, skip it. Note that the cutoff is derived experimentally.
2371
1.85k
  const double ref_dth = 5;
2372
1.85k
  int result = 0;
2373
9.16k
  for (int i = 0; i < ref_set; ++i) {
2374
7.31k
    if (mask_check_bit(good_indices, i) &&
2375
7.31k
        (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
2376
3.09k
        (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
2377
3.03k
      mask_set_bit(&result, i);
2378
3.03k
    }
2379
7.31k
  }
2380
1.85k
  return result;
2381
1.85k
}
2382
2383
/*!\brief Motion mode information for inter mode search speedup.
2384
 *
2385
 * Used in a speed feature to search motion modes other than
2386
 * SIMPLE_TRANSLATION only on winning candidates.
2387
 */
2388
typedef struct motion_mode_candidate {
2389
  /*!
2390
   * Mode info for the motion mode candidate.
2391
   */
2392
  MB_MODE_INFO mbmi;
2393
  /*!
2394
   * Rate describing the cost of the motion vectors for this candidate.
2395
   */
2396
  int rate_mv;
2397
  /*!
2398
   * Rate before motion mode search and transform coding is applied.
2399
   */
2400
  int rate2_nocoeff;
2401
  /*!
2402
   * An integer value 0 or 1 which indicates whether or not to skip the motion
2403
   * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
2404
   * candidate.
2405
   */
2406
  int skip_motion_mode;
2407
  /*!
2408
   * Total RD cost for this candidate.
2409
   */
2410
  int64_t rd_cost;
2411
} motion_mode_candidate;
2412
2413
/*!\cond */
2414
typedef struct motion_mode_best_st_candidate {
2415
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
2416
  int num_motion_mode_cand;
2417
} motion_mode_best_st_candidate;
2418
2419
// Checks if the current reference frame matches with neighbouring block's
2420
// (top/left) reference frames
2421
// Returns 1 when any reference frame used by cur_mbmi equals either entry of
// nb_mbmi->ref_frame, and 0 otherwise. The second reference of cur_mbmi is
// considered only when has_second_ref() reports it as compound.
static inline int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
                                               MB_MODE_INFO *nb_mbmi) {
  const MV_REFERENCE_FRAME nb_ref0 = nb_mbmi->ref_frame[0];
  const MV_REFERENCE_FRAME nb_ref1 = nb_mbmi->ref_frame[1];
  const int num_cur_refs = has_second_ref(cur_mbmi) + 1;

  for (int i = 0; i < num_cur_refs; i++) {
    const MV_REFERENCE_FRAME cur_ref = cur_mbmi->ref_frame[i];
    if (cur_ref == nb_ref0 || cur_ref == nb_ref1) return 1;
  }
  return 0;
}
2437
2438
static inline int find_ref_match_in_above_nbs(const int total_mi_cols,
2439
1.69M
                                              MACROBLOCKD *xd) {
2440
1.69M
  if (!xd->up_available) return 1;
2441
1.29M
  const int mi_col = xd->mi_col;
2442
1.29M
  MB_MODE_INFO **cur_mbmi = xd->mi;
2443
  // prev_row_mi points into the mi array, starting at the beginning of the
2444
  // previous row.
2445
1.29M
  MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
2446
1.29M
  const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
2447
1.29M
  uint8_t mi_step;
2448
2.23M
  for (int above_mi_col = mi_col; above_mi_col < end_col;
2449
1.31M
       above_mi_col += mi_step) {
2450
1.31M
    MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
2451
1.31M
    mi_step = mi_size_wide[above_mi[0]->bsize];
2452
1.31M
    int match_found = 0;
2453
1.31M
    if (is_inter_block(*above_mi))
2454
484k
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
2455
1.31M
    if (match_found) return 1;
2456
1.31M
  }
2457
922k
  return 0;
2458
1.29M
}
2459
2460
static inline int find_ref_match_in_left_nbs(const int total_mi_rows,
2461
1.68M
                                             MACROBLOCKD *xd) {
2462
1.68M
  if (!xd->left_available) return 1;
2463
1.29M
  const int mi_row = xd->mi_row;
2464
1.29M
  MB_MODE_INFO **cur_mbmi = xd->mi;
2465
  // prev_col_mi points into the mi array, starting at the top of the
2466
  // previous column
2467
1.29M
  MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
2468
1.29M
  const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
2469
1.29M
  uint8_t mi_step;
2470
2.22M
  for (int left_mi_row = mi_row; left_mi_row < end_row;
2471
1.30M
       left_mi_row += mi_step) {
2472
1.30M
    MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
2473
1.30M
    mi_step = mi_size_high[left_mi[0]->bsize];
2474
1.30M
    int match_found = 0;
2475
1.30M
    if (is_inter_block(*left_mi))
2476
480k
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
2477
1.30M
    if (match_found) return 1;
2478
1.30M
  }
2479
917k
  return 0;
2480
1.29M
}
2481
/*!\endcond */
2482
2483
/*! \brief Struct used to hold TPL data to
2484
 * narrow down parts of the inter mode search.
2485
 */
2486
typedef struct {
2487
  /*!
2488
   * The best inter cost out of all of the reference frames.
2489
   */
2490
  int64_t best_inter_cost;
2491
  /*!
2492
   * The inter cost for each reference frame.
2493
   */
2494
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
2495
} PruneInfoFromTpl;
2496
2497
#if !CONFIG_REALTIME_ONLY
2498
// TODO(Remya): Check if get_tpl_stats_b() can be reused
2499
static inline void get_block_level_tpl_stats(
2500
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
2501
368k
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
2502
368k
  AV1_COMMON *const cm = &cpi->common;
2503
2504
368k
  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
2505
368k
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
2506
368k
  const int tpl_idx = cpi->gf_frame_index;
2507
368k
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
2508
368k
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
2509
368k
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
2510
368k
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
2511
368k
  const int mi_wide = mi_size_wide[bsize];
2512
368k
  const int mi_high = mi_size_high[bsize];
2513
368k
  const int tpl_stride = tpl_frame->stride;
2514
368k
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
2515
368k
  const int mi_col_sr =
2516
368k
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
2517
368k
  const int mi_col_end_sr =
2518
368k
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
2519
368k
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
2520
2521
368k
  const int row_step = step;
2522
368k
  const int col_step_sr =
2523
368k
      coded_to_superres_mi(step, cm->superres_scale_denominator);
2524
744k
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
2525
375k
       row += row_step) {
2526
771k
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
2527
396k
         col += col_step_sr) {
2528
396k
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
2529
396k
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];
2530
2531
      // Sums up the inter cost of corresponding ref frames
2532
3.16M
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2533
2.76M
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
2534
2.76M
            this_stats->pred_error[ref_idx];
2535
2.76M
      }
2536
396k
    }
2537
375k
  }
2538
2539
  // Computes the best inter cost (minimum inter_cost)
2540
368k
  int64_t best_inter_cost = INT64_MAX;
2541
2.94M
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2542
2.57M
    const int64_t cur_inter_cost =
2543
2.57M
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
2544
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
2545
    // calculating the minimum inter_cost
2546
2.57M
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
2547
465k
        valid_refs[ref_idx])
2548
465k
      best_inter_cost = cur_inter_cost;
2549
2.57M
  }
2550
368k
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
2551
368k
}
2552
#endif
2553
2554
static inline int prune_modes_based_on_tpl_stats(
2555
    PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
2556
550k
    const PREDICTION_MODE this_mode, int prune_mode_level) {
2557
550k
  const int have_newmv = have_newmv_in_inter_mode(this_mode);
2558
550k
  if ((prune_mode_level < 2) && have_newmv) return 0;
2559
2560
550k
  const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
2561
550k
  if (best_inter_cost == INT64_MAX) return 0;
2562
2563
550k
  const int prune_level = prune_mode_level - 1;
2564
550k
  int64_t cur_inter_cost;
2565
2566
550k
  const int is_globalmv =
2567
550k
      (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
2568
550k
  const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
2569
2570
  // Thresholds used for pruning:
2571
  // Lower value indicates aggressive pruning and higher value indicates
2572
  // conservative pruning which is set based on ref_mv_idx and speed feature.
2573
  // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. prune_index
2574
  // 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
2575
550k
  static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
2576
550k
    { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
2577
550k
  };
2578
2579
550k
  const int is_comp_pred = (refs[1] > INTRA_FRAME);
2580
550k
  if (!is_comp_pred) {
2581
550k
    cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2582
550k
  } else {
2583
12
    const int64_t inter_cost_ref0 =
2584
12
        inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2585
12
    const int64_t inter_cost_ref1 =
2586
12
        inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
2587
    // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
2588
    // more aggressive pruning
2589
12
    cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
2590
12
  }
2591
2592
  // Prune the mode if cur_inter_cost is greater than threshold times
2593
  // best_inter_cost
2594
550k
  if (cur_inter_cost >
2595
550k
      ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
2596
550k
        best_inter_cost) >>
2597
550k
       2))
2598
86.1k
    return 1;
2599
464k
  return 0;
2600
550k
}
2601
2602
/*!\brief High level function to select parameters for compound mode.
2603
 *
2604
 * \ingroup inter_mode_search
2605
 * The main search functionality is done in the call to av1_compound_type_rd().
2606
 *
2607
 * \param[in]     cpi               Top-level encoder structure.
2608
 * \param[in]     x                 Pointer to struct holding all the data for
2609
 *                                  the current macroblock.
2610
 * \param[in]     args              HandleInterModeArgs struct holding
2611
 *                                  miscellaneous arguments for inter mode
2612
 *                                  search. See the documentation for this
2613
 *                                  struct for a description of each member.
2614
 * \param[in]     ref_best_rd       Best RD found so far for this block.
2615
 *                                  It is used for early termination of this
2616
 *                                  search if the RD exceeds this value.
2617
 * \param[in,out] cur_mv            Current motion vector.
2618
 * \param[in]     bsize             Current block size.
2619
 * \param[in,out] compmode_interinter_cost  RD of the selected interinter
2620
 *                                  compound mode.
2621
 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2622
 *                                  allocated buffers for the compound
2623
 *                                  predictors and masks in the compound type
2624
 *                                  search.
2625
 * \param[in,out] orig_dst          A prediction buffer to hold a computed
2626
 *                                  prediction. This will eventually hold the
2627
 *                                  final prediction, and the tmp_dst info will
2628
 *                                  be copied here.
2629
 * \param[in]     tmp_dst           A temporary prediction buffer to hold a
2630
 *                                  computed prediction.
2631
 * \param[in,out] rate_mv           The rate associated with the motion vectors.
2632
 *                                  This will be modified if a motion search is
2633
 *                                  done in the motion mode search.
2634
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
2635
 *                                  information.
2636
 * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is the
2637
 *                                  best total RD for a skip mode so far, and
2638
 *                                  skip_rd[1] is the best RD for a skip mode so
2639
 *                                  far in luma. This is used as a speed feature
2640
 *                                  to skip the transform search if the computed
2641
 *                                  skip RD for the current mode is not better
2642
 *                                  than the best skip_rd so far.
2643
 * \param[in,out] skip_build_pred   Indicates whether or not to build the inter
2644
 *                                  predictor. If this is 0, the inter predictor
2645
 *                                  has already been built and thus we can avoid
2646
 *                                  repeating computation.
2647
 * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2648
 * a viable candidate.
2649
 */
2650
static int process_compound_inter_mode(
2651
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
2652
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
2653
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
2654
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
2655
0
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
2656
0
  MACROBLOCKD *xd = &x->e_mbd;
2657
0
  MB_MODE_INFO *mbmi = xd->mi[0];
2658
0
  const AV1_COMMON *cm = &cpi->common;
2659
0
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
2660
0
                                   cm->seq_params->enable_masked_compound;
2661
0
  int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
2662
0
                         (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);
2663
2664
0
  const int num_planes = av1_num_planes(cm);
2665
0
  const int mi_row = xd->mi_row;
2666
0
  const int mi_col = xd->mi_col;
2667
0
  int is_luma_interp_done = 0;
2668
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
2669
2670
0
  int64_t best_rd_compound;
2671
0
  int64_t rd_thresh;
2672
0
  const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
2673
0
  const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
2674
0
  rd_thresh = get_rd_thresh_from_best_rd(ref_best_rd, (1 << comp_type_rd_shift),
2675
0
                                         comp_type_rd_scale);
2676
  // Select compound type and any parameters related to that type
2677
  // (for example, the mask parameters if it is a masked mode) and compute
2678
  // the RD
2679
0
  *compmode_interinter_cost = av1_compound_type_rd(
2680
0
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
2681
0
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
2682
0
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
2683
0
  if (ref_best_rd < INT64_MAX &&
2684
0
      (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
2685
0
          ref_best_rd) {
2686
0
    restore_dst_buf(xd, *orig_dst, num_planes);
2687
0
    return 1;
2688
0
  }
2689
2690
  // Build only uv predictor for COMPOUND_AVERAGE.
2691
  // Note there is no need to call av1_enc_build_inter_predictor
2692
  // for luma if COMPOUND_AVERAGE is selected because it is the first
2693
  // candidate in av1_compound_type_rd, which means it used the dst_buf
2694
  // rather than the tmp_buf.
2695
0
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
2696
0
    if (num_planes > 1) {
2697
0
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
2698
0
                                    AOM_PLANE_U, num_planes - 1);
2699
0
    }
2700
0
    *skip_build_pred = 1;
2701
0
  }
2702
0
  return 0;
2703
0
}
2704
2705
// Speed feature to prune out MVs that are similar to previous MVs if they
2706
// don't achieve the best RD advantage.
2707
static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2708
                                   int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2709
0
                                   MB_MODE_INFO *mbmi, int pruning_factor) {
2710
0
  int i;
2711
0
  const int is_comp_pred = has_second_ref(mbmi);
2712
0
  const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
2713
2714
  // Skip the evaluation if an MV match is found.
2715
0
  if (ref_mv_idx > 0) {
2716
0
    for (int idx = 0; idx < ref_mv_idx; ++idx) {
2717
0
      if (save_mv[idx][0].as_int == INVALID_MV) continue;
2718
2719
0
      int mv_diff = 0;
2720
0
      for (i = 0; i < 1 + is_comp_pred; ++i) {
2721
0
        mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2722
0
                   abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2723
0
      }
2724
2725
      // If this mode is not the best one, and current MV is similar to
2726
      // previous stored MV, terminate this ref_mv_idx evaluation.
2727
0
      if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2728
0
    }
2729
0
  }
2730
2731
0
  if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2732
0
    for (i = 0; i < is_comp_pred + 1; ++i)
2733
0
      save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2734
0
  }
2735
2736
0
  return 0;
2737
0
}
2738
2739
/*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2740
 *
2741
 * \ingroup inter_mode_search
2742
 *
2743
 * Compares the sse of zero mv and the best sse found in single new_mv. If the
2744
 * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
2745
 * Else returns 0.
2746
 *
2747
 * Note that the sse here comes from single_motion_search. So it is
2748
 * interpolated with the filter in motion search, not the actual interpolation
2749
 * filter used in encoding.
2750
 *
2751
 * \param[in]     fn_ptr            A table of function pointers to compute SSE.
2752
 * \param[in]     x                 Pointer to struct holding all the data for
2753
 *                                  the current macroblock.
2754
 * \param[in]     bsize             The current block_size.
2755
 * \param[in]     args              The args to handle_inter_mode, used to track
2756
 *                                  the best SSE.
2757
 * \param[in]    prune_zero_mv_with_sse  The argument holds speed feature
2758
 *                                       prune_zero_mv_with_sse value
2759
 * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2760
 */
2761
static inline int prune_zero_mv_with_sse(const aom_variance_fn_ptr_t *fn_ptr,
2762
                                         const MACROBLOCK *x, BLOCK_SIZE bsize,
2763
                                         const HandleInterModeArgs *args,
2764
768k
                                         int prune_zero_mv_with_sse) {
2765
768k
  const MACROBLOCKD *xd = &x->e_mbd;
2766
768k
  const MB_MODE_INFO *mbmi = xd->mi[0];
2767
2768
768k
  const int is_comp_pred = has_second_ref(mbmi);
2769
768k
  const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
2770
2771
1.53M
  // Eligibility pass: return 0 (do not prune) unless every reference in use
  // has an IDENTITY global motion model and a recorded best single-ref SSE.
  for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2772
768k
    if (xd->global_motion[refs[idx]].wmtype != IDENTITY) {
2773
      // Pruning logic only works for IDENTITY type models
2774
      // Note: In theory we could apply similar logic for TRANSLATION
2775
      // type models, but we do not code these due to a spec bug
2776
      // (see comments in gm_get_motion_vector() in av1/common/mv.h)
2777
0
      assert(xd->global_motion[refs[idx]].wmtype != TRANSLATION);
2778
0
      return 0;
2779
0
    }
2780
2781
    // Don't prune if we have invalid data. An entry equal to INT32_MAX in
    // best_single_sse_in_refs is treated here as "no SSE recorded".
2782
768k
    assert(mbmi->mv[idx].as_int == 0);
2783
768k
    if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
2784
0
      return 0;
2785
0
    }
2786
768k
  }
2787
2788
  // Sum up the sse of ZEROMV and best NEWMV
2789
768k
  unsigned int this_sse_sum = 0;
2790
768k
  unsigned int best_sse_sum = 0;
2791
1.53M
  for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2792
768k
    const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
2793
768k
    // pd points at the first entry of xd->plane (presumably the luma plane,
    // matching p above -- confirm AOM_PLANE_Y is index 0).
    const struct macroblockd_plane *pd = xd->plane;
2794
768k
    const struct buf_2d *src_buf = &p->src;
2795
768k
    const struct buf_2d *ref_buf = &pd->pre[idx];
2796
768k
    const uint8_t *src = src_buf->buf;
2797
768k
    const uint8_t *ref = ref_buf->buf;
2798
768k
    const int src_stride = src_buf->stride;
2799
768k
    const int ref_stride = ref_buf->stride;
2800
2801
768k
    unsigned int this_sse;
2802
768k
    // The return value of vf() is discarded; only the value written through
    // the last argument (this_sse) is used.
    fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
2803
768k
    this_sse_sum += this_sse;
2804
2805
768k
    const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
2806
768k
    best_sse_sum += best_sse;
2807
768k
  }
2808
2809
768k
  // Prune only when the ZEROMV sse exceeds the best NEWMV sse by a margin:
  // 1.25x when prune_zero_mv_with_sse is 1 (or less), 1.00x when it is above 1.
  const double mul = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25;
2810
768k
  if ((double)this_sse_sum > (mul * (double)best_sse_sum)) {
2811
712k
    return 1;
2812
712k
  }
2813
2814
55.2k
  return 0;
2815
768k
}
2816
2817
/*!\brief Searches for interpolation filter in realtime mode during winner eval
2818
 *
2819
 * \ingroup inter_mode_search
2820
 *
2821
 * Does a simple interpolation filter search during winner mode evaluation. This
2822
 * is currently only used by realtime mode as \ref
2823
 * av1_interpolation_filter_search is not called during realtime encoding.
2824
 *
2825
 * This function only searches over two possible filters. EIGHTTAP_REGULAR is
2826
 * always searched. For lowres clips (<= 240p), MULTITAP_SHARP is also searched. For
2827
 * higher-res clips (> 240p), EIGHTTAP_SMOOTH is also searched.
2828
 *
2829
 * \param[in]     cpi               Pointer to the compressor. Used for feature
2830
 *                                  flags.
2831
 * \param[in,out] x                 Pointer to macroblock. This is primarily
2832
 *                                  used to access the buffers.
2833
 * \param[in]     mi_row            The current row in mi unit (4X4 pixels).
2834
 * \param[in]     mi_col            The current col in mi unit (4X4 pixels).
2835
 * \param[in]     bsize             The current block_size.
2836
 * \return Returns true if a predictor is built in xd->dst, false otherwise.
2837
 */
2838
static inline bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
2839
                                      int mi_row, int mi_col,
2840
0
                                      BLOCK_SIZE bsize) {
2841
0
  // Candidate filter pairs. The search loop below skips one of the three
  // depending on the frame resolution, so two candidates are evaluated.
  static const InterpFilters filters_ref_set[3] = {
2842
0
    { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
2843
0
    { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
2844
0
    { MULTITAP_SHARP, MULTITAP_SHARP }
2845
0
  };
2846
2847
0
  const AV1_COMMON *const cm = &cpi->common;
2848
0
  MACROBLOCKD *const xd = &x->e_mbd;
2849
0
  MB_MODE_INFO *const mi = xd->mi[0];
2850
0
  int64_t best_cost = INT64_MAX;
2851
0
  int best_filter_index = -1;
2852
  // dst_bufs[0] stores the new predictor, and dst_bufs[1] stores the best
  // (dst_bufs is declared further below).
2853
0
  const int num_planes = av1_num_planes(cm);
2854
0
  const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
2855
0
  assert(is_inter_mode(mi->mode));
2856
0
  assert(mi->motion_mode == SIMPLE_TRANSLATION);
2857
0
  assert(!is_inter_compound_mode(mi->mode));
2858
2859
0
  // Nothing to search (and no predictor is built) when interpolation is not
  // needed for this block.
  if (!av1_is_interp_needed(xd)) {
2860
0
    return false;
2861
0
  }
2862
2863
0
  struct macroblockd_plane *pd = xd->plane;
2864
0
  const BUFFER_SET orig_dst = {
2865
0
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
2866
0
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
2867
0
  };
2868
0
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
2869
0
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
2870
0
                                 tmp_buf + 2 * MAX_SB_SQUARE },
2871
0
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
2872
0
  const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };
2873
2874
0
  for (int i = 0; i < 3; ++i) {
2875
0
    // At <= 240p skip EIGHTTAP_SMOOTH; at higher resolutions skip
    // MULTITAP_SHARP. EIGHTTAP_REGULAR is always evaluated.
    if (is_240p_or_lesser) {
2876
0
      if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
2877
0
        continue;
2878
0
      }
2879
0
    } else {
2880
0
      if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
2881
0
        continue;
2882
0
      }
2883
0
    }
2884
0
    int64_t cost;
2885
0
    RD_STATS tmp_rd = { 0 };
2886
2887
0
    // Only the luma plane is predicted and modelled for each candidate.
    mi->interp_filters.as_filters = filters_ref_set[i];
2888
0
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
2889
2890
0
    model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
2891
0
                       ? MODELRD_LEGACY
2892
0
                       : MODELRD_TYPE_INTERP_FILTER](
2893
0
        cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
2894
0
        &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);
2895
2896
0
    tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
2897
0
                                           cm->seq_params->enable_dual_filter);
2898
0
    cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
2899
0
    if (cost < best_cost) {
2900
0
      best_filter_index = i;
2901
0
      best_cost = cost;
2902
0
      // Keep the predictor just built as the best one and direct the next
      // prediction into the other buffer.
      swap_dst_buf(xd, dst_bufs, num_planes);
2903
0
    }
2904
0
  }
2905
0
  assert(best_filter_index >= 0);
2906
2907
0
  mi->interp_filters.as_filters = filters_ref_set[best_filter_index];
2908
2909
0
  const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];
2910
2911
0
  if (is_best_pred_in_orig) {
2912
0
    // The best predictor already sits in the original buffer; swap once more
    // so that dst_bufs[0] (and with it xd's dst) is the original buffer again.
    swap_dst_buf(xd, dst_bufs, num_planes);
2913
0
  } else {
2914
    // Note that xd->pd's buffers are kept in sync with dst_bufs[0]. So if
2915
    // is_best_pred_in_orig is false, that means the current buffer is the
2916
    // original one.
2917
0
    assert(&orig_dst == dst_bufs[0]);
2918
0
    assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
2919
0
    const int width = block_size_wide[bsize];
2920
0
    const int height = block_size_high[bsize];
    // Copy the best luma predictor from tmp_dst into orig_dst (assuming the
    // copy helpers take (src, src_stride, dst, dst_stride, w, h) -- confirm).
2921
0
#if CONFIG_AV1_HIGHBITDEPTH
2922
0
    const bool is_hbd = is_cur_buf_hbd(xd);
2923
0
    if (is_hbd) {
2924
0
      aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
2925
0
                               tmp_dst.stride[AOM_PLANE_Y],
2926
0
                               CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
2927
0
                               orig_dst.stride[AOM_PLANE_Y], width, height);
2928
0
    } else {
2929
0
      aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
2930
0
                        orig_dst.plane[AOM_PLANE_Y],
2931
0
                        orig_dst.stride[AOM_PLANE_Y], width, height);
2932
0
    }
2933
#else
2934
    aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
2935
                      orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
2936
                      width, height);
2937
#endif
2938
0
  }
2939
2940
  // Build the YUV predictor. Luma is already in place at this point, so only
  // the U and V planes are predicted here, using the selected filter.
2941
0
  if (num_planes > 1) {
2942
0
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
2943
0
                                  AOM_PLANE_U, AOM_PLANE_V);
2944
0
  }
2945
2946
0
  return true;
2947
0
}
2948
2949
/*!\brief AV1 inter mode RD computation
2950
 *
2951
 * \ingroup inter_mode_search
2952
 * Do the RD search for a given inter mode and compute all information relevant
2953
 * to the input mode. It will compute the best MV,
2954
 * compound parameters (if the mode is a compound mode) and interpolation filter
2955
 * parameters.
2956
 *
2957
 * \param[in]     cpi               Top-level encoder structure.
2958
 * \param[in]     tile_data         Pointer to struct holding adaptive
2959
 *                                  data/contexts/models for the tile during
2960
 *                                  encoding.
2961
 * \param[in]     x                 Pointer to structure holding all the data
2962
 *                                  for the current macroblock.
2963
 * \param[in]     bsize             Current block size.
2964
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
2965
 *                                  information.
2966
 * \param[in,out] rd_stats_y        Struct to keep track of the RD information
2967
 *                                  for only the Y plane.
2968
 * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
2969
 *                                  for only the UV planes.
2970
 * \param[in]     args              HandleInterModeArgs struct holding
2971
 *                                  miscellaneous arguments for inter mode
2972
 *                                  search. See the documentation for this
2973
 *                                  struct for a description of each member.
2974
 * \param[in]     ref_best_rd       Best RD found so far for this block.
2975
 *                                  It is used for early termination of this
2976
 *                                  search if the RD exceeds this value.
2977
 * \param[in]     tmp_buf           Temporary buffer used to hold predictors
2978
 *                                  built in this search.
2979
 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2980
 *                                  allocated buffers for the compound
2981
 *                                  predictors and masks in the compound type
2982
 *                                  search.
2983
 * \param[in,out] best_est_rd       Estimated RD for motion mode search if
2984
 *                                  do_tx_search (see below) is 0.
2985
 * \param[in]     do_tx_search      Parameter to indicate whether or not to do
2986
 *                                  a full transform search. This will compute
2987
 *                                  an estimated RD for the modes without the
2988
 *                                  transform search and later perform the full
2989
 *                                  transform search on the best candidates.
2990
 * \param[in,out] inter_modes_info  InterModesInfo struct to hold inter mode
2991
 *                                  information to perform a full transform
2992
 *                                  search only on winning candidates searched
2993
 *                                  with an estimate for transform coding RD.
2994
 * \param[in,out] motion_mode_cand  A motion_mode_candidate struct to store
2995
 *                                  motion mode information used in a speed
2996
 *                                  feature to search motion modes other than
2997
 *                                  SIMPLE_TRANSLATION only on winning
2998
 *                                  candidates.
2999
 * \param[in,out] skip_rd           A length 2 array, where skip_rd[0] is the
3000
 *                                  best total RD for a skip mode so far, and
3001
 *                                  skip_rd[1] is the best RD for a skip mode so
3002
 *                                  far in luma. This is used as a speed feature
3003
 *                                  to skip the transform search if the computed
3004
 *                                  skip RD for the current mode is not better
3005
 *                                  than the best skip_rd so far.
3006
 * \param[in]     inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
3007
 *                                         narrow down the search based on data
3008
 *                                         collected in the TPL model.
3009
 * \param[out]    yrd               Stores the rdcost corresponding to encoding
3010
 *                                  the luma plane.
3011
 *
3012
 * \return The RD cost for the mode being searched.
3013
 */
3014
static int64_t handle_inter_mode(
3015
    AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
3016
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
3017
    RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
3018
    uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
3019
    int64_t *best_est_rd, const int do_tx_search,
3020
    InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
3021
    int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
3022
4.25M
    int64_t *yrd) {
3023
4.25M
  const AV1_COMMON *cm = &cpi->common;
3024
4.25M
  const int num_planes = av1_num_planes(cm);
3025
4.25M
  MACROBLOCKD *xd = &x->e_mbd;
3026
4.25M
  MB_MODE_INFO *mbmi = xd->mi[0];
3027
4.25M
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3028
4.25M
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3029
4.25M
  const int is_comp_pred = has_second_ref(mbmi);
3030
4.25M
  const PREDICTION_MODE this_mode = mbmi->mode;
3031
3032
#if CONFIG_REALTIME_ONLY
3033
  const int prune_modes_based_on_tpl = 0;
3034
#else   // CONFIG_REALTIME_ONLY
3035
4.25M
  const TplParams *const tpl_data = &cpi->ppi->tpl_data;
3036
4.25M
  const int prune_modes_based_on_tpl =
3037
4.25M
      cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
3038
1.68M
      av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
3039
4.25M
#endif  // CONFIG_REALTIME_ONLY
3040
4.25M
  int i;
3041
  // Reference frames for this mode
  // refs[1] falls back to 0 when ref_frame[1] is negative, so both entries can
  // be used as array indices below.
3042
4.25M
  const int refs[2] = { mbmi->ref_frame[0],
3043
4.25M
                        (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
3044
4.25M
  int rate_mv = 0;
3045
4.25M
  int64_t rd = INT64_MAX;
3046
  // Do first prediction into the destination buffer. Do the next
3047
  // prediction into a temporary buffer. Then keep track of which one
3048
  // of these currently holds the best predictor, and use the other
3049
  // one for future predictions. In the end, copy from tmp_buf to
3050
  // dst if necessary.
3051
4.25M
  struct macroblockd_plane *pd = xd->plane;
3052
4.25M
  const BUFFER_SET orig_dst = {
3053
4.25M
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
3054
4.25M
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
3055
4.25M
  };
3056
4.25M
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
3057
4.25M
                                 tmp_buf + 2 * MAX_SB_SQUARE },
3058
4.25M
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
3059
3060
4.25M
  int64_t ret_val = INT64_MAX;
3061
4.25M
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3062
4.25M
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
3063
4.25M
  int64_t best_rd = INT64_MAX;
3064
4.25M
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
3065
4.25M
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
3066
4.25M
  int64_t best_yrd = INT64_MAX;
3067
4.25M
  MB_MODE_INFO best_mbmi = *mbmi;
3068
4.25M
  int best_xskip_txfm = 0;
3069
4.25M
  int64_t newmv_ret_val = INT64_MAX;
3070
4.25M
  inter_mode_info mode_info[MAX_REF_MV_SEARCH];
3071
3072
  // Do not prune the mode based on inter cost from tpl if the current ref frame
3073
  // is the winner ref in neighbouring blocks.
3074
4.25M
  int ref_match_found_in_above_nb = 0;
3075
4.25M
  int ref_match_found_in_left_nb = 0;
3076
4.25M
  if (prune_modes_based_on_tpl) {
3077
1.69M
    ref_match_found_in_above_nb =
3078
1.69M
        find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
3079
1.69M
    ref_match_found_in_left_nb =
3080
1.69M
        find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
3081
1.69M
  }
3082
3083
  // First, perform a simple translation search for each of the indices. If
3084
  // an index performs well, it will be fully searched in the main loop
3085
  // of this function.
3086
4.25M
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
3087
  // Save MV results from first 2 ref_mv_idx.
3088
4.25M
  int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
3089
4.25M
  int best_ref_mv_idx = -1;
3090
4.25M
  const int idx_mask =
3091
4.25M
      ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set);
3092
4.25M
  const int16_t mode_ctx =
3093
4.25M
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
3094
4.25M
  const ModeCosts *mode_costs = &x->mode_costs;
3095
4.25M
  const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
3096
4.25M
  const int base_rate =
3097
4.25M
      args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;
3098
3099
12.7M
  for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
3100
8.50M
    save_mv[i][0].as_int = INVALID_MV;
3101
8.50M
    save_mv[i][1].as_int = INVALID_MV;
3102
8.50M
  }
3103
4.25M
  args->start_mv_cnt = 0;
3104
3105
  // Main loop of this function. This will iterate over all of the ref mvs
3106
  // in the dynamic reference list and do the following:
3107
  //    1.) Get the current MV. Create newmv MV if necessary
3108
  //    2.) Search compound type and parameters if applicable
3109
  //    3.) Do interpolation filter search
3110
  //    4.) Build the inter predictor
3111
  //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
3112
  //        WARPED_CAUSAL)
3113
  //    6.) Update stats if best so far
3114
9.07M
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
3115
4.82M
    mbmi->ref_mv_idx = ref_mv_idx;
3116
3117
4.82M
    mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
3118
4.82M
    mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
3119
4.82M
    const int drl_cost = get_drl_cost(
3120
4.82M
        mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
3121
4.82M
    mode_info[ref_mv_idx].drl_cost = drl_cost;
3122
4.82M
    mode_info[ref_mv_idx].skip = 0;
3123
3124
4.82M
    if (!mask_check_bit(idx_mask, ref_mv_idx)) {
3125
      // MV did not perform well in simple translation search. Skip it.
3126
25.4k
      continue;
3127
25.4k
    }
3128
4.79M
    if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
3129
953k
        !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
3130
      // Skip mode if TPL model indicates it will not be beneficial.
3131
550k
      if (prune_modes_based_on_tpl_stats(
3132
550k
              inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
3133
550k
              cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
3134
86.1k
        continue;
3135
550k
    }
3136
4.71M
    av1_init_rd_stats(rd_stats);
3137
3138
    // Initialize compound mode data
3139
4.71M
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3140
4.71M
    mbmi->comp_group_idx = 0;
3141
4.71M
    mbmi->compound_idx = 1;
3142
4.71M
    if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
3143
3144
4.71M
    mbmi->num_proj_ref = 0;
3145
4.71M
    mbmi->motion_mode = SIMPLE_TRANSLATION;
3146
3147
    // Compute cost for signalling this DRL index
3148
4.71M
    rd_stats->rate = base_rate;
3149
4.71M
    rd_stats->rate += drl_cost;
3150
3151
4.71M
    int rs = 0;
3152
4.71M
    int compmode_interinter_cost = 0;
3153
3154
4.71M
    int_mv cur_mv[2];
3155
3156
    // TODO(Cherma): Extend this speed feature to support compound mode
3157
4.71M
    int skip_repeated_ref_mv =
3158
4.71M
        is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
3159
    // Generate the current mv according to the prediction mode
3160
4.71M
    if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
3161
16.9k
      continue;
3162
16.9k
    }
3163
3164
    // The above call to build_cur_mv does not handle NEWMV modes. Build
3165
    // the mv here if we have NEWMV for any predictors.
3166
4.69M
    if (have_newmv_in_inter_mode(this_mode)) {
3167
#if CONFIG_COLLECT_COMPONENT_TIMING
3168
      start_timing(cpi, handle_newmv_time);
3169
#endif
3170
1.84M
      newmv_ret_val =
3171
1.84M
          handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
3172
#if CONFIG_COLLECT_COMPONENT_TIMING
3173
      end_timing(cpi, handle_newmv_time);
3174
#endif
3175
3176
1.84M
      if (newmv_ret_val != 0) continue;
3177
3178
1.61M
      if (is_inter_singleref_mode(this_mode) &&
3179
1.61M
          cur_mv[0].as_int != INVALID_MV) {
3180
1.61M
        const MV_REFERENCE_FRAME ref = refs[0];
3181
1.61M
        const unsigned int this_sse = x->pred_sse[ref];
3182
1.61M
        if (this_sse < args->best_single_sse_in_refs[ref]) {
3183
1.51M
          args->best_single_sse_in_refs[ref] = this_sse;
3184
1.51M
        }
3185
3186
1.61M
        if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
3187
0
          const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1;
3188
0
          const int pix_idx = num_pels_log2_lookup[bsize] - 4;
3189
0
          const double scale_factor[3][11] = {
3190
0
            { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
3191
0
            { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
3192
0
            { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
3193
0
          };
3194
0
          assert(pix_idx >= 0);
3195
0
          assert(th_idx <= 2);
3196
0
          if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse)
3197
0
            continue;
3198
0
        }
3199
1.61M
      }
3200
3201
1.61M
      rd_stats->rate += rate_mv;
3202
1.61M
    }
3203
    // Copy the motion vector for this mode into mbmi struct
3204
8.92M
    for (i = 0; i < is_comp_pred + 1; ++i) {
3205
4.46M
      mbmi->mv[i].as_int = cur_mv[i].as_int;
3206
4.46M
    }
3207
3208
4.46M
    // Skip when the rate alone (with zero distortion) already costs more than
    // the best rd so far; NEARESTMV and NEAREST_NEARESTMV are exempt.
    if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
3209
412
        mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
3210
353
      continue;
3211
353
    }
3212
3213
    // Skip the rest of the search if prune_ref_mv_idx_search speed feature
3214
    // is enabled, and the current MV is similar to a previous one.
3215
4.46M
    if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
3216
0
        prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
3217
0
                                cpi->sf.inter_sf.prune_ref_mv_idx_search))
3218
0
      continue;
3219
3220
4.46M
    if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
3221
4.46M
        (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
3222
768k
      if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args,
3223
768k
                                 cpi->sf.gm_sf.prune_zero_mv_with_sse)) {
3224
712k
        continue;
3225
712k
      }
3226
768k
    }
3227
3228
3.74M
    int skip_build_pred = 0;
3229
3.74M
    const int mi_row = xd->mi_row;
3230
3.74M
    const int mi_col = xd->mi_col;
3231
3232
    // Handle a compound predictor, continue if it is determined this
3233
    // cannot be the best compound mode
3234
3.74M
    if (is_comp_pred) {
3235
#if CONFIG_COLLECT_COMPONENT_TIMING
3236
      start_timing(cpi, compound_type_rd_time);
3237
#endif
3238
0
      const int not_best_mode = process_compound_inter_mode(
3239
0
          cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
3240
0
          rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
3241
0
          &skip_build_pred);
3242
#if CONFIG_COLLECT_COMPONENT_TIMING
3243
      end_timing(cpi, compound_type_rd_time);
3244
#endif
3245
0
      if (not_best_mode) continue;
3246
0
    }
3247
3248
3.74M
    if (!args->skip_ifs) {
3249
#if CONFIG_COLLECT_COMPONENT_TIMING
3250
      start_timing(cpi, interpolation_filter_search_time);
3251
#endif
3252
      // Determine the interpolation filter for this mode
3253
2.39M
      ret_val = av1_interpolation_filter_search(
3254
2.39M
          x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
3255
2.39M
          &skip_build_pred, args, ref_best_rd);
3256
#if CONFIG_COLLECT_COMPONENT_TIMING
3257
      end_timing(cpi, interpolation_filter_search_time);
3258
#endif
3259
2.39M
      if (args->modelled_rd != NULL && !is_comp_pred) {
3260
2.39M
        args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
3261
2.39M
      }
3262
2.39M
      if (ret_val != 0) {
3263
0
        restore_dst_buf(xd, orig_dst, num_planes);
3264
0
        continue;
3265
2.39M
      } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
3266
2.39M
                 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
3267
143k
        // The modelled rd is above roughly 8/3 of the best rd so far: give up
        // on this ref_mv_idx.
        restore_dst_buf(xd, orig_dst, num_planes);
3268
143k
        continue;
3269
143k
      }
3270
3271
      // Compute modelled RD if enabled
3272
2.25M
      if (args->modelled_rd != NULL) {
3273
2.25M
        if (is_comp_pred) {
3274
0
          const int mode0 = compound_ref0_mode(this_mode);
3275
0
          const int mode1 = compound_ref1_mode(this_mode);
3276
0
          const int64_t mrd =
3277
0
              AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
3278
0
                     args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
3279
0
          if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
3280
0
            restore_dst_buf(xd, orig_dst, num_planes);
3281
0
            continue;
3282
0
          }
3283
0
        }
3284
2.25M
      }
3285
2.25M
    }
3286
3287
3.60M
    rd_stats->rate += compmode_interinter_cost;
3288
3.60M
    if (skip_build_pred != 1) {
3289
      // Build this inter predictor if it has not been previously built
3290
1.49M
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
3291
1.49M
                                    av1_num_planes(cm) - 1);
3292
1.49M
    }
3293
3294
#if CONFIG_COLLECT_COMPONENT_TIMING
3295
    start_timing(cpi, motion_mode_rd_time);
3296
#endif
3297
3.60M
    int rate2_nocoeff = rd_stats->rate;
3298
    // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
3299
    // OBMC_CAUSAL or WARPED_CAUSAL
3300
3.60M
    int64_t this_yrd;
3301
3.60M
    ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
3302
3.60M
                             rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
3303
3.60M
                             &orig_dst, best_est_rd, do_tx_search,
3304
3.60M
                             inter_modes_info, 0, &this_yrd);
3305
#if CONFIG_COLLECT_COMPONENT_TIMING
3306
    end_timing(cpi, motion_mode_rd_time);
3307
#endif
3308
3.60M
    assert(
3309
3.60M
        IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));
3310
3311
3.60M
    if (ret_val != INT64_MAX) {
3312
1.98M
      int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
3313
1.98M
      const THR_MODES mode_enum = get_prediction_mode_idx(
3314
1.98M
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3315
      // Collect mode stats for multiwinner mode processing
3316
1.98M
      store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
3317
1.98M
                              rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
3318
1.98M
                              cpi->sf.winner_mode_sf.multi_winner_mode_type,
3319
1.98M
                              do_tx_search);
3320
1.98M
      if (tmp_rd < best_rd) {
3321
1.98M
        best_yrd = this_yrd;
3322
        // Update the best rd stats if we found the best mode so far
3323
1.98M
        best_rd_stats = *rd_stats;
3324
1.98M
        best_rd_stats_y = *rd_stats_y;
3325
1.98M
        best_rd_stats_uv = *rd_stats_uv;
3326
1.98M
        best_rd = tmp_rd;
3327
1.98M
        best_mbmi = *mbmi;
3328
1.98M
        best_xskip_txfm = txfm_info->skip_txfm;
3329
1.98M
        memcpy(best_blk_skip, txfm_info->blk_skip,
3330
1.98M
               sizeof(best_blk_skip[0]) * xd->height * xd->width);
3331
1.98M
        av1_copy_array(best_tx_type_map, xd->tx_type_map,
3332
1.98M
                       xd->height * xd->width);
3333
1.98M
        motion_mode_cand->rate_mv = rate_mv;
3334
1.98M
        motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
3335
1.98M
      }
3336
3337
1.98M
      // Tighten the threshold used by the remaining iterations and remember
      // the index that achieved it (passed to prune_ref_mv_idx_search above).
      if (tmp_rd < ref_best_rd) {
3338
1.92M
        ref_best_rd = tmp_rd;
3339
1.92M
        best_ref_mv_idx = ref_mv_idx;
3340
1.92M
      }
3341
1.98M
    }
3342
3.60M
    restore_dst_buf(xd, orig_dst, num_planes);
3343
3.60M
  }
3344
3345
4.25M
  // best_rd is only updated after motion_mode_rd() returned a valid cost, so
  // INT64_MAX here means no ref_mv_idx produced a usable candidate.
  if (best_rd == INT64_MAX) return INT64_MAX;
3346
3347
  // re-instate status of the best choice
3348
1.94M
  *rd_stats = best_rd_stats;
3349
1.94M
  *rd_stats_y = best_rd_stats_y;
3350
1.94M
  *rd_stats_uv = best_rd_stats_uv;
3351
1.94M
  *yrd = best_yrd;
3352
1.94M
  *mbmi = best_mbmi;
3353
1.94M
  txfm_info->skip_txfm = best_xskip_txfm;
3354
1.94M
  assert(IMPLIES(mbmi->comp_group_idx == 1,
3355
1.94M
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
3356
1.94M
  memcpy(txfm_info->blk_skip, best_blk_skip,
3357
1.94M
         sizeof(best_blk_skip[0]) * xd->height * xd->width);
3358
1.94M
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
3359
3360
1.94M
  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
3361
3362
1.94M
  return rd_stats->rdcost;
3363
4.25M
}
3364
3365
/*!\brief Search for the best intrabc predictor
3366
 *
3367
 * \ingroup intra_mode_search
3368
 * \callergraph
3369
 * This function performs a motion search to find the best intrabc predictor.
3370
 *
3371
 * \returns Returns the best overall rdcost (including the non-intrabc modes
3372
 * search before this function).
3373
 */
3374
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
3375
                                       PICK_MODE_CONTEXT *ctx,
3376
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
3377
9.56M
                                       int64_t best_rd) {
3378
9.56M
  const AV1_COMMON *const cm = &cpi->common;
3379
9.56M
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
3380
0
      !cpi->sf.mv_sf.use_intrabc || cpi->sf.rt_sf.use_nonrd_pick_mode)
3381
9.56M
    return INT64_MAX;
3382
965
  if (cpi->sf.mv_sf.intrabc_search_level >= 1 && bsize != BLOCK_4X4 &&
3383
0
      bsize != BLOCK_8X8 && bsize != BLOCK_16X16) {
3384
0
    return INT64_MAX;
3385
0
  }
3386
965
  const int num_planes = av1_num_planes(cm);
3387
3388
965
  MACROBLOCKD *const xd = &x->e_mbd;
3389
965
  const TileInfo *tile = &xd->tile;
3390
965
  MB_MODE_INFO *mbmi = xd->mi[0];
3391
965
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3392
3393
965
  const int mi_row = xd->mi_row;
3394
965
  const int mi_col = xd->mi_col;
3395
965
  const int w = block_size_wide[bsize];
3396
965
  const int h = block_size_high[bsize];
3397
965
  const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
3398
965
  const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
3399
3400
965
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3401
965
  const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
3402
965
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
3403
965
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3404
965
                   mbmi_ext->mode_context);
3405
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3406
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3407
965
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
3408
965
  int_mv nearestmv, nearmv;
3409
965
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
3410
965
                                   0);
3411
3412
965
  if (nearestmv.as_int == INVALID_MV) {
3413
0
    nearestmv.as_int = 0;
3414
0
  }
3415
965
  if (nearmv.as_int == INVALID_MV) {
3416
0
    nearmv.as_int = 0;
3417
0
  }
3418
3419
965
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
3420
965
  if (dv_ref.as_int == 0) {
3421
0
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
3422
0
  }
3423
  // Ref DV should not have sub-pel.
3424
965
  assert((dv_ref.as_mv.col & 7) == 0);
3425
965
  assert((dv_ref.as_mv.row & 7) == 0);
3426
965
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
3427
3428
965
  struct buf_2d yv12_mb[MAX_MB_PLANE];
3429
965
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
3430
965
  for (int i = 0; i < num_planes; ++i) {
3431
0
    xd->plane[i].pre[0] = yv12_mb[i];
3432
0
  }
3433
3434
965
  enum IntrabcMotionDirection {
3435
965
    IBC_MOTION_ABOVE,
3436
965
    IBC_MOTION_LEFT,
3437
965
    IBC_MOTION_DIRECTIONS
3438
965
  };
3439
3440
965
  MB_MODE_INFO best_mbmi = *mbmi;
3441
965
  RD_STATS best_rdstats = *rd_stats;
3442
965
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
3443
965
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
3444
965
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3445
3446
965
  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
3447
965
  const SEARCH_METHODS search_method =
3448
965
      av1_get_default_mv_search_method(x, &cpi->sf.mv_sf, bsize);
3449
965
  const search_site_config *lookahead_search_sites =
3450
965
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
3451
965
  const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
3452
965
  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
3453
965
                                     &dv_ref.as_mv, start_mv,
3454
965
                                     lookahead_search_sites, search_method,
3455
965
                                     /*fine_search_interval=*/0);
3456
965
  const IntraBCMVCosts *const dv_costs = x->dv_costs;
3457
965
  av1_set_ms_to_intra_mode(&fullms_params, dv_costs);
3458
3459
965
  const enum IntrabcMotionDirection max_dir = cpi->sf.mv_sf.intrabc_search_level
3460
965
                                                  ? IBC_MOTION_LEFT
3461
965
                                                  : IBC_MOTION_DIRECTIONS;
3462
3463
965
  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE; dir < max_dir;
3464
965
       ++dir) {
3465
0
    switch (dir) {
3466
0
      case IBC_MOTION_ABOVE:
3467
0
        fullms_params.mv_limits.col_min =
3468
0
            (tile->mi_col_start - mi_col) * MI_SIZE;
3469
0
        fullms_params.mv_limits.col_max =
3470
0
            (tile->mi_col_end - mi_col) * MI_SIZE - w;
3471
0
        fullms_params.mv_limits.row_min =
3472
0
            (tile->mi_row_start - mi_row) * MI_SIZE;
3473
0
        fullms_params.mv_limits.row_max =
3474
0
            (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
3475
0
        break;
3476
0
      case IBC_MOTION_LEFT:
3477
0
        fullms_params.mv_limits.col_min =
3478
0
            (tile->mi_col_start - mi_col) * MI_SIZE;
3479
0
        fullms_params.mv_limits.col_max =
3480
0
            (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
3481
        // TODO(aconverse@google.com): Minimize the overlap between above and
3482
        // left areas.
3483
0
        fullms_params.mv_limits.row_min =
3484
0
            (tile->mi_row_start - mi_row) * MI_SIZE;
3485
0
        int bottom_coded_mi_edge =
3486
0
            AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
3487
0
        fullms_params.mv_limits.row_max =
3488
0
            (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
3489
0
        break;
3490
0
      default: assert(0);
3491
0
    }
3492
0
    assert(fullms_params.mv_limits.col_min >= fullms_params.mv_limits.col_min);
3493
0
    assert(fullms_params.mv_limits.col_max <= fullms_params.mv_limits.col_max);
3494
0
    assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min);
3495
0
    assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max);
3496
3497
0
    av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);
3498
3499
0
    if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
3500
0
        fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
3501
0
      continue;
3502
0
    }
3503
3504
0
    const int step_param = cpi->mv_search_params.mv_step_param;
3505
0
    IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
3506
0
    int_mv best_mv;
3507
0
    FULLPEL_MV_STATS best_mv_stats;
3508
0
    int bestsme = INT_MAX;
3509
3510
    // Perform a hash search first, and see if we get any matches.
3511
0
    if (!cpi->sf.mv_sf.hash_max_8x8_intrabc_blocks || bsize <= BLOCK_8X8) {
3512
0
      bestsme = av1_intrabc_hash_search(cpi, xd, &fullms_params,
3513
0
                                        intrabc_hash_info, &best_mv.as_fullmv);
3514
0
    }
3515
3516
    // If intrabc_search_level is not 0 and we found a hash search match, do
3517
    // not proceed with pixel search as the hash match is very likely to be the
3518
    // best intrabc candidate anyway.
3519
0
    if (bestsme == INT_MAX || cpi->sf.mv_sf.intrabc_search_level == 0) {
3520
0
      int_mv best_pixel_mv;
3521
0
      const int pixelsme =
3522
0
          av1_full_pixel_search(start_mv, &fullms_params, step_param, NULL,
3523
0
                                &best_pixel_mv.as_fullmv, &best_mv_stats, NULL);
3524
0
      if (pixelsme < bestsme) {
3525
0
        bestsme = pixelsme;
3526
0
        best_mv = best_pixel_mv;
3527
0
      }
3528
0
    }
3529
0
    if (bestsme == INT_MAX) continue;
3530
0
    const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
3531
0
    if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
3532
0
                                get_fullmv_from_mv(&dv)))
3533
0
      continue;
3534
0
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
3535
0
                         cm->seq_params->mib_size_log2))
3536
0
      continue;
3537
3538
    // DV should not have sub-pel.
3539
0
    assert((dv.col & 7) == 0);
3540
0
    assert((dv.row & 7) == 0);
3541
0
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
3542
0
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
3543
0
    mbmi->use_intrabc = 1;
3544
0
    mbmi->mode = DC_PRED;
3545
0
    mbmi->uv_mode = UV_DC_PRED;
3546
0
    mbmi->motion_mode = SIMPLE_TRANSLATION;
3547
0
    mbmi->mv[0].as_mv = dv;
3548
0
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
3549
0
    mbmi->skip_txfm = 0;
3550
0
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3551
0
                                  av1_num_planes(cm) - 1);
3552
3553
    // TODO(aconverse@google.com): The full motion field defining discount
3554
    // in MV_COST_WEIGHT is too large. Explore other values.
3555
0
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
3556
0
                                        dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
3557
0
    const int rate_mode = x->mode_costs.intrabc_cost[1];
3558
0
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
3559
0
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
3560
0
                         &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
3561
0
      continue;
3562
0
    rd_stats_yuv.rdcost =
3563
0
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
3564
0
    if (rd_stats_yuv.rdcost < best_rd) {
3565
0
      best_rd = rd_stats_yuv.rdcost;
3566
0
      best_mbmi = *mbmi;
3567
0
      best_rdstats = rd_stats_yuv;
3568
0
      memcpy(best_blk_skip, txfm_info->blk_skip,
3569
0
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3570
0
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
3571
0
    }
3572
0
  }
3573
965
  *mbmi = best_mbmi;
3574
965
  *rd_stats = best_rdstats;
3575
965
  memcpy(txfm_info->blk_skip, best_blk_skip,
3576
965
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3577
965
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
3578
#if CONFIG_RD_DEBUG
3579
  mbmi->rd_stats = *rd_stats;
3580
#endif
3581
965
  return best_rd;
3582
965
}
3583
3584
// TODO(chiyotsai@google.com): We are using struct $struct_name instead of their
3585
// typedef here because Doxygen doesn't know about the typedefs yet. So using
3586
// the typedef will prevent doxygen from finding this function and generating
3587
// the callgraph. Once documents for AV1_COMP and MACROBLOCK are added to
3588
// doxygen, we can revert back to using the typedefs.
3589
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
3590
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
3591
9.56M
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3592
9.56M
  const AV1_COMMON *const cm = &cpi->common;
3593
9.56M
  MACROBLOCKD *const xd = &x->e_mbd;
3594
9.56M
  MB_MODE_INFO *const mbmi = xd->mi[0];
3595
9.56M
  const int num_planes = av1_num_planes(cm);
3596
9.56M
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3597
9.56M
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3598
9.56M
  uint8_t y_skip_txfm = 0, uv_skip_txfm = 0;
3599
9.56M
  int64_t dist_y = 0, dist_uv = 0;
3600
3601
9.56M
  ctx->rd_stats.skip_txfm = 0;
3602
9.56M
  mbmi->ref_frame[0] = INTRA_FRAME;
3603
9.56M
  mbmi->ref_frame[1] = NONE_FRAME;
3604
9.56M
  mbmi->use_intrabc = 0;
3605
9.56M
  mbmi->mv[0].as_int = 0;
3606
9.56M
  mbmi->skip_mode = 0;
3607
3608
9.56M
  const int64_t intra_yrd =
3609
9.56M
      av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
3610
9.56M
                                 &y_skip_txfm, bsize, best_rd, ctx);
3611
3612
  // Initialize default mode evaluation params
3613
9.56M
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);
3614
3615
9.56M
  if (intra_yrd < best_rd) {
3616
    // Search intra modes for uv planes if needed
3617
8.54M
    if (num_planes > 1) {
3618
      // Set up the tx variables for reproducing the y predictions in case we
3619
      // need it for chroma-from-luma.
3620
3.74M
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
3621
2.95M
        memcpy(txfm_info->blk_skip, ctx->blk_skip,
3622
2.95M
               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
3623
2.95M
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
3624
2.95M
      }
3625
3.74M
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
3626
3.74M
      av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3627
3.74M
                                  &dist_uv, &uv_skip_txfm, bsize,
3628
3.74M
                                  max_uv_tx_size);
3629
3.74M
    }
3630
3631
    // Intra block is always coded as non-skip
3632
8.54M
    rd_cost->rate =
3633
8.54M
        rate_y + rate_uv +
3634
8.54M
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
3635
8.54M
    rd_cost->dist = dist_y + dist_uv;
3636
8.54M
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
3637
8.54M
    rd_cost->skip_txfm = 0;
3638
8.54M
  } else {
3639
1.01M
    rd_cost->rate = INT_MAX;
3640
1.01M
  }
3641
3642
9.56M
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
3643
8.07M
    best_rd = rd_cost->rdcost;
3644
9.56M
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
3645
0
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
3646
0
    memcpy(ctx->blk_skip, txfm_info->blk_skip,
3647
0
           sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
3648
0
    assert(rd_cost->rate != INT_MAX);
3649
0
  }
3650
9.56M
  if (rd_cost->rate == INT_MAX) return;
3651
3652
8.54M
  ctx->mic = *xd->mi[0];
3653
8.54M
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
3654
8.54M
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
3655
8.54M
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3656
8.54M
}
3657
3658
static inline void calc_target_weighted_pred(
3659
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
3660
    const uint8_t *above, int above_stride, const uint8_t *left,
3661
    int left_stride);
3662
3663
static inline void rd_pick_skip_mode(
3664
    RD_STATS *rd_cost, InterModeSearchState *search_state,
3665
    const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3666
279k
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3667
279k
  const AV1_COMMON *const cm = &cpi->common;
3668
279k
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3669
279k
  const int num_planes = av1_num_planes(cm);
3670
279k
  MACROBLOCKD *const xd = &x->e_mbd;
3671
279k
  MB_MODE_INFO *const mbmi = xd->mi[0];
3672
3673
279k
  x->compound_idx = 1;  // COMPOUND_AVERAGE
3674
279k
  RD_STATS skip_mode_rd_stats;
3675
279k
  av1_invalid_rd_stats(&skip_mode_rd_stats);
3676
3677
279k
  if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3678
279k
      skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3679
0
    return;
3680
0
  }
3681
3682
279k
  const MV_REFERENCE_FRAME ref_frame =
3683
279k
      LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3684
279k
  const MV_REFERENCE_FRAME second_ref_frame =
3685
279k
      LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3686
279k
  const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3687
279k
  const THR_MODES mode_index =
3688
279k
      get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3689
3690
279k
  if (mode_index == THR_INVALID) {
3691
0
    return;
3692
0
  }
3693
3694
279k
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3695
279k
       cpi->sf.inter_sf.disable_onesided_comp) &&
3696
279k
      cpi->all_one_sided_refs) {
3697
279k
    return;
3698
279k
  }
3699
3700
39
  mbmi->mode = this_mode;
3701
39
  mbmi->uv_mode = UV_DC_PRED;
3702
39
  mbmi->ref_frame[0] = ref_frame;
3703
39
  mbmi->ref_frame[1] = second_ref_frame;
3704
39
  const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3705
39
  if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3706
0
    MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3707
0
    if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3708
0
        mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3709
0
      return;
3710
0
    }
3711
0
    av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3712
0
                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3713
0
                     mbmi_ext->mode_context);
3714
    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3715
    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3716
0
    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3717
0
  }
3718
3719
39
  assert(this_mode == NEAREST_NEARESTMV);
3720
39
  if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3721
0
    return;
3722
0
  }
3723
3724
39
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
3725
39
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3726
39
  mbmi->comp_group_idx = 0;
3727
39
  mbmi->compound_idx = x->compound_idx;
3728
39
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3729
39
  mbmi->motion_mode = SIMPLE_TRANSLATION;
3730
39
  mbmi->ref_mv_idx = 0;
3731
39
  mbmi->skip_mode = mbmi->skip_txfm = 1;
3732
39
  mbmi->palette_mode_info.palette_size[0] = 0;
3733
39
  mbmi->palette_mode_info.palette_size[1] = 0;
3734
3735
39
  set_default_interp_filters(mbmi, cm->features.interp_filter);
3736
3737
39
  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3738
39
  for (int i = 0; i < num_planes; i++) {
3739
0
    xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3740
0
    xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3741
0
  }
3742
3743
39
  BUFFER_SET orig_dst;
3744
39
  for (int i = 0; i < num_planes; i++) {
3745
0
    orig_dst.plane[i] = xd->plane[i].dst.buf;
3746
0
    orig_dst.stride[i] = xd->plane[i].dst.stride;
3747
0
  }
3748
3749
  // Compare the use of skip_mode with the best intra/inter mode obtained.
3750
39
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3751
39
  int64_t best_intra_inter_mode_cost = INT64_MAX;
3752
39
  if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3753
0
    const ModeCosts *mode_costs = &x->mode_costs;
3754
0
    best_intra_inter_mode_cost = RDCOST(
3755
0
        x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3756
0
        rd_cost->dist);
3757
    // Account for non-skip mode rate in total rd stats
3758
0
    rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3759
0
    av1_rd_cost_update(x->rdmult, rd_cost);
3760
0
  }
3761
3762
  // Obtain the rdcost for skip_mode.
3763
39
  skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst,
3764
39
               best_intra_inter_mode_cost);
3765
3766
39
  if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3767
0
      (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3768
0
    assert(mode_index != THR_INVALID);
3769
0
    search_state->best_mbmode.skip_mode = 1;
3770
0
    search_state->best_mbmode = *mbmi;
3771
0
    memset(search_state->best_mbmode.inter_tx_size,
3772
0
           search_state->best_mbmode.tx_size,
3773
0
           sizeof(search_state->best_mbmode.inter_tx_size));
3774
0
    set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3775
0
                  search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3776
0
                  xd);
3777
0
    search_state->best_mode_index = mode_index;
3778
3779
    // Update rd_cost
3780
0
    rd_cost->rate = skip_mode_rd_stats.rate;
3781
0
    rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3782
0
    rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3783
3784
0
    search_state->best_rd = rd_cost->rdcost;
3785
0
    search_state->best_skip2 = 1;
3786
0
    search_state->best_mode_skippable = 1;
3787
3788
0
    x->txfm_search_info.skip_txfm = 1;
3789
0
  }
3790
39
}
3791
3792
// Get winner mode stats of given mode index
3793
static inline MB_MODE_INFO *get_winner_mode_stats(
3794
    MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3795
    int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3796
    RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3797
    THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3798
861k
    int mode_idx) {
3799
861k
  MB_MODE_INFO *winner_mbmi;
3800
861k
  if (multi_winner_mode_type) {
3801
0
    assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3802
0
    WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3803
0
    winner_mbmi = &winner_mode_stat->mbmi;
3804
3805
0
    *winner_rd_cost = &winner_mode_stat->rd_cost;
3806
0
    *winner_rate_y = winner_mode_stat->rate_y;
3807
0
    *winner_rate_uv = winner_mode_stat->rate_uv;
3808
0
    *winner_mode_index = winner_mode_stat->mode_index;
3809
861k
  } else {
3810
861k
    winner_mbmi = best_mbmode;
3811
861k
    *winner_rd_cost = best_rd_cost;
3812
861k
    *winner_rate_y = best_rate_y;
3813
861k
    *winner_rate_uv = best_rate_uv;
3814
861k
    *winner_mode_index = *best_mode_index;
3815
861k
  }
3816
861k
  return winner_mbmi;
3817
861k
}
3818
3819
// speed feature: fast intra/inter transform type search
3820
// Used for speed >= 2
3821
// When this speed feature is on, in rd mode search, only DCT is used.
3822
// After the mode is determined, this function is called, to select
3823
// transform types and get accurate rdcost.
3824
static inline void refine_winner_mode_tx(
3825
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
3826
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
3827
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
3828
932k
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
3829
932k
  const AV1_COMMON *const cm = &cpi->common;
3830
932k
  MACROBLOCKD *const xd = &x->e_mbd;
3831
932k
  MB_MODE_INFO *const mbmi = xd->mi[0];
3832
932k
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
3833
932k
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3834
932k
  int64_t best_rd;
3835
932k
  const int num_planes = av1_num_planes(cm);
3836
3837
932k
  if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
3838
932k
                                         rd_cost->skip_txfm))
3839
65.8k
    return;
3840
3841
  // Set params for winner mode evaluation
3842
867k
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
3843
3844
  // No best mode identified so far
3845
867k
  if (*best_mode_index == THR_INVALID) return;
3846
3847
861k
  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
3848
1.72M
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
3849
861k
    RD_STATS *winner_rd_stats = NULL;
3850
861k
    int winner_rate_y = 0, winner_rate_uv = 0;
3851
861k
    THR_MODES winner_mode_index = 0;
3852
3853
    // TODO(any): Combine best mode and multi-winner mode processing paths
3854
    // Get winner mode stats for current mode index
3855
861k
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
3856
861k
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
3857
861k
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
3858
861k
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);
3859
3860
861k
    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
3861
827k
        winner_mode_index != THR_INVALID &&
3862
827k
        is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
3863
827k
                                          rd_cost->skip_txfm)) {
3864
827k
      RD_STATS rd_stats = *winner_rd_stats;
3865
827k
      int skip_blk = 0;
3866
827k
      RD_STATS rd_stats_y, rd_stats_uv;
3867
827k
      const int skip_ctx = av1_get_skip_txfm_context(xd);
3868
3869
827k
      *mbmi = *winner_mbmi;
3870
3871
827k
      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3872
3873
      // Select prediction reference frames.
3874
2.31M
      for (int i = 0; i < num_planes; i++) {
3875
1.48M
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3876
1.48M
        if (has_second_ref(mbmi))
3877
0
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3878
1.48M
      }
3879
3880
827k
      if (is_inter_mode(mbmi->mode)) {
3881
249k
        const int mi_row = xd->mi_row;
3882
249k
        const int mi_col = xd->mi_col;
3883
249k
        bool is_predictor_built = false;
3884
249k
        const PREDICTION_MODE prediction_mode = mbmi->mode;
3885
        // Do interpolation filter search for realtime mode if applicable.
3886
249k
        if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
3887
0
            cpi->oxcf.mode == REALTIME &&
3888
0
            cm->current_frame.reference_mode == SINGLE_REFERENCE &&
3889
0
            is_inter_mode(prediction_mode) &&
3890
0
            mbmi->motion_mode == SIMPLE_TRANSLATION &&
3891
0
            !is_inter_compound_mode(prediction_mode)) {
3892
0
          is_predictor_built =
3893
0
              fast_interp_search(cpi, x, mi_row, mi_col, bsize);
3894
0
        }
3895
249k
        if (!is_predictor_built) {
3896
249k
          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3897
249k
                                        av1_num_planes(cm) - 1);
3898
249k
        }
3899
249k
        if (mbmi->motion_mode == OBMC_CAUSAL)
3900
0
          av1_build_obmc_inter_predictors_sb(cm, xd);
3901
3902
249k
        av1_subtract_plane(x, bsize, 0);
3903
249k
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
3904
249k
            !xd->lossless[mbmi->segment_id]) {
3905
249k
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3906
249k
                                              INT64_MAX);
3907
249k
          assert(rd_stats_y.rate != INT_MAX);
3908
18.4E
        } else {
3909
18.4E
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3910
18.4E
                                            INT64_MAX);
3911
18.4E
          memset(mbmi->inter_tx_size, mbmi->tx_size,
3912
18.4E
                 sizeof(mbmi->inter_tx_size));
3913
18.4E
          for (int i = 0; i < xd->height * xd->width; ++i)
3914
0
            set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
3915
18.4E
        }
3916
578k
      } else {
3917
578k
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3918
578k
                                          INT64_MAX);
3919
578k
      }
3920
3921
827k
      if (num_planes > 1) {
3922
329k
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
3923
498k
      } else {
3924
498k
        av1_init_rd_stats(&rd_stats_uv);
3925
498k
      }
3926
3927
827k
      const int comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
3928
3929
827k
      const ModeCosts *mode_costs = &x->mode_costs;
3930
827k
      if (is_inter_mode(mbmi->mode) &&
3931
249k
          (!cpi->oxcf.algo_cfg.sharpness || !comp_pred) &&
3932
249k
          RDCOST(x->rdmult,
3933
249k
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
3934
249k
                     rd_stats_uv.rate,
3935
249k
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
3936
249k
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
3937
827k
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
3938
45.4k
        skip_blk = 1;
3939
45.4k
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
3940
45.4k
        rd_stats_uv.rate = 0;
3941
45.4k
        rd_stats_y.dist = rd_stats_y.sse;
3942
45.4k
        rd_stats_uv.dist = rd_stats_uv.sse;
3943
782k
      } else {
3944
782k
        skip_blk = 0;
3945
782k
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
3946
782k
      }
3947
827k
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
3948
827k
                      winner_rate_y - winner_rate_uv;
3949
827k
      int64_t this_rd =
3950
827k
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
3951
827k
      if (best_rd > this_rd) {
3952
695k
        *best_mbmode = *mbmi;
3953
695k
        *best_mode_index = winner_mode_index;
3954
695k
        av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
3955
695k
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3956
695k
        rd_cost->rate = this_rate;
3957
695k
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
3958
695k
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
3959
695k
        rd_cost->rdcost = this_rd;
3960
695k
        best_rd = this_rd;
3961
695k
        *best_skip2 = skip_blk;
3962
695k
      }
3963
827k
    }
3964
861k
  }
3965
861k
}
3966
3967
/*!\cond */
3968
typedef struct {
3969
  // Mask for each reference frame, specifying which prediction modes to NOT try
3970
  // during search.
3971
  uint32_t pred_modes[REF_FRAMES];
3972
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
3973
  // reference frames (i, j).
3974
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
3975
  // (NONE_FRAME).
3976
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
3977
} mode_skip_mask_t;
3978
/*!\endcond */
3979
3980
// Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
3981
static inline void disable_reference(
3982
5.00M
    MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3983
50.0M
  for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3984
45.0M
    ref_combo[ref][ref2 + 1] = true;
3985
45.0M
  }
3986
5.00M
}
3987
3988
// Update 'ref_combo' mask to disable all inter references except ALTREF.
3989
static inline void disable_inter_references_except_altref(
3990
0
    bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3991
0
  disable_reference(LAST_FRAME, ref_combo);
3992
0
  disable_reference(LAST2_FRAME, ref_combo);
3993
0
  disable_reference(LAST3_FRAME, ref_combo);
3994
0
  disable_reference(GOLDEN_FRAME, ref_combo);
3995
0
  disable_reference(BWDREF_FRAME, ref_combo);
3996
0
  disable_reference(ALTREF2_FRAME, ref_combo);
3997
0
}
3998
3999
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
4000
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
4001
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
4002
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
4003
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
4004
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
4005
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
4006
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
4007
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
4008
};
4009
4010
// Selects which reference combinations default_skip_mask() leaves enabled.
typedef enum {
  REF_SET_FULL,     // nothing is masked out
  REF_SET_REDUCED,  // only the pairs in reduced_ref_combos stay enabled
  REF_SET_REALTIME  // only the pairs in real_time_ref_combos stay enabled
} REF_SET;
4011
4012
932k
// Resets 'mask' to the defaults implied by 'ref_set'.
//
// - REF_SET_FULL: the whole mask is zeroed, i.e. no prediction mode and no
//   reference combination is skipped.
// - REF_SET_REDUCED / REF_SET_REALTIME: no prediction mode is skipped, every
//   reference combination is disabled, and then only the combinations listed
//   in reduced_ref_combos / real_time_ref_combos are re-enabled.
//
// Any other value trips an assert; with asserts compiled out all reference
// combinations are left disabled.
static inline void default_skip_mask(mode_skip_mask_t *mask, REF_SET ref_set) {
  if (ref_set == REF_SET_FULL) {
    // Everything available by default.
    memset(mask, 0, sizeof(*mask));
    return;
  }

  // All modes available by default.
  memset(mask->pred_modes, 0, sizeof(mask->pred_modes));

  // All references disabled first.
  for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
    disable_reference(ref1, mask->ref_combo);
  }

  // Initialized so that the fall-through path never carries an indeterminate
  // pointer or count.
  const MV_REFERENCE_FRAME(*ref_set_combos)[2] = NULL;
  int num_ref_combos = 0;

  // Then enable reduced set of references explicitly.
  switch (ref_set) {
    case REF_SET_REDUCED:
      ref_set_combos = reduced_ref_combos;
      // The cast covers the whole quotient; casting only the first sizeof
      // would still perform the division in size_t.
      num_ref_combos =
          (int)(sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]));
      break;
    case REF_SET_REALTIME:
      ref_set_combos = real_time_ref_combos;
      num_ref_combos =
          (int)(sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]));
      break;
    default: assert(0); break;
  }

  for (int i = 0; i < num_ref_combos; ++i) {
    const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
    // Second index is offset by one because the second reference may be
    // NONE_FRAME.
    mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
  }
}
4049
4050
static inline void init_mode_skip_mask(mode_skip_mask_t *mask,
4051
                                       const AV1_COMP *cpi, MACROBLOCK *x,
4052
932k
                                       BLOCK_SIZE bsize) {
4053
932k
  const AV1_COMMON *const cm = &cpi->common;
4054
932k
  const struct segmentation *const seg = &cm->seg;
4055
932k
  MACROBLOCKD *const xd = &x->e_mbd;
4056
932k
  MB_MODE_INFO *const mbmi = xd->mi[0];
4057
932k
  unsigned char segment_id = mbmi->segment_id;
4058
932k
  const SPEED_FEATURES *const sf = &cpi->sf;
4059
932k
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
4060
932k
  REF_SET ref_set = REF_SET_FULL;
4061
4062
932k
  if (sf->rt_sf.use_real_time_ref_set)
4063
0
    ref_set = REF_SET_REALTIME;
4064
932k
  else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
4065
0
    ref_set = REF_SET_REDUCED;
4066
4067
932k
  default_skip_mask(mask, ref_set);
4068
4069
932k
  int min_pred_mv_sad = INT_MAX;
4070
932k
  MV_REFERENCE_FRAME ref_frame;
4071
932k
  if (ref_set == REF_SET_REALTIME) {
4072
    // For real-time encoding, we only look at a subset of ref frames. So the
4073
    // threshold for pruning should be computed from this subset as well.
4074
0
    const int num_rt_refs =
4075
0
        sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
4076
0
    for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
4077
0
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
4078
0
      if (ref != INTRA_FRAME) {
4079
0
        min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
4080
0
      }
4081
0
    }
4082
932k
  } else {
4083
7.45M
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
4084
6.52M
      min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
4085
932k
  }
4086
4087
7.44M
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4088
6.51M
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
4089
      // Skip checking missing reference in both single and compound reference
4090
      // modes.
4091
5.00M
      disable_reference(ref_frame, mask->ref_combo);
4092
5.00M
    } else {
4093
      // Skip fixed mv modes for poor references
4094
1.51M
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
4095
2.95k
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
4096
2.95k
      }
4097
1.51M
    }
4098
6.51M
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
4099
0
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
4100
      // Reference not used for the segment.
4101
0
      disable_reference(ref_frame, mask->ref_combo);
4102
0
    }
4103
6.51M
  }
4104
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
4105
  // is disabled for this segment. This is to prevent the possibility that we
4106
  // end up unable to pick any mode.
4107
932k
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
4108
    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
4109
    // unless ARNR filtering is enabled in which case we want
4110
    // an unfiltered alternative. We allow near/nearest as well
4111
    // because they may result in zero-zero MVs but be cheaper.
4112
932k
    if (cpi->rc.is_src_frame_alt_ref &&
4113
0
        (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
4114
0
      disable_inter_references_except_altref(mask->ref_combo);
4115
4116
0
      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
4117
0
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
4118
0
      int_mv near_mv, nearest_mv, global_mv;
4119
0
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
4120
0
                  &x->mbmi_ext);
4121
0
      get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
4122
0
      get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
4123
4124
0
      if (near_mv.as_int != global_mv.as_int)
4125
0
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
4126
0
      if (nearest_mv.as_int != global_mv.as_int)
4127
0
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
4128
0
    }
4129
932k
  }
4130
4131
932k
  if (cpi->rc.is_src_frame_alt_ref) {
4132
0
    if (inter_sf->alt_ref_search_fp &&
4133
0
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
4134
0
      mask->pred_modes[ALTREF_FRAME] = 0;
4135
0
      disable_inter_references_except_altref(mask->ref_combo);
4136
0
      disable_reference(INTRA_FRAME, mask->ref_combo);
4137
0
    }
4138
0
  }
4139
4140
932k
  if (inter_sf->alt_ref_search_fp) {
4141
932k
    if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
4142
0
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
4143
      // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
4144
      // those are past frames
4145
0
      MV_REFERENCE_FRAME start_frame =
4146
0
          inter_sf->alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
4147
0
      for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
4148
0
        if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
4149
0
            0) {
4150
          // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
4151
          // to the relative dist of LAST_FRAME.
4152
0
          if (inter_sf->alt_ref_search_fp == 1 &&
4153
0
              (abs(cpi->ref_frame_dist_info
4154
0
                       .ref_relative_dist[ref_frame - LAST_FRAME]) >
4155
0
               1.5 * abs(cpi->ref_frame_dist_info
4156
0
                             .ref_relative_dist[LAST_FRAME - LAST_FRAME]))) {
4157
0
            continue;
4158
0
          }
4159
0
          if (x->pred_mv_sad[ref_frame] > sad_thresh)
4160
0
            mask->pred_modes[ref_frame] |= INTER_ALL;
4161
0
        }
4162
0
      }
4163
0
    }
4164
932k
  }
4165
4166
932k
  if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
4167
0
    if (x->best_pred_mv_sad[0] < INT_MAX) {
4168
0
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
4169
0
      const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };
4170
4171
      // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
4172
0
      for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
4173
0
        ref_frame = prune_ref_list[ref_idx];
4174
0
        if (x->pred_mv_sad[ref_frame] > sad_thresh)
4175
0
          mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
4176
0
      }
4177
0
    }
4178
0
  }
4179
4180
932k
  if (bsize > sf->part_sf.max_intra_bsize) {
4181
3.81k
    disable_reference(INTRA_FRAME, mask->ref_combo);
4182
3.81k
  }
4183
4184
932k
  if (!cpi->oxcf.tool_cfg.enable_global_motion) {
4185
0
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4186
0
      mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
4187
0
      mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
4188
0
    }
4189
0
  }
4190
4191
932k
  mask->pred_modes[INTRA_FRAME] |=
4192
932k
      ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
4193
4194
  // Prune reference frames which are not the closest to the current
4195
  // frame and with large pred_mv_sad.
4196
932k
  if (inter_sf->prune_single_ref) {
4197
368k
    assert(inter_sf->prune_single_ref > 0 && inter_sf->prune_single_ref < 3);
4198
368k
    const double prune_threshes[2] = { 1.20, 1.05 };
4199
4200
2.94M
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4201
2.57M
      const RefFrameDistanceInfo *const ref_frame_dist_info =
4202
2.57M
          &cpi->ref_frame_dist_info;
4203
2.57M
      const int is_closest_ref =
4204
2.57M
          (ref_frame == ref_frame_dist_info->nearest_past_ref) ||
4205
2.20M
          (ref_frame == ref_frame_dist_info->nearest_future_ref);
4206
4207
2.57M
      if (!is_closest_ref) {
4208
2.20M
        const int dir =
4209
2.20M
            (ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] < 0)
4210
2.20M
                ? 0
4211
2.20M
                : 1;
4212
2.20M
        if (x->best_pred_mv_sad[dir] < INT_MAX &&
4213
210k
            x->pred_mv_sad[ref_frame] >
4214
210k
                prune_threshes[inter_sf->prune_single_ref - 1] *
4215
210k
                    x->best_pred_mv_sad[dir])
4216
90.8k
          mask->pred_modes[ref_frame] |= INTER_SINGLE_ALL;
4217
2.20M
      }
4218
2.57M
    }
4219
368k
  }
4220
932k
}
4221
4222
static inline void init_neighbor_pred_buf(const OBMCBuffer *const obmc_buffer,
4223
                                          HandleInterModeArgs *const args,
4224
932k
                                          int is_hbd) {
4225
932k
  if (is_hbd) {
4226
104k
    const int len = sizeof(uint16_t);
4227
104k
    args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
4228
104k
    args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
4229
104k
                                                 (MAX_SB_SQUARE >> 1) * len);
4230
104k
    args->above_pred_buf[2] =
4231
104k
        CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
4232
104k
    args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
4233
104k
    args->left_pred_buf[1] =
4234
104k
        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
4235
104k
    args->left_pred_buf[2] =
4236
104k
        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
4237
827k
  } else {
4238
827k
    args->above_pred_buf[0] = obmc_buffer->above_pred;
4239
827k
    args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
4240
827k
    args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
4241
827k
    args->left_pred_buf[0] = obmc_buffer->left_pred;
4242
827k
    args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
4243
827k
    args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
4244
827k
  }
4245
932k
}
4246
4247
static inline int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
4248
12.2M
                                  MV_REFERENCE_FRAME ref_frame) {
4249
12.2M
  const AV1_COMMON *const cm = &cpi->common;
4250
12.2M
  MV_REFERENCE_FRAME rf[2];
4251
12.2M
  av1_set_ref_frame(rf, ref_frame);
4252
4253
12.2M
  if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
4254
4255
5.69M
  if (prune_ref_by_selective_ref_frame(cpi, x, rf,
4256
5.69M
                                       cm->cur_frame->ref_display_order_hint)) {
4257
0
    return 1;
4258
0
  }
4259
4260
5.69M
  return 0;
4261
5.69M
}
4262
4263
static inline int is_ref_frame_used_by_compound_ref(int ref_frame,
4264
5.61k
                                                    int skip_ref_frame_mask) {
4265
123k
  for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
4266
117k
    if (!(skip_ref_frame_mask & (1 << r))) {
4267
0
      const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
4268
0
      if (rf[0] == ref_frame || rf[1] == ref_frame) {
4269
0
        return 1;
4270
0
      }
4271
0
    }
4272
117k
  }
4273
5.61k
  return 0;
4274
5.61k
}
4275
4276
static inline int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
4277
37.4k
                                             const MB_MODE_INFO *mi_cache) {
4278
37.4k
  if (!mi_cache) {
4279
37.4k
    return 0;
4280
37.4k
  }
4281
4282
4
  if (ref_frame < REF_FRAMES) {
4283
0
    return (ref_frame == mi_cache->ref_frame[0] ||
4284
0
            ref_frame == mi_cache->ref_frame[1]);
4285
0
  }
4286
4287
  // if we are here, then the current mode is compound.
4288
4
  MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
4289
4
  return ref_frame == cached_ref_type;
4290
4
}
4291
4292
// Please add/modify parameter setting in this function, making it consistent
4293
// and easy to read and maintain.
4294
static inline void set_params_rd_pick_inter_mode(
4295
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
4296
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
4297
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
4298
932k
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
4299
932k
  const AV1_COMMON *const cm = &cpi->common;
4300
932k
  MACROBLOCKD *const xd = &x->e_mbd;
4301
932k
  MB_MODE_INFO *const mbmi = xd->mi[0];
4302
932k
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
4303
932k
  unsigned char segment_id = mbmi->segment_id;
4304
4305
932k
  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
4306
932k
  av1_collect_neighbors_ref_counts(xd);
4307
932k
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
4308
932k
                           ref_costs_comp);
4309
4310
932k
  const int mi_row = xd->mi_row;
4311
932k
  const int mi_col = xd->mi_col;
4312
932k
  x->best_pred_mv_sad[0] = INT_MAX;
4313
932k
  x->best_pred_mv_sad[1] = INT_MAX;
4314
4315
7.45M
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
4316
6.52M
       ++ref_frame) {
4317
6.52M
    x->pred_mv_sad[ref_frame] = INT_MAX;
4318
6.52M
    mbmi_ext->mode_context[ref_frame] = 0;
4319
6.52M
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
4320
6.52M
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
4321
      // Skip the ref frame if the mask says skip and the ref is not used by
4322
      // compound ref.
4323
1.51M
      if (skip_ref_frame_mask & (1 << ref_frame) &&
4324
2.29k
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
4325
2.29k
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4326
2.29k
        continue;
4327
2.29k
      }
4328
1.51M
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
4329
1.51M
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
4330
1.51M
    }
4331
6.52M
    if (cpi->sf.inter_sf.alt_ref_search_fp ||
4332
0
        cpi->sf.inter_sf.prune_single_ref ||
4333
6.52M
        cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
4334
      // Store the best pred_mv_sad across all past frames
4335
6.52M
      if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
4336
6.52M
          0)
4337
1.51M
        x->best_pred_mv_sad[0] =
4338
1.51M
            AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
4339
5.00M
      else
4340
        // Store the best pred_mv_sad across all future frames
4341
5.00M
        x->best_pred_mv_sad[1] =
4342
5.00M
            AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
4343
6.52M
    }
4344
6.52M
  }
4345
4346
932k
  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
4347
    // No second reference on RT ref set, so no need to initialize
4348
932k
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
4349
20.4M
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
4350
19.5M
      mbmi_ext->mode_context[ref_frame] = 0;
4351
19.5M
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
4352
19.5M
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
4353
19.5M
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
4354
18.8M
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
4355
18.8M
        continue;
4356
18.8M
      }
4357
4358
730k
      if (skip_ref_frame_mask & (1 << ref_frame) &&
4359
2.66k
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4360
2.66k
        continue;
4361
2.66k
      }
4362
      // Ref mv list population is not required, when compound references are
4363
      // pruned.
4364
727k
      if (prune_ref_frame(cpi, x, ref_frame)) continue;
4365
4366
47
      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
4367
47
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
4368
47
                       mbmi_ext->mode_context);
4369
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
4370
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
4371
47
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
4372
47
    }
4373
932k
  }
4374
4375
932k
  av1_count_overlappable_neighbors(cm, xd);
4376
932k
  const FRAME_UPDATE_TYPE update_type =
4377
932k
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
4378
932k
  int use_actual_frame_probs = 1;
4379
932k
  int prune_obmc;
4380
#if CONFIG_FPMT_TEST
4381
  use_actual_frame_probs =
4382
      (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
4383
  if (!use_actual_frame_probs) {
4384
    prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
4385
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
4386
  }
4387
#endif
4388
932k
  if (use_actual_frame_probs) {
4389
932k
    prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
4390
932k
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
4391
932k
  }
4392
932k
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
4393
0
    if (check_num_overlappable_neighbors(mbmi) &&
4394
0
        is_motion_variation_allowed_bsize(bsize)) {
4395
0
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4396
0
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4397
0
                                       MAX_SB_SIZE >> 1 };
4398
0
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4399
0
                                        MAX_SB_SIZE >> 1 };
4400
0
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4401
0
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
4402
0
                                          dst_width1, dst_height1,
4403
0
                                          args->above_pred_stride);
4404
0
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
4405
0
                                         dst_width2, dst_height2,
4406
0
                                         args->left_pred_stride);
4407
0
      const int num_planes = av1_num_planes(cm);
4408
0
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
4409
0
                           mi_col, 0, num_planes);
4410
0
      calc_target_weighted_pred(
4411
0
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
4412
0
          args->left_pred_buf[0], args->left_pred_stride[0]);
4413
0
    }
4414
0
  }
4415
4416
932k
  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);
4417
4418
  // Set params for mode evaluation
4419
932k
  set_mode_eval_params(cpi, x, MODE_EVAL);
4420
4421
932k
  x->comp_rd_stats_idx = 0;
4422
4423
8.38M
  for (int idx = 0; idx < REF_FRAMES; idx++) {
4424
7.45M
    args->best_single_sse_in_refs[idx] = INT32_MAX;
4425
7.45M
  }
4426
932k
}
4427
4428
static inline void init_single_inter_mode_search_state(
4429
932k
    InterModeSearchState *search_state) {
4430
2.79M
  for (int dir = 0; dir < 2; ++dir) {
4431
9.32M
    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4432
37.2M
      for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
4433
29.8M
        SingleInterModeState *state;
4434
4435
29.8M
        state = &search_state->single_state[dir][mode][ref_frame];
4436
29.8M
        state->ref_frame = NONE_FRAME;
4437
29.8M
        state->rd = INT64_MAX;
4438
4439
29.8M
        state = &search_state->single_state_modelled[dir][mode][ref_frame];
4440
29.8M
        state->ref_frame = NONE_FRAME;
4441
29.8M
        state->rd = INT64_MAX;
4442
4443
29.8M
        search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
4444
29.8M
      }
4445
7.45M
    }
4446
1.86M
  }
4447
4448
8.39M
  for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4449
7.45M
    search_state->best_single_rd[ref_frame] = INT64_MAX;
4450
7.45M
    search_state->best_single_mode[ref_frame] = PRED_MODE_INVALID;
4451
7.45M
  }
4452
932k
  av1_zero(search_state->single_state_cnt);
4453
932k
  av1_zero(search_state->single_state_modelled_cnt);
4454
932k
}
4455
4456
static inline void init_inter_mode_search_state(
4457
    InterModeSearchState *search_state, const AV1_COMP *cpi,
4458
932k
    const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
4459
932k
  init_intra_mode_search_state(&search_state->intra_search_state);
4460
932k
  av1_invalid_rd_stats(&search_state->best_y_rdcost);
4461
4462
932k
  search_state->best_rd = best_rd_so_far;
4463
932k
  search_state->best_skip_rd[0] = INT64_MAX;
4464
932k
  search_state->best_skip_rd[1] = INT64_MAX;
4465
4466
932k
  av1_zero(search_state->best_mbmode);
4467
4468
932k
  search_state->best_rate_y = INT_MAX;
4469
4470
932k
  search_state->best_rate_uv = INT_MAX;
4471
4472
932k
  search_state->best_mode_skippable = 0;
4473
4474
932k
  search_state->best_skip2 = 0;
4475
4476
932k
  search_state->best_mode_index = THR_INVALID;
4477
4478
932k
  const MACROBLOCKD *const xd = &x->e_mbd;
4479
932k
  const MB_MODE_INFO *const mbmi = xd->mi[0];
4480
932k
  const unsigned char segment_id = mbmi->segment_id;
4481
4482
932k
  search_state->num_available_refs = 0;
4483
932k
  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
4484
932k
  memset(search_state->dist_order_refs, -1,
4485
932k
         sizeof(search_state->dist_order_refs));
4486
4487
7.45M
  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
4488
6.52M
    search_state->mode_threshold[i] = 0;
4489
932k
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
4490
20.4M
  for (int i = LAST_NEW_MV_INDEX + 1; i < SINGLE_REF_MODE_END; ++i)
4491
19.5M
    search_state->mode_threshold[i] =
4492
19.5M
        ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4493
19.5M
        RD_THRESH_FAC_FRAC_BITS;
4494
4495
932k
  search_state->best_intra_rd = INT64_MAX;
4496
4497
932k
  search_state->best_pred_sse = UINT_MAX;
4498
4499
932k
  av1_zero(search_state->single_newmv);
4500
932k
  av1_zero(search_state->single_newmv_rate);
4501
932k
  av1_zero(search_state->single_newmv_valid);
4502
4.66M
  for (int i = SINGLE_INTER_MODE_START; i < SINGLE_INTER_MODE_END; ++i) {
4503
14.9M
    for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4504
100M
      for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4505
89.3M
        search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4506
89.3M
        search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4507
89.3M
      }
4508
11.1M
    }
4509
3.72M
  }
4510
4511
3.73M
  for (int i = 0; i < REFERENCE_MODES; ++i) {
4512
2.79M
    search_state->best_pred_rd[i] = INT64_MAX;
4513
2.79M
  }
4514
4515
932k
  if (cpi->common.current_frame.reference_mode != SINGLE_REFERENCE) {
4516
120M
    for (int i = SINGLE_REF_MODE_END; i < THR_INTER_MODE_END; ++i)
4517
119M
      search_state->mode_threshold[i] =
4518
119M
          ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4519
119M
          RD_THRESH_FAC_FRAC_BITS;
4520
4521
8.37M
    for (int i = COMP_INTER_MODE_START; i < COMP_INTER_MODE_END; ++i) {
4522
29.7M
      for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4523
200M
        for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4524
178M
          search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4525
178M
          search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4526
178M
        }
4527
22.3M
      }
4528
7.44M
    }
4529
4530
932k
    init_single_inter_mode_search_state(search_state);
4531
932k
  }
4532
932k
}
4533
4534
static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
4535
                           const MV_REFERENCE_FRAME *ref_frame,
4536
38.8M
                           const PREDICTION_MODE this_mode) {
4537
38.8M
  if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
4538
382k
    return true;
4539
382k
  }
4540
4541
38.4M
  return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
4542
38.8M
}
4543
4544
static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
4545
                                      BLOCK_SIZE bsize,
4546
                                      PREDICTION_MODE curr_mode,
4547
144M
                                      const MV_REFERENCE_FRAME *ref_frames) {
4548
144M
  const int comp_pred = ref_frames[1] > INTRA_FRAME;
4549
144M
  if (comp_pred) {
4550
118M
    if (!is_comp_ref_allowed(bsize)) return 1;
4551
118M
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]])) {
4552
105M
      return 1;
4553
105M
    }
4554
4555
12.6M
    const AV1_COMMON *const cm = &cpi->common;
4556
12.6M
    if (frame_is_intra_only(cm)) return 1;
4557
4558
12.6M
    const CurrentFrame *const current_frame = &cm->current_frame;
4559
12.6M
    if (current_frame->reference_mode == SINGLE_REFERENCE) return 1;
4560
4561
12.6M
    const struct segmentation *const seg = &cm->seg;
4562
12.6M
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
4563
    // Do not allow compound prediction if the segment level reference frame
4564
    // feature is in use as in this case there can only be one reference.
4565
12.6M
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
4566
12.6M
  }
4567
4568
38.8M
  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
4569
    // Mode must be compatible
4570
0
    if (!is_interintra_allowed_bsize(bsize)) return 1;
4571
0
    if (!is_interintra_allowed_mode(curr_mode)) return 1;
4572
0
  }
4573
4574
38.3M
  return 0;
4575
38.3M
}
4576
4577
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
4578
3.44k
                                        BLOCK_SIZE bsize, int mib_size) {
4579
3.44k
  const int sb_size_mask = mib_size - 1;
4580
3.44k
  const MACROBLOCKD *const xd = &x->e_mbd;
4581
3.44k
  const int mi_row = xd->mi_row;
4582
3.44k
  const int mi_col = xd->mi_col;
4583
3.44k
  const int mi_row_in_sb = mi_row & sb_size_mask;
4584
3.44k
  const int mi_col_in_sb = mi_col & sb_size_mask;
4585
3.44k
  const int mi_w = mi_size_wide[bsize];
4586
3.44k
  const int mi_h = mi_size_high[bsize];
4587
3.44k
  int picked_ref_frames_mask = 0;
4588
17.2k
  for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_h; ++i) {
4589
41.5k
    for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_w; ++j) {
4590
27.7k
      picked_ref_frames_mask |= x->picked_ref_frames_mask[i * 32 + j];
4591
27.7k
    }
4592
13.7k
  }
4593
3.44k
  return picked_ref_frames_mask;
4594
3.44k
}
4595
4596
// Check if reference frame pair of the current block matches with the given
4597
// block.
4598
static inline int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4599
1.38M
                                       const MV_REFERENCE_FRAME *ref_frames) {
4600
1.38M
  return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4601
350k
          (ref_frames[1] == mbmi->ref_frame[1]));
4602
1.38M
}
4603
4604
// Case 1: return 0, means don't skip this mode
4605
// Case 2: return 1, means skip this mode completely
4606
// Case 3: return 2, means skip compound only, but still try single motion modes
4607
static int inter_mode_search_order_independent_skip(
4608
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
4609
    InterModeSearchState *search_state, int skip_ref_frame_mask,
4610
38.8M
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
4611
38.8M
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
4612
27.4M
    return 1;
4613
27.4M
  }
4614
4615
11.4M
  const int ref_type = av1_ref_frame_type(ref_frame);
4616
11.4M
  if (!cpi->sf.rt_sf.use_real_time_ref_set)
4617
11.5M
    if (prune_ref_frame(cpi, x, ref_type)) return 1;
4618
4619
  // This is only used in motion vector unit test.
4620
5.64M
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
4621
0
      ref_frame[0] == INTRA_FRAME)
4622
0
    return 1;
4623
4624
5.64M
  const AV1_COMMON *const cm = &cpi->common;
4625
5.64M
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
4626
971k
    return 1;
4627
971k
  }
4628
4629
  // Reuse the prediction mode in cache
4630
4.67M
  if (x->use_mb_mode_cache) {
4631
0
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
4632
0
    const PREDICTION_MODE cached_mode = cached_mi->mode;
4633
0
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
4634
0
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;
4635
4636
    // If the cached mode is intra, then we just need to match the mode.
4637
0
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
4638
0
      return 1;
4639
0
    }
4640
4641
    // If the cached mode is single inter mode, then we match the mode and
4642
    // reference frame.
4643
0
    if (cached_mode_is_single) {
4644
0
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
4645
0
        return 1;
4646
0
      }
4647
0
    } else {
4648
      // If the cached mode is compound, then we need to consider several cases.
4649
0
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
4650
0
      if (mode_is_single) {
4651
        // If the mode is single, we know the modes can't match. But we might
4652
        // still want to search it if compound mode depends on the current mode.
4653
0
        int skip_motion_mode_only = 0;
4654
0
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
4655
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
4656
0
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
4657
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
4658
0
        } else if (cached_mode == NEW_NEWMV) {
4659
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
4660
0
                                   ref_frame[0] == cached_frame[1]);
4661
0
        }
4662
4663
0
        return 1 + skip_motion_mode_only;
4664
0
      } else {
4665
        // If both modes are compound, then everything must match.
4666
0
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
4667
0
            ref_frame[1] != cached_frame[1]) {
4668
0
          return 1;
4669
0
        }
4670
0
      }
4671
0
    }
4672
0
  }
4673
4674
4.67M
  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
4675
  // If no valid mode has been found so far in PARTITION_NONE when finding a
4676
  // valid partition is required, do not skip mode.
4677
4.67M
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
4678
698k
      x->must_find_valid_partition)
4679
0
    return 0;
4680
4681
4.67M
  const SPEED_FEATURES *const sf = &cpi->sf;
4682
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
4683
  // frames
4684
4.67M
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
4685
4.73M
      (mode == NEAR_NEARMV || mode == NEARMV)) {
4686
1.02M
    const MACROBLOCKD *const xd = &x->e_mbd;
4687
1.02M
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
4688
836k
        xd->up_available) {
4689
693k
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
4690
693k
                                                    { 1, 1, 0 },
4691
693k
                                                    { 2, 1, 0 } };
4692
693k
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;
4693
4694
693k
      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
4695
693k
             qindex_sub_range < 3);
4696
693k
      const int num_ref_frame_pair_match_thresh =
4697
693k
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
4698
693k
                    [qindex_sub_range];
4699
4700
693k
      assert(num_ref_frame_pair_match_thresh <= 2 &&
4701
693k
             num_ref_frame_pair_match_thresh >= 0);
4702
693k
      int num_ref_frame_pair_match = 0;
4703
4704
693k
      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
4705
693k
      num_ref_frame_pair_match +=
4706
693k
          match_ref_frame_pair(xd->above_mbmi, ref_frame);
4707
4708
      // Pruning based on ref frame pair match with neighbors.
4709
693k
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
4710
693k
    }
4711
1.02M
  }
4712
4713
4.27M
  int skip_motion_mode = 0;
4714
4.27M
  if (mbmi->partition != PARTITION_NONE) {
4715
32.4k
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
4716
32.4k
    if (ref_type <= ALTREF_FRAME && skip_ref) {
4717
      // Since the compound ref modes depends on the motion estimation result of
4718
      // two single ref modes (best mv of single ref modes as the start point),
4719
      // if current single ref mode is marked skip, we need to check if it will
4720
      // be used in compound ref modes.
4721
3.32k
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
4722
        // Found a not skipped compound ref mode which contains current
4723
        // single ref. So this single ref can't be skipped completely
4724
        // Just skip its motion mode search, still try its simple
4725
        // transition mode.
4726
0
        skip_motion_mode = 1;
4727
0
        skip_ref = 0;
4728
0
      }
4729
3.32k
    }
4730
    // If we are reusing the prediction from cache, and the current frame is
4731
    // required by the cache, then we cannot prune it.
4732
32.4k
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
4733
0
      skip_ref = 0;
4734
      // If the cache only needs the current reference type for compound
4735
      // prediction, then we can skip motion mode search.
4736
0
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
4737
0
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
4738
0
    }
4739
32.4k
    if (skip_ref) return 1;
4740
32.4k
  }
4741
4742
4.26M
  if (ref_frame[0] == INTRA_FRAME) {
4743
0
    if (mode != DC_PRED) {
4744
      // Disable intra modes other than DC_PRED for blocks with low variance
4745
      // Threshold for intra skipping based on source variance
4746
      // TODO(debargha): Specialize the threshold for super block sizes
4747
0
      const unsigned int skip_intra_var_thresh = 64;
4748
0
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
4749
0
          x->source_variance < skip_intra_var_thresh)
4750
0
        return 1;
4751
0
    }
4752
0
  }
4753
4754
4.26M
  if (skip_motion_mode) return 2;
4755
4756
4.26M
  return 0;
4757
4.26M
}
4758
4759
static inline void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4760
                             const MV_REFERENCE_FRAME *ref_frames,
4761
190M
                             const AV1_COMMON *cm) {
4762
190M
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4763
190M
  mbmi->ref_mv_idx = 0;
4764
190M
  mbmi->mode = curr_mode;
4765
190M
  mbmi->uv_mode = UV_DC_PRED;
4766
190M
  mbmi->ref_frame[0] = ref_frames[0];
4767
190M
  mbmi->ref_frame[1] = ref_frames[1];
4768
190M
  pmi->palette_size[0] = 0;
4769
190M
  pmi->palette_size[1] = 0;
4770
190M
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
4771
190M
  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4772
190M
  mbmi->motion_mode = SIMPLE_TRANSLATION;
4773
190M
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4774
190M
  set_default_interp_filters(mbmi, cm->features.interp_filter);
4775
190M
}
4776
4777
static inline void collect_single_states(MACROBLOCK *x,
4778
                                         InterModeSearchState *search_state,
4779
2.56M
                                         const MB_MODE_INFO *const mbmi) {
4780
2.56M
  int i, j;
4781
2.56M
  const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
4782
2.56M
  const PREDICTION_MODE this_mode = mbmi->mode;
4783
2.56M
  const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
4784
2.56M
  const int mode_offset = INTER_OFFSET(this_mode);
4785
2.56M
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
4786
4787
  // Simple rd
4788
2.56M
  int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
4789
2.90M
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4790
343k
    const int64_t rd =
4791
343k
        search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
4792
343k
    if (rd < simple_rd) simple_rd = rd;
4793
343k
  }
4794
4795
  // Insertion sort of single_state
4796
2.56M
  const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
4797
2.56M
  SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
4798
2.56M
  i = search_state->single_state_cnt[dir][mode_offset];
4799
2.75M
  for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
4800
189k
    state_s[j] = state_s[j - 1];
4801
2.56M
  state_s[j] = this_state_s;
4802
2.56M
  search_state->single_state_cnt[dir][mode_offset]++;
4803
4804
  // Modelled rd
4805
2.56M
  int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
4806
2.90M
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4807
343k
    const int64_t rd =
4808
343k
        search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
4809
343k
    if (rd < modelled_rd) modelled_rd = rd;
4810
343k
  }
4811
4812
  // Insertion sort of single_state_modelled
4813
2.56M
  const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
4814
2.56M
  SingleInterModeState *state_m =
4815
2.56M
      search_state->single_state_modelled[dir][mode_offset];
4816
2.56M
  i = search_state->single_state_modelled_cnt[dir][mode_offset];
4817
2.87M
  for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
4818
310k
    state_m[j] = state_m[j - 1];
4819
2.56M
  state_m[j] = this_state_m;
4820
2.56M
  search_state->single_state_modelled_cnt[dir][mode_offset]++;
4821
2.56M
}
4822
4823
static inline void analyze_single_states(const AV1_COMP *cpi,
4824
0
                                         InterModeSearchState *search_state) {
4825
0
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
4826
0
  assert(prune_level >= 1);
4827
0
  int i, j, dir, mode;
4828
4829
0
  for (dir = 0; dir < 2; ++dir) {
4830
0
    int64_t best_rd;
4831
0
    SingleInterModeState(*state)[FWD_REFS];
4832
0
    const int prune_factor = prune_level >= 2 ? 6 : 5;
4833
4834
    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
4835
    // reference frames for all the modes (NEARESTMV and NEARMV may not
4836
    // have same motion vectors). Always keep the best of each mode
4837
    // because it might form the best possible combination with other mode.
4838
0
    state = search_state->single_state[dir];
4839
0
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
4840
0
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
4841
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4842
0
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
4843
0
        if (state[mode][i].rd != INT64_MAX &&
4844
0
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
4845
0
          state[mode][i].valid = 0;
4846
0
        }
4847
0
      }
4848
0
    }
4849
4850
0
    state = search_state->single_state_modelled[dir];
4851
0
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
4852
0
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
4853
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4854
0
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
4855
0
        if (state[mode][i].rd != INT64_MAX &&
4856
0
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
4857
0
          state[mode][i].valid = 0;
4858
0
        }
4859
0
      }
4860
0
    }
4861
0
  }
4862
4863
  // Ordering by simple rd first, then by modelled rd
4864
0
  for (dir = 0; dir < 2; ++dir) {
4865
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4866
0
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
4867
0
      const int state_cnt_m =
4868
0
          search_state->single_state_modelled_cnt[dir][mode];
4869
0
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
4870
0
      SingleInterModeState *state_m =
4871
0
          search_state->single_state_modelled[dir][mode];
4872
0
      int count = 0;
4873
0
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
4874
0
      for (i = 0; i < state_cnt_s; ++i) {
4875
0
        if (state_s[i].rd == INT64_MAX) break;
4876
0
        if (state_s[i].valid) {
4877
0
          search_state->single_rd_order[dir][mode][count++] =
4878
0
              state_s[i].ref_frame;
4879
0
        }
4880
0
      }
4881
0
      if (count >= max_candidates) continue;
4882
4883
0
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
4884
0
        if (state_m[i].rd == INT64_MAX) break;
4885
0
        if (!state_m[i].valid) continue;
4886
0
        const int ref_frame = state_m[i].ref_frame;
4887
0
        int match = 0;
4888
        // Check if existing already
4889
0
        for (j = 0; j < count; ++j) {
4890
0
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
4891
0
            match = 1;
4892
0
            break;
4893
0
          }
4894
0
        }
4895
0
        if (match) continue;
4896
        // Check if this ref_frame is removed in simple rd
4897
0
        int valid = 1;
4898
0
        for (j = 0; j < state_cnt_s; ++j) {
4899
0
          if (ref_frame == state_s[j].ref_frame) {
4900
0
            valid = state_s[j].valid;
4901
0
            break;
4902
0
          }
4903
0
        }
4904
0
        if (valid) {
4905
0
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
4906
0
        }
4907
0
      }
4908
0
    }
4909
0
  }
4910
0
}
4911
4912
static int compound_skip_get_candidates(
4913
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
4914
0
    const int dir, const PREDICTION_MODE mode) {
4915
0
  const int mode_offset = INTER_OFFSET(mode);
4916
0
  const SingleInterModeState *state =
4917
0
      search_state->single_state[dir][mode_offset];
4918
0
  const SingleInterModeState *state_modelled =
4919
0
      search_state->single_state_modelled[dir][mode_offset];
4920
4921
0
  int max_candidates = 0;
4922
0
  for (int i = 0; i < FWD_REFS; ++i) {
4923
0
    if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4924
0
    max_candidates++;
4925
0
  }
4926
4927
0
  int candidates = max_candidates;
4928
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4929
0
    candidates = AOMMIN(2, max_candidates);
4930
0
  }
4931
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4932
0
    if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4933
0
        state[0].ref_frame == state_modelled[0].ref_frame)
4934
0
      candidates = 1;
4935
0
    if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4936
0
  }
4937
4938
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4939
    // Limit the number of candidates to 1 in each direction for compound
4940
    // prediction
4941
0
    candidates = AOMMIN(1, candidates);
4942
0
  }
4943
0
  return candidates;
4944
0
}
4945
4946
static int compound_skip_by_single_states(
4947
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
4948
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
4949
0
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
4950
0
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
4951
0
  const int mode[2] = { compound_ref0_mode(this_mode),
4952
0
                        compound_ref1_mode(this_mode) };
4953
0
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
4954
0
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
4955
0
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
4956
0
  int ref_searched[2] = { 0, 0 };
4957
0
  int ref_mv_match[2] = { 1, 1 };
4958
0
  int i, j;
4959
4960
0
  for (i = 0; i < 2; ++i) {
4961
0
    const SingleInterModeState *state =
4962
0
        search_state->single_state[mode_dir[i]][mode_offset[i]];
4963
0
    const int state_cnt =
4964
0
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
4965
0
    for (j = 0; j < state_cnt; ++j) {
4966
0
      if (state[j].ref_frame == refs[i]) {
4967
0
        ref_searched[i] = 1;
4968
0
        break;
4969
0
      }
4970
0
    }
4971
0
  }
4972
4973
0
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
4974
0
  for (i = 0; i < 2; ++i) {
4975
0
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
4976
0
      continue;
4977
0
    }
4978
0
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
4979
0
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
4980
0
      int_mv single_mv;
4981
0
      int_mv comp_mv;
4982
0
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
4983
0
                  &x->mbmi_ext);
4984
0
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
4985
0
      if (single_mv.as_int != comp_mv.as_int) {
4986
0
        ref_mv_match[i] = 0;
4987
0
        break;
4988
0
      }
4989
0
    }
4990
0
  }
4991
4992
0
  for (i = 0; i < 2; ++i) {
4993
0
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
4994
0
    const int candidates =
4995
0
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
4996
0
    const MV_REFERENCE_FRAME *ref_order =
4997
0
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
4998
0
    int match = 0;
4999
0
    for (j = 0; j < candidates; ++j) {
5000
0
      if (refs[i] == ref_order[j]) {
5001
0
        match = 1;
5002
0
        break;
5003
0
      }
5004
0
    }
5005
0
    if (!match) return 1;
5006
0
  }
5007
5008
0
  return 0;
5009
0
}
5010
5011
// Check if ref frames of current block matches with given block.
5012
static inline void match_ref_frame(const MB_MODE_INFO *const mbmi,
5013
                                   const MV_REFERENCE_FRAME *ref_frames,
5014
0
                                   int *const is_ref_match) {
5015
0
  if (is_inter_block(mbmi)) {
5016
0
    is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
5017
0
    is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
5018
0
    if (has_second_ref(mbmi)) {
5019
0
      is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
5020
0
      is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
5021
0
    }
5022
0
  }
5023
0
}
5024
5025
// Prune compound mode using ref frames of neighbor blocks.
5026
static inline int compound_skip_using_neighbor_refs(
5027
    MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
5028
0
    const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
5029
  // Exclude non-extended compound modes from pruning
5030
0
  if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
5031
0
      this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
5032
0
    return 0;
5033
5034
0
  if (prune_ext_comp_using_neighbors >= 3) return 1;
5035
5036
0
  int is_ref_match[2] = { 0 };  // 0 - match for forward refs
5037
                                // 1 - match for backward refs
5038
  // Check if ref frames of this block matches with left neighbor.
5039
0
  if (xd->left_available)
5040
0
    match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
5041
5042
  // Check if ref frames of this block matches with above neighbor.
5043
0
  if (xd->up_available)
5044
0
    match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
5045
5046
  // Combine ref frame match with neighbors in forward and backward refs.
5047
0
  const int track_ref_match = is_ref_match[0] + is_ref_match[1];
5048
5049
  // Pruning based on ref frame match with neighbors.
5050
0
  if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
5051
0
  return 1;
5052
0
}
5053
5054
// Update best single mode for the given reference frame based on simple rd.
5055
static inline void update_best_single_mode(InterModeSearchState *search_state,
5056
                                           const PREDICTION_MODE this_mode,
5057
                                           const MV_REFERENCE_FRAME ref_frame,
5058
4.25M
                                           int64_t this_rd) {
5059
4.25M
  if (this_rd < search_state->best_single_rd[ref_frame]) {
5060
1.92M
    search_state->best_single_rd[ref_frame] = this_rd;
5061
1.92M
    search_state->best_single_mode[ref_frame] = this_mode;
5062
1.92M
  }
5063
4.25M
}
5064
5065
// Prune compound mode using best single mode for the same reference.
5066
static inline int skip_compound_using_best_single_mode_ref(
5067
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
5068
    const PREDICTION_MODE *best_single_mode,
5069
0
    int prune_comp_using_best_single_mode_ref) {
5070
  // Exclude non-extended compound modes from pruning
5071
0
  if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
5072
0
      this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
5073
0
    return 0;
5074
5075
0
  assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
5076
0
  const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
5077
  // Get ref frame direction corresponding to NEWMV
5078
  // 0 - NEWMV corresponding to forward direction
5079
  // 1 - NEWMV corresponding to backward direction
5080
0
  const int newmv_dir = comp_mode_ref0 != NEWMV;
5081
5082
  // Avoid pruning the compound mode when ref frame corresponding to NEWMV
5083
  // have NEWMV as single mode winner.
5084
  // Example: For an extended-compound mode,
5085
  // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
5086
  // - Ref frame corresponding to NEWMV is ALTREF_FRAME
5087
  // - Avoid pruning this mode, if best single mode corresponding to ref frame
5088
  //   ALTREF_FRAME is NEWMV
5089
0
  const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
5090
0
  if (single_mode == NEWMV) return 0;
5091
5092
  // Avoid pruning the compound mode when best single mode is not available
5093
0
  if (prune_comp_using_best_single_mode_ref == 1)
5094
0
    if (single_mode == MB_MODE_COUNT) return 0;
5095
0
  return 1;
5096
0
}
5097
5098
0
// qsort()-compatible comparator for int64_t values in ascending order.
// Returns -1 when *a < *b, 0 when they are equal and 1 when *a > *b.
// The operands are compared directly (no subtraction), so there is no risk of
// signed overflow for values of large magnitude.
static int compare_int64(const void *a, const void *b) {
  // Keep the const qualifier: the comparator must not modify its operands.
  const int64_t a64 = *(const int64_t *)a;
  const int64_t b64 = *(const int64_t *)b;
  if (a64 < b64) {
    return -1;
  } else if (a64 == b64) {
    return 0;
  } else {
    return 1;
  }
}
5109
5110
static inline void update_search_state(
5111
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
5112
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
5113
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
5114
2.61M
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
5115
2.61M
  const MACROBLOCKD *xd = &x->e_mbd;
5116
2.61M
  const MB_MODE_INFO *mbmi = xd->mi[0];
5117
2.61M
  const int skip_ctx = av1_get_skip_txfm_context(xd);
5118
2.61M
  const int skip_txfm =
5119
2.61M
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
5120
2.61M
  const TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5121
5122
2.61M
  search_state->best_rd = new_best_rd_stats->rdcost;
5123
2.61M
  search_state->best_mode_index = new_best_mode;
5124
2.61M
  *best_rd_stats_dst = *new_best_rd_stats;
5125
2.61M
  search_state->best_mbmode = *mbmi;
5126
2.61M
  search_state->best_skip2 = skip_txfm;
5127
2.61M
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
5128
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
5129
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
5130
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
5131
  // These two values will be updated when av1_txfm_search is called.
5132
2.61M
  if (txfm_search_done) {
5133
2.50M
    search_state->best_rate_y =
5134
2.50M
        new_best_rd_stats_y->rate +
5135
2.50M
        x->mode_costs.skip_txfm_cost[skip_ctx]
5136
2.50M
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
5137
2.50M
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
5138
2.50M
  }
5139
2.61M
  search_state->best_y_rdcost = *new_best_rd_stats_y;
5140
2.61M
  memcpy(ctx->blk_skip, txfm_info->blk_skip,
5141
2.61M
         sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
5142
2.61M
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
5143
2.61M
}
5144
5145
// Find the best RD for a reference frame (among single reference modes)
5146
// and store +10% of it in the 0-th element in ref_frame_rd.
5147
0
static inline void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
5148
0
  assert(ref_frame_rd[0] == INT64_MAX);
5149
0
  int64_t ref_copy[REF_FRAMES - 1];
5150
0
  memcpy(ref_copy, ref_frame_rd + 1,
5151
0
         sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
5152
0
  qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
5153
5154
0
  int64_t cutoff = ref_copy[0];
5155
  // The cut-off is within 10% of the best.
5156
0
  if (cutoff != INT64_MAX) {
5157
0
    assert(cutoff < INT64_MAX / 200);
5158
0
    cutoff = (110 * cutoff) / 100;
5159
0
  }
5160
0
  ref_frame_rd[0] = cutoff;
5161
0
}
5162
5163
// Check if either frame is within the cutoff.
5164
static inline bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
5165
                                        MV_REFERENCE_FRAME frame1,
5166
0
                                        MV_REFERENCE_FRAME frame2) {
5167
0
  assert(frame2 > 0);
5168
0
  return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
5169
0
         ref_frame_rd[frame2] <= ref_frame_rd[0];
5170
0
}
5171
5172
static inline void evaluate_motion_mode_for_winner_candidates(
5173
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
5174
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
5175
    PICK_MODE_CONTEXT *const ctx,
5176
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
5177
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
5178
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
5179
369k
    InterModeSearchState *const search_state, int64_t *yrd) {
5180
369k
  const AV1_COMMON *const cm = &cpi->common;
5181
369k
  const int num_planes = av1_num_planes(cm);
5182
369k
  MACROBLOCKD *const xd = &x->e_mbd;
5183
369k
  MB_MODE_INFO *const mbmi = xd->mi[0];
5184
369k
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
5185
369k
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;
5186
5187
1.07M
  for (int cand = 0; cand < num_best_cand; cand++) {
5188
705k
    RD_STATS rd_stats;
5189
705k
    RD_STATS rd_stats_y;
5190
705k
    RD_STATS rd_stats_uv;
5191
705k
    av1_init_rd_stats(&rd_stats);
5192
705k
    av1_init_rd_stats(&rd_stats_y);
5193
705k
    av1_init_rd_stats(&rd_stats_uv);
5194
705k
    int rate_mv;
5195
5196
705k
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
5197
705k
    args->skip_motion_mode =
5198
705k
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
5199
705k
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
5200
705k
    rd_stats.rate =
5201
705k
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;
5202
5203
    // Continue if the best candidate is compound.
5204
705k
    if (!is_inter_singleref_mode(mbmi->mode)) continue;
5205
5206
705k
    x->txfm_search_info.skip_txfm = 0;
5207
705k
    struct macroblockd_plane *pd = xd->plane;
5208
705k
    const BUFFER_SET orig_dst = {
5209
705k
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
5210
705k
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
5211
705k
    };
5212
5213
705k
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5214
    // Initialize motion mode to simple translation
5215
    // Calculation of switchable rate depends on it.
5216
705k
    mbmi->motion_mode = 0;
5217
705k
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
5218
2.23M
    for (int i = 0; i < num_planes; i++) {
5219
1.53M
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
5220
1.53M
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
5221
1.53M
    }
5222
5223
705k
    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
5224
705k
                           search_state->best_skip_rd[1] };
5225
705k
    int64_t this_yrd = INT64_MAX;
5226
705k
    int64_t ret_value = motion_mode_rd(
5227
705k
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
5228
705k
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
5229
705k
        do_tx_search, inter_modes_info, 1, &this_yrd);
5230
5231
705k
    if (ret_value != INT64_MAX) {
5232
52.1k
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
5233
52.1k
      const THR_MODES mode_enum = get_prediction_mode_idx(
5234
52.1k
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5235
      // Collect mode stats for multiwinner mode processing
5236
52.1k
      store_winner_mode_stats(
5237
52.1k
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
5238
52.1k
          mode_enum, NULL, bsize, rd_stats.rdcost,
5239
52.1k
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
5240
5241
52.1k
      int64_t best_scaled_rd = search_state->best_rd;
5242
52.1k
      int64_t this_scaled_rd = rd_stats.rdcost;
5243
52.1k
      if (search_state->best_mode_index != THR_INVALID)
5244
52.0k
        increase_warp_mode_rd(&search_state->best_mbmode, mbmi, &best_scaled_rd,
5245
52.0k
                              &this_scaled_rd,
5246
52.0k
                              cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct);
5247
5248
52.1k
      if (this_scaled_rd < best_scaled_rd) {
5249
48.1k
        *yrd = this_yrd;
5250
48.1k
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
5251
48.1k
                            &rd_stats_uv, mode_enum, x, do_tx_search);
5252
48.1k
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
5253
48.1k
      }
5254
52.1k
    }
5255
705k
  }
5256
369k
}
5257
5258
/*!\cond */
5259
// Arguments for speed feature pruning of inter mode search
5260
typedef struct {
5261
  int *skip_motion_mode;
5262
  mode_skip_mask_t *mode_skip_mask;
5263
  InterModeSearchState *search_state;
5264
  int skip_ref_frame_mask;
5265
  int reach_first_comp_mode;
5266
  int mode_thresh_mul_fact;
5267
  int num_single_modes_processed;
5268
  int prune_cpd_using_sr_stats_ready;
5269
} InterModeSFArgs;
5270
/*!\endcond */
5271
5272
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
5273
                           int64_t *ref_frame_rd, int midx,
5274
143M
                           InterModeSFArgs *args, int is_low_temp_var) {
5275
143M
  const SPEED_FEATURES *const sf = &cpi->sf;
5276
143M
  MACROBLOCKD *const xd = &x->e_mbd;
5277
  // Get the actual prediction mode we are trying in this iteration
5278
143M
  const THR_MODES mode_enum = av1_default_mode_order[midx];
5279
143M
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
5280
143M
  const PREDICTION_MODE this_mode = mode_def->mode;
5281
143M
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
5282
143M
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
5283
143M
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
5284
143M
  const int comp_pred = second_ref_frame > INTRA_FRAME;
5285
5286
143M
  if (ref_frame == INTRA_FRAME) return 1;
5287
5288
143M
  const FRAME_UPDATE_TYPE update_type =
5289
143M
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
5290
143M
  if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
5291
0
      comp_pred) {
5292
0
    return 1;
5293
0
  }
5294
5295
  // This is for real time encoding.
5296
143M
  if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
5297
0
      this_mode != NEARESTMV)
5298
0
    return 1;
5299
5300
  // Check if this mode should be skipped because it is incompatible with the
5301
  // current frame
5302
143M
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
5303
105M
    return 1;
5304
38.2M
  const int ret = inter_mode_search_order_independent_skip(
5305
38.2M
      cpi, x, args->mode_skip_mask, args->search_state,
5306
38.2M
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
5307
38.2M
  if (ret == 1) return 1;
5308
3.72M
  *(args->skip_motion_mode) = (ret == 2);
5309
5310
  // We've reached the first compound prediction mode, get stats from the
5311
  // single reference predictors to help with pruning.
5312
  // Disable this pruning logic if interpolation filter search was skipped for
5313
  // single prediction modes as it can result in aggressive pruning of compound
5314
  // prediction modes due to the absence of modelled_rd populated by
5315
  // av1_interpolation_filter_search().
5316
  // TODO(Remya): Check the impact of the sf
5317
  // 'prune_comp_search_by_single_result' if compound prediction modes are
5318
  // enabled in future for REALTIME encode.
5319
3.72M
  if (!sf->interp_sf.skip_interp_filter_search &&
5320
2.56M
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
5321
0
      args->reach_first_comp_mode == 0) {
5322
0
    analyze_single_states(cpi, args->search_state);
5323
0
    args->reach_first_comp_mode = 1;
5324
0
  }
5325
5326
  // Prune aggressively when best mode is skippable.
5327
3.72M
  int mul_fact = args->search_state->best_mode_skippable
5328
3.72M
                     ? args->mode_thresh_mul_fact
5329
3.72M
                     : (1 << MODE_THRESH_QBITS);
5330
3.72M
  int64_t mode_threshold =
5331
3.72M
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
5332
3.72M
      MODE_THRESH_QBITS;
5333
5334
3.72M
  if (args->search_state->best_rd < mode_threshold) return 1;
5335
5336
  // Skip this compound mode based on the RD results from the single prediction
5337
  // modes
5338
3.70M
  if (!sf->interp_sf.skip_interp_filter_search &&
5339
2.56M
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
5340
0
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
5341
0
                                       ref_frame, second_ref_frame, x))
5342
0
      return 1;
5343
0
  }
5344
5345
4.32M
  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
5346
    // After we done with single reference modes, find the 2nd best RD
5347
    // for a reference frame. Only search compound modes that have a reference
5348
    // frame at least as good as the 2nd best.
5349
0
    if (!args->prune_cpd_using_sr_stats_ready &&
5350
0
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
5351
0
      find_top_ref(ref_frame_rd);
5352
0
      args->prune_cpd_using_sr_stats_ready = 1;
5353
0
    }
5354
0
    if (args->prune_cpd_using_sr_stats_ready &&
5355
0
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
5356
0
      return 1;
5357
0
  }
5358
5359
  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
5360
3.70M
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
5361
4.32M
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
5362
0
    return 1;
5363
0
  }
5364
5365
4.32M
  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
5366
0
    if (compound_skip_using_neighbor_refs(
5367
0
            xd, this_mode, ref_frames,
5368
0
            sf->inter_sf.prune_ext_comp_using_neighbors))
5369
0
      return 1;
5370
0
  }
5371
5372
4.32M
  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
5373
0
    if (skip_compound_using_best_single_mode_ref(
5374
0
            this_mode, ref_frames, args->search_state->best_single_mode,
5375
0
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
5376
0
      return 1;
5377
0
  }
5378
5379
3.70M
  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
5380
1.75M
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
5381
1.75M
    if (skip_nearest_near_mv_using_refmv_weight(
5382
1.75M
            x, this_mode, ref_frame_type,
5383
1.75M
            args->search_state->best_mbmode.mode)) {
5384
      // Ensure the mode is pruned only when the current block has obtained a
5385
      // valid inter mode.
5386
67.6k
      assert(is_inter_mode(args->search_state->best_mbmode.mode));
5387
67.6k
      return 1;
5388
67.6k
    }
5389
1.75M
  }
5390
5391
3.64M
  if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
5392
0
      ref_frame == GOLDEN_FRAME && !comp_pred) {
5393
0
    const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
5394
0
    if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
5395
0
        args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
5396
0
      if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
5397
0
        return 1;
5398
0
    }
5399
0
  }
5400
5401
3.64M
  return 0;
5402
3.64M
}
5403
5404
static void record_best_compound(REFERENCE_MODE reference_mode,
5405
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
5406
                                 InterModeSearchState *search_state,
5407
1.95M
                                 int compmode_cost) {
5408
1.95M
  int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
5409
5410
1.95M
  if (reference_mode == REFERENCE_MODE_SELECT) {
5411
1.94M
    single_rate = rd_stats->rate - compmode_cost;
5412
1.94M
    hybrid_rate = rd_stats->rate;
5413
1.94M
  } else {
5414
63
    single_rate = rd_stats->rate;
5415
63
    hybrid_rate = rd_stats->rate + compmode_cost;
5416
63
  }
5417
5418
1.95M
  single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
5419
1.95M
  hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);
5420
5421
1.95M
  if (!comp_pred) {
5422
1.94M
    if (single_rd < search_state->best_pred_rd[SINGLE_REFERENCE])
5423
1.89M
      search_state->best_pred_rd[SINGLE_REFERENCE] = single_rd;
5424
1.94M
  } else {
5425
210
    if (single_rd < search_state->best_pred_rd[COMPOUND_REFERENCE])
5426
0
      search_state->best_pred_rd[COMPOUND_REFERENCE] = single_rd;
5427
210
  }
5428
1.95M
  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
5429
1.89M
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
5430
1.95M
}
5431
5432
// Does a transform search over a list of the best inter mode candidates.
5433
// This is called if the original mode search computed an RD estimate
5434
// for the transform search rather than doing a full search.
5435
static void tx_search_best_inter_candidates(
5436
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
5437
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
5438
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
5439
    InterModeSearchState *search_state, RD_STATS *rd_cost,
5440
44.5k
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
5441
44.5k
  AV1_COMMON *const cm = &cpi->common;
5442
44.5k
  MACROBLOCKD *const xd = &x->e_mbd;
5443
44.5k
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5444
44.5k
  const ModeCosts *mode_costs = &x->mode_costs;
5445
44.5k
  const int num_planes = av1_num_planes(cm);
5446
44.5k
  const int skip_ctx = av1_get_skip_txfm_context(xd);
5447
44.5k
  MB_MODE_INFO *const mbmi = xd->mi[0];
5448
44.5k
  InterModesInfo *inter_modes_info = x->inter_modes_info;
5449
44.5k
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
5450
44.5k
  search_state->best_rd = best_rd_so_far;
5451
44.5k
  search_state->best_mode_index = THR_INVALID;
5452
  // Initialize best mode stats for winner mode processing
5453
44.5k
  x->winner_mode_count = 0;
5454
44.5k
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5455
44.5k
                          NULL, bsize, best_rd_so_far,
5456
44.5k
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
5457
44.5k
  inter_modes_info->num =
5458
44.5k
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
5459
44.5k
          ? inter_modes_info->num
5460
44.5k
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
5461
44.5k
  const int64_t top_est_rd =
5462
44.5k
      inter_modes_info->num > 0
5463
44.5k
          ? inter_modes_info
5464
44.5k
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
5465
44.5k
          : INT64_MAX;
5466
44.5k
  *yrd = INT64_MAX;
5467
44.5k
  int64_t best_rd_in_this_partition = INT64_MAX;
5468
44.5k
  int num_inter_mode_cands = inter_modes_info->num;
5469
44.5k
  int newmv_mode_evaled = 0;
5470
44.5k
  int max_allowed_cands = INT_MAX;
5471
44.5k
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
5472
    // The bound on the no. of inter mode candidates, beyond which the
5473
    // candidates are limited if a newmv mode got evaluated, is set as
5474
    // max_allowed_cands + 1.
5475
15.5k
    const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
5476
15.5k
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
5477
15.5k
    max_allowed_cands =
5478
15.5k
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
5479
15.5k
  }
5480
5481
44.5k
  int num_mode_thresh = INT_MAX;
5482
44.5k
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
5483
    // Bound the no. of transform searches per prediction mode beyond a
5484
    // threshold.
5485
15.5k
    const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
5486
15.5k
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
5487
15.5k
    num_mode_thresh =
5488
15.5k
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
5489
15.5k
  }
5490
5491
44.5k
  int num_tx_cands = 0;
5492
44.5k
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
5493
  // Iterate over best inter mode candidates and perform tx search
5494
222k
  for (int j = 0; j < num_inter_mode_cands; ++j) {
5495
178k
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
5496
178k
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
5497
178k
    const PREDICTION_MODE prediction_mode = mbmi->mode;
5498
178k
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
5499
178k
    if (curr_est_rd * 0.80 > top_est_rd) break;
5500
5501
177k
    if (num_tx_cands > num_mode_thresh) {
5502
4.36k
      if ((prediction_mode != NEARESTMV &&
5503
2.56k
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
5504
2.41k
          (prediction_mode == NEARESTMV &&
5505
1.79k
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
5506
2.37k
        continue;
5507
4.36k
    }
5508
5509
175k
    txfm_info->skip_txfm = 0;
5510
175k
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5511
5512
    // Select prediction reference frames.
5513
175k
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
5514
472k
    for (int i = 0; i < num_planes; i++) {
5515
296k
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
5516
296k
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
5517
296k
    }
5518
5519
175k
    bool is_predictor_built = false;
5520
5521
    // Initialize RD stats
5522
175k
    RD_STATS rd_stats;
5523
175k
    RD_STATS rd_stats_y;
5524
175k
    RD_STATS rd_stats_uv;
5525
175k
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
5526
175k
    int64_t skip_rd = INT64_MAX;
5527
175k
    const int txfm_rd_gate_level = get_txfm_rd_gate_level(
5528
175k
        cm->seq_params->enable_masked_compound,
5529
175k
        cpi->sf.inter_sf.txfm_rd_gate_level, bsize, TX_SEARCH_DEFAULT,
5530
175k
        /*eval_motion_mode=*/0);
5531
175k
    if (txfm_rd_gate_level) {
5532
      // Check if the mode is good enough based on skip RD
5533
50.6k
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
5534
50.6k
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
5535
50.6k
      int eval_txfm = check_txfm_eval(x, bsize, search_state->best_skip_rd[0],
5536
50.6k
                                      skip_rd, txfm_rd_gate_level, 0);
5537
50.6k
      if (!eval_txfm) continue;
5538
50.6k
    }
5539
5540
    // Build the prediction for this mode
5541
160k
    if (!is_predictor_built) {
5542
160k
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
5543
160k
                                    av1_num_planes(cm) - 1);
5544
160k
    }
5545
160k
    if (mbmi->motion_mode == OBMC_CAUSAL) {
5546
0
      av1_build_obmc_inter_predictors_sb(cm, xd);
5547
0
    }
5548
5549
160k
    num_tx_cands++;
5550
160k
    if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
5551
160k
    num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
5552
160k
    int64_t this_yrd = INT64_MAX;
5553
    // Do the transform search
5554
160k
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
5555
160k
                         mode_rate, search_state->best_rd)) {
5556
99.1k
      continue;
5557
99.1k
    } else {
5558
61.6k
      const int y_rate =
5559
61.6k
          rd_stats.skip_txfm
5560
61.6k
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
5561
61.6k
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
5562
61.6k
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);
5563
5564
61.6k
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
5565
47.9k
        inter_mode_data_push(
5566
47.9k
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
5567
47.9k
            rd_stats_y.rate + rd_stats_uv.rate +
5568
47.9k
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
5569
47.9k
      }
5570
61.6k
    }
5571
5572
61.6k
    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
5573
5574
61.6k
    const THR_MODES mode_enum = get_prediction_mode_idx(
5575
61.6k
        prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5576
5577
    // Collect mode stats for multiwinner mode processing
5578
61.6k
    const int txfm_search_done = 1;
5579
61.6k
    store_winner_mode_stats(
5580
61.6k
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
5581
61.6k
        NULL, bsize, rd_stats.rdcost,
5582
61.6k
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
5583
5584
61.6k
    int64_t best_scaled_rd = search_state->best_rd;
5585
61.6k
    int64_t this_scaled_rd = rd_stats.rdcost;
5586
61.6k
    increase_warp_mode_rd(&search_state->best_mbmode, mbmi, &best_scaled_rd,
5587
61.6k
                          &this_scaled_rd,
5588
61.6k
                          cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct);
5589
61.6k
    if (this_scaled_rd < best_rd_in_this_partition) {
5590
61.5k
      best_rd_in_this_partition = rd_stats.rdcost;
5591
61.5k
      *yrd = this_yrd;
5592
61.5k
    }
5593
5594
61.6k
    if (this_scaled_rd < best_scaled_rd) {
5595
61.5k
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
5596
61.5k
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
5597
61.5k
      search_state->best_skip_rd[0] = skip_rd;
5598
      // Limit the total number of modes to be evaluated if the first is valid
5599
      // and transform skip or compound
5600
61.5k
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
5601
18.1k
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
5602
          // Evaluate more candidates at high quantizers where occurrence of
5603
          // transform skip is high.
5604
266
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
5605
266
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
5606
266
          num_inter_mode_cands =
5607
266
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
5608
17.9k
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
5609
0
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
5610
          // Evaluate more candidates at low quantizers where occurrence of
5611
          // single reference mode is high.
5612
0
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
5613
0
                                                { 10, 7, 5, 3 } };
5614
0
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
5615
0
          num_inter_mode_cands = AOMMIN(
5616
0
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
5617
0
        }
5618
18.1k
      }
5619
61.5k
    }
5620
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
5621
    // a newmv mode was evaluated already.
5622
61.6k
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
5623
61.6k
  }
5624
44.5k
}
5625
5626
// Number of winner simple translation mode candidates to keep for the later
// motion mode search. NOTE(review): presumably indexed by the level of the
// motion_mode_for_winner_cand speed feature - confirm at the use site.
5627
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
5628
5629
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
5630
// speed feature. This list consists of modes that have only searched
5631
// SIMPLE_TRANSLATION. The final list will be used to search other motion
5632
// modes after the initial RD search.
5633
static void handle_winner_cand(
5634
    MB_MODE_INFO *const mbmi,
5635
    motion_mode_best_st_candidate *best_motion_mode_cands,
5636
    int max_winner_motion_mode_cand, int64_t this_rd,
5637
725k
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
5638
  // Number of current motion mode candidates in list
5639
725k
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
5640
725k
  int valid_motion_mode_cand_loc = num_motion_mode_cand;
5641
5642
  // find the best location to insert new motion mode candidate
5643
751k
  for (int j = 0; j < num_motion_mode_cand; j++) {
5644
375k
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
5645
350k
      valid_motion_mode_cand_loc = j;
5646
350k
      break;
5647
350k
    }
5648
375k
  }
5649
5650
  // Insert motion mode if location is found
5651
725k
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
5652
722k
    if (num_motion_mode_cand > 0 &&
5653
357k
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
5654
353k
      memmove(
5655
353k
          &best_motion_mode_cands
5656
353k
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
5657
353k
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
5658
353k
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
5659
353k
           valid_motion_mode_cand_loc) *
5660
353k
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
5661
722k
    motion_mode_cand->mbmi = *mbmi;
5662
722k
    motion_mode_cand->rd_cost = this_rd;
5663
722k
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
5664
722k
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
5665
722k
        *motion_mode_cand;
5666
722k
    best_motion_mode_cands->num_motion_mode_cand =
5667
722k
        AOMMIN(max_winner_motion_mode_cand,
5668
722k
               best_motion_mode_cands->num_motion_mode_cand + 1);
5669
722k
  }
5670
725k
}
5671
5672
/*!\brief Search intra modes in interframes
5673
 *
5674
 * \ingroup intra_mode_search
5675
 *
5676
 * This function searches for the best intra mode when the current frame is an
5677
 * interframe. This function however does *not* handle luma palette mode.
5678
 * Palette mode is currently handled by \ref av1_search_palette_mode.
5679
 *
5680
 * This function will first iterate through the luma mode candidates to find the
5681
 * best luma intra mode. Once the best luma mode it's found, it will then search
5682
 * for the best chroma mode. Because palette mode is currently not handled by
5683
 * here, a cache of uv mode is stored in
5684
 * InterModeSearchState::intra_search_state so it can be reused later by \ref
5685
 * av1_search_palette_mode.
5686
 *
5687
 * \param[in,out] search_state      Struct keep track of the prediction mode
5688
 *                                  search state in interframe.
5689
 *
5690
 * \param[in]     cpi               Top-level encoder structure.
5691
 * \param[in,out] x                 Pointer to struct holding all the data for
5692
 *                                  the current prediction block.
5693
 * \param[out]    rd_cost           Stores the best rd_cost among all the
5694
 *                                  prediction modes searched.
5695
 * \param[in]     bsize             Current block size.
5696
 * \param[in,out] ctx               Structure to hold the number of 4x4 blks to
5697
 *                                  copy the tx_type and txfm_skip arrays.
5698
 *                                  for only the Y plane.
5699
 * \param[in]     sf_args           Stores the list of intra mode candidates
5700
 *                                  to be searched.
5701
 * \param[in]     intra_ref_frame_cost  The entropy cost for signaling that the
5702
 *                                      current ref frame is an intra frame.
5703
 * \param[in]     yrd_threshold     The rdcost threshold for luma intra mode to
5704
 *                                  terminate chroma intra mode search.
5705
 *
5706
 * \remark If a new best mode is found, search_state and rd_costs are updated
5707
 * correspondingly. While x is also modified, it is only used as a temporary
5708
 * buffer, and the final decisions are stored in search_state.
5709
 */
5710
static inline void search_intra_modes_in_interframe(
5711
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
5712
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5713
    const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
5714
933k
    int64_t yrd_threshold) {
5715
933k
  const AV1_COMMON *const cm = &cpi->common;
5716
933k
  const SPEED_FEATURES *const sf = &cpi->sf;
5717
933k
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
5718
933k
  MACROBLOCKD *const xd = &x->e_mbd;
5719
933k
  MB_MODE_INFO *const mbmi = xd->mi[0];
5720
933k
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;
5721
5722
933k
  int is_best_y_mode_intra = 0;
5723
933k
  RD_STATS best_intra_rd_stats_y;
5724
933k
  int64_t best_rd_y = INT64_MAX;
5725
933k
  int best_mode_cost_y = -1;
5726
933k
  MB_MODE_INFO best_mbmi = *xd->mi[0];
5727
933k
  THR_MODES best_mode_enum = THR_INVALID;
5728
933k
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
5729
933k
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
5730
933k
  const int num_4x4 = bsize_to_num_blk(bsize);
5731
5732
  // Performs luma search
5733
933k
  int64_t best_model_rd = INT64_MAX;
5734
933k
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
5735
4.66M
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
5736
3.73M
    top_intra_model_rd[i] = INT64_MAX;
5737
3.73M
  }
5738
5739
933k
  if (cpi->oxcf.algo_cfg.sharpness) {
5740
0
    int bh = mi_size_high[bsize];
5741
0
    int bw = mi_size_wide[bsize];
5742
0
    if (bh > 4 || bw > 4) return;
5743
0
  }
5744
5745
50.5M
  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
5746
49.7M
    if (sf->intra_sf.skip_intra_in_interframe &&
5747
49.7M
        search_state->intra_search_state.skip_intra_modes)
5748
119k
      break;
5749
49.6M
    set_y_mode_and_delta_angle(
5750
49.6M
        mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
5751
49.6M
    assert(mbmi->mode < INTRA_MODE_END);
5752
5753
    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
5754
49.6M
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
5755
727k
      continue;
5756
5757
48.9M
    const THR_MODES mode_enum =
5758
48.9M
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
5759
48.9M
    if ((!intra_mode_cfg->enable_smooth_intra ||
5760
48.9M
         cpi->sf.intra_sf.disable_smooth_intra) &&
5761
48.9M
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
5762
47.3M
         mbmi->mode == SMOOTH_V_PRED))
5763
2.40M
      continue;
5764
46.5M
    if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
5765
0
      continue;
5766
46.5M
    if (av1_is_directional_mode(mbmi->mode) &&
5767
44.8M
        !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
5768
0
        mbmi->angle_delta[PLANE_TYPE_Y] != 0)
5769
0
      continue;
5770
46.5M
    const PREDICTION_MODE this_mode = mbmi->mode;
5771
5772
46.5M
    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
5773
46.5M
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
5774
46.5M
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
5775
46.5M
    x->txfm_search_info.skip_txfm = 0;
5776
5777
46.5M
    if (this_mode != DC_PRED) {
5778
      // Only search the oblique modes if the best so far is
5779
      // one of the neighboring directional modes
5780
45.6M
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
5781
0
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
5782
0
        if (search_state->best_mode_index != THR_INVALID &&
5783
0
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
5784
0
          continue;
5785
0
      }
5786
45.6M
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
5787
0
        if (conditional_skipintra(
5788
0
                this_mode, search_state->intra_search_state.best_intra_mode))
5789
0
          continue;
5790
0
      }
5791
45.6M
    }
5792
5793
46.5M
    RD_STATS intra_rd_stats_y;
5794
46.5M
    int mode_cost_y;
5795
46.5M
    int64_t intra_rd_y = INT64_MAX;
5796
46.5M
    const int is_luma_result_valid = av1_handle_intra_y_mode(
5797
46.5M
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
5798
46.5M
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
5799
46.5M
        &best_model_rd, top_intra_model_rd);
5800
5801
46.5M
    if (intra_rd_y < INT64_MAX) {
5802
1.20M
      adjust_cost(cpi, x, &intra_rd_y);
5803
1.20M
    }
5804
5805
46.5M
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
5806
773k
      is_best_y_mode_intra = 1;
5807
773k
      if (intra_rd_y < best_rd_y) {
5808
630k
        best_intra_rd_stats_y = intra_rd_stats_y;
5809
630k
        best_mode_cost_y = mode_cost_y;
5810
630k
        best_rd_y = intra_rd_y;
5811
630k
        best_mbmi = *mbmi;
5812
630k
        best_mode_enum = mode_enum;
5813
630k
        memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
5814
630k
               sizeof(best_blk_skip[0]) * num_4x4);
5815
630k
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
5816
630k
      }
5817
773k
    }
5818
46.5M
  }
5819
5820
933k
  if (!is_best_y_mode_intra) {
5821
318k
    return;
5822
318k
  }
5823
5824
933k
  assert(best_rd_y < INT64_MAX);
5825
5826
  // Restores the best luma mode
5827
614k
  *mbmi = best_mbmi;
5828
614k
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
5829
614k
         sizeof(best_blk_skip[0]) * num_4x4);
5830
614k
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);
5831
5832
  // Performs chroma search
5833
614k
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
5834
614k
  av1_init_rd_stats(&intra_rd_stats);
5835
614k
  av1_init_rd_stats(&intra_rd_stats_uv);
5836
614k
  const int num_planes = av1_num_planes(cm);
5837
614k
  if (num_planes > 1) {
5838
269k
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
5839
269k
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
5840
269k
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);
5841
5842
269k
    if (!intra_uv_mode_valid) {
5843
157
      return;
5844
157
    }
5845
269k
  }
5846
5847
  // Merge the luma and chroma rd stats
5848
614k
  assert(best_mode_cost_y >= 0);
5849
614k
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
5850
614k
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
5851
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
5852
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
5853
    // (prediction granularity), so we account for it in the full rate,
5854
    // not the tokenonly rate.
5855
587k
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
5856
587k
  }
5857
5858
614k
  const ModeCosts *mode_costs = &x->mode_costs;
5859
614k
  const PREDICTION_MODE mode = mbmi->mode;
5860
614k
  if (num_planes > 1 && xd->is_chroma_ref) {
5861
269k
    const int uv_mode_cost =
5862
269k
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
5863
269k
    intra_rd_stats.rate +=
5864
269k
        intra_rd_stats_uv.rate +
5865
269k
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
5866
269k
  }
5867
5868
  // Intra block is always coded as non-skip
5869
614k
  intra_rd_stats.skip_txfm = 0;
5870
614k
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
5871
  // Add in the cost of the no skip flag.
5872
614k
  const int skip_ctx = av1_get_skip_txfm_context(xd);
5873
614k
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
5874
  // Calculate the final RD estimate for this mode.
5875
614k
  const int64_t this_rd =
5876
614k
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
5877
  // Keep record of best intra rd
5878
614k
  if (this_rd < search_state->best_intra_rd) {
5879
614k
    search_state->best_intra_rd = this_rd;
5880
614k
    intra_search_state->best_intra_mode = mode;
5881
614k
  }
5882
5883
2.45M
  for (int i = 0; i < REFERENCE_MODES; ++i) {
5884
1.84M
    search_state->best_pred_rd[i] =
5885
1.84M
        AOMMIN(search_state->best_pred_rd[i], this_rd);
5886
1.84M
  }
5887
5888
614k
  intra_rd_stats.rdcost = this_rd;
5889
5890
614k
  adjust_rdcost(cpi, x, &intra_rd_stats);
5891
5892
  // Collect mode stats for multiwinner mode processing
5893
614k
  const int txfm_search_done = 1;
5894
614k
  store_winner_mode_stats(
5895
614k
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
5896
614k
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
5897
614k
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
5898
614k
  if (intra_rd_stats.rdcost < search_state->best_rd) {
5899
605k
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
5900
605k
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
5901
605k
                        best_mode_enum, x, txfm_search_done);
5902
605k
  }
5903
614k
}
5904
5905
#if !CONFIG_REALTIME_ONLY
5906
// Prepare inter_cost and intra_cost from TPL stats, which are used as ML
5907
// features in intra mode pruning.
5908
static inline void calculate_cost_from_tpl_data(const AV1_COMP *cpi,
5909
                                                MACROBLOCK *x, BLOCK_SIZE bsize,
5910
                                                int mi_row, int mi_col,
5911
                                                int64_t *inter_cost,
5912
932k
                                                int64_t *intra_cost) {
5913
932k
  const AV1_COMMON *const cm = &cpi->common;
5914
  // Only consider full SB.
5915
932k
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
5916
932k
  const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
5917
932k
  const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5918
932k
                  (block_size_high[sb_size] / tpl_bsize_1d);
5919
932k
  SuperBlockEnc *sb_enc = &x->sb_enc;
5920
932k
  if (sb_enc->tpl_data_count == len) {
5921
201k
    const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5922
201k
    const int tpl_stride = sb_enc->tpl_stride;
5923
201k
    const int tplw = mi_size_wide[tpl_bsize];
5924
201k
    const int tplh = mi_size_high[tpl_bsize];
5925
201k
    const int nw = mi_size_wide[bsize] / tplw;
5926
201k
    const int nh = mi_size_high[bsize] / tplh;
5927
201k
    if (nw >= 1 && nh >= 1) {
5928
1.31k
      const int of_h = mi_row % mi_size_high[sb_size];
5929
1.31k
      const int of_w = mi_col % mi_size_wide[sb_size];
5930
1.31k
      const int start = of_h / tplh * tpl_stride + of_w / tplw;
5931
5932
4.31k
      for (int k = 0; k < nh; k++) {
5933
11.4k
        for (int l = 0; l < nw; l++) {
5934
8.41k
          *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5935
8.41k
          *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5936
8.41k
        }
5937
2.99k
      }
5938
1.31k
      *inter_cost /= nw * nh;
5939
1.31k
      *intra_cost /= nw * nh;
5940
1.31k
    }
5941
201k
  }
5942
932k
}
5943
#endif  // !CONFIG_REALTIME_ONLY
5944
5945
// When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
5946
// intra mode search.
5947
static inline void skip_intra_modes_in_interframe(
5948
    AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
5949
    InterModeSearchState *search_state, const SPEED_FEATURES *const sf,
5950
933k
    int64_t inter_cost, int64_t intra_cost) {
5951
933k
  MACROBLOCKD *const xd = &x->e_mbd;
5952
933k
  const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME;
5953
933k
  if (sf->rt_sf.prune_intra_mode_based_on_mv_range &&
5954
0
      bsize > sf->part_sf.max_intra_bsize && !comp_pred) {
5955
0
    const MV best_mv = search_state->best_mbmode.mv[0].as_mv;
5956
0
    const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range;
5957
0
    if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh &&
5958
0
        x->source_variance > 128) {
5959
0
      search_state->intra_search_state.skip_intra_modes = 1;
5960
0
      return;
5961
0
    }
5962
0
  }
5963
5964
933k
  const unsigned int src_var_thresh_intra_skip = 1;
5965
933k
  const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
5966
933k
  if (!(skip_intra_in_interframe &&
5967
933k
        (x->source_variance > src_var_thresh_intra_skip)))
5968
2.42k
    return;
5969
5970
  // Prune intra search based on best inter mode being transfrom skip.
5971
930k
  if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
5972
113k
    const int qindex_thresh[2] = { 200, MAXQ };
5973
113k
    const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
5974
113k
    if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
5975
90.3k
        (x->qindex <= qindex_thresh[ind])) {
5976
90.3k
      search_state->intra_search_state.skip_intra_modes = 1;
5977
90.3k
      return;
5978
90.3k
    } else if ((skip_intra_in_interframe >= 4) &&
5979
23.2k
               (inter_cost < 0 || intra_cost < 0)) {
5980
23.0k
      search_state->intra_search_state.skip_intra_modes = 1;
5981
23.0k
      return;
5982
23.0k
    }
5983
113k
  }
5984
  // Use ML model to prune intra search.
5985
817k
  if (inter_cost >= 0 && intra_cost >= 0) {
5986
1.06k
    const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
5987
1.06k
                                     ? &av1_intrap_nn_config
5988
1.06k
                                     : &av1_intrap_hd_nn_config;
5989
1.06k
    float nn_features[6];
5990
1.06k
    float scores[2] = { 0.0f };
5991
5992
1.06k
    nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
5993
1.06k
    nn_features[1] = (float)mi_size_wide_log2[bsize];
5994
1.06k
    nn_features[2] = (float)mi_size_high_log2[bsize];
5995
1.06k
    nn_features[3] = (float)intra_cost;
5996
1.06k
    nn_features[4] = (float)inter_cost;
5997
1.06k
    const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
5998
1.06k
    const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
5999
1.06k
    nn_features[5] = (float)(ac_q_max / ac_q);
6000
6001
1.06k
    av1_nn_predict(nn_features, nn_config, 1, scores);
6002
6003
    // For two parameters, the max prob returned from av1_nn_softmax equals
6004
    // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
6005
    // calling of av1_nn_softmax.
6006
1.06k
    const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
6007
1.06k
    assert(skip_intra_in_interframe <= 5);
6008
1.06k
    if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
6009
0
      search_state->intra_search_state.skip_intra_modes = 1;
6010
0
    }
6011
1.06k
  }
6012
817k
}
6013
6014
static inline bool skip_interp_filter_search(const AV1_COMP *cpi,
6015
4.25M
                                             int is_single_pred) {
6016
4.25M
  const MODE encoding_mode = cpi->oxcf.mode;
6017
4.25M
  if (encoding_mode == REALTIME) {
6018
0
    return (cpi->common.current_frame.reference_mode == SINGLE_REFERENCE &&
6019
0
            (cpi->sf.interp_sf.skip_interp_filter_search ||
6020
0
             cpi->sf.winner_mode_sf.winner_mode_ifs));
6021
4.25M
  } else if (encoding_mode == GOOD) {
6022
    // Skip interpolation filter search for single prediction modes.
6023
4.25M
    return (cpi->sf.interp_sf.skip_interp_filter_search && is_single_pred);
6024
4.25M
  }
6025
18.4E
  return false;
6026
4.25M
}
6027
6028
static inline int get_block_temp_var(const AV1_COMP *cpi, const MACROBLOCK *x,
6029
932k
                                     BLOCK_SIZE bsize) {
6030
932k
  const AV1_COMMON *const cm = &cpi->common;
6031
932k
  const SPEED_FEATURES *const sf = &cpi->sf;
6032
6033
932k
  if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION ||
6034
0
      !sf->rt_sf.short_circuit_low_temp_var ||
6035
932k
      !sf->rt_sf.prune_inter_modes_using_temp_var) {
6036
932k
    return 0;
6037
932k
  }
6038
6039
18.4E
  const int mi_row = x->e_mbd.mi_row;
6040
18.4E
  const int mi_col = x->e_mbd.mi_col;
6041
18.4E
  int is_low_temp_var = 0;
6042
6043
18.4E
  if (cm->seq_params->sb_size == BLOCK_64X64)
6044
0
    is_low_temp_var = av1_get_force_skip_low_temp_var_small_sb(
6045
0
        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
6046
18.4E
  else
6047
18.4E
    is_low_temp_var = av1_get_force_skip_low_temp_var(
6048
18.4E
        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
6049
6050
18.4E
  return is_low_temp_var;
6051
932k
}
6052
6053
// TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
6054
// Rate-distortion mode decision for one block of an inter-coded frame.
//
// Visible flow:
//   1. Initialise the search state, handle_inter_mode() arguments, reference
//      frame masks/costs and (when enabled) TPL-derived pruning stats.
//   2. Loop over the candidate inter modes, calling handle_inter_mode() for
//      every mode that is not rejected by skip_inter_mode().
//   3. Optionally evaluate further motion modes for the winner candidates and
//      run the transform search that was deferred when do_tx_search != 1.
//   4. Consider intra modes, refine the winner mode's transform, then try
//      palette mode and skip mode when their conditions hold.
//
// On success the best mode info is copied into xd->mi[0] and the coding
// context is stored through store_coding_context(). If no valid mode was found
// or the best RD is not below best_rd_so_far, rd_cost->rate is set to INT_MAX
// and rd_cost->rdcost to INT64_MAX before returning.
void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
6055
                            struct macroblock *x, struct RD_STATS *rd_cost,
6056
                            BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
6057
932k
                            int64_t best_rd_so_far) {
6058
932k
  AV1_COMMON *const cm = &cpi->common;
6059
932k
  const FeatureFlags *const features = &cm->features;
6060
932k
  const int num_planes = av1_num_planes(cm);
6061
932k
  const SPEED_FEATURES *const sf = &cpi->sf;
6062
932k
  MACROBLOCKD *const xd = &x->e_mbd;
6063
932k
  MB_MODE_INFO *const mbmi = xd->mi[0];
6064
932k
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
6065
932k
  int i;
6066
932k
  const ModeCosts *mode_costs = &x->mode_costs;
6067
932k
  const int *comp_inter_cost =
6068
932k
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
6069
6070
932k
  InterModeSearchState search_state;
6071
932k
  init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
6072
932k
  INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
6073
932k
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
6074
932k
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
6075
932k
  };
6076
932k
  HandleInterModeArgs args = { { NULL },
6077
932k
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
6078
932k
                               { NULL },
6079
932k
                               { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
6080
932k
                                 MAX_SB_SIZE >> 1 },
6081
932k
                               NULL,
6082
932k
                               NULL,
6083
932k
                               NULL,
6084
932k
                               search_state.modelled_rd,
6085
932k
                               INT_MAX,
6086
932k
                               INT_MAX,
6087
932k
                               search_state.simple_rd,
6088
932k
                               0,
6089
932k
                               false,
6090
932k
                               interintra_modes,
6091
932k
                               { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
6092
932k
                               { { 0, 0 } },
6093
932k
                               { 0 },
6094
932k
                               0,
6095
932k
                               0,
6096
932k
                               -1,
6097
932k
                               -1,
6098
932k
                               -1,
6099
932k
                               { 0 },
6100
932k
                               { 0 },
6101
932k
                               UINT_MAX };
6102
  // Currently, is_low_temp_var is used in real time encoding.
6103
932k
  const int is_low_temp_var = get_block_temp_var(cpi, x, bsize);
6104
6105
27.9M
  for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
6106
  // Indicates the appropriate number of simple translation winner modes for
6107
  // exhaustive motion mode evaluation
6108
932k
  const int max_winner_motion_mode_cand =
6109
932k
      num_winner_motion_modes[sf->winner_mode_sf.motion_mode_for_winner_cand];
6110
932k
  assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
6111
932k
  motion_mode_candidate motion_mode_cand;
6112
932k
  motion_mode_best_st_candidate best_motion_mode_cands;
6113
  // Initializing the number of motion mode candidates to zero.
6114
932k
  best_motion_mode_cands.num_motion_mode_cand = 0;
6115
10.2M
  for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
6116
9.32M
    best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
6117
6118
8.39M
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
6119
6120
932k
  av1_invalid_rd_stats(rd_cost);
6121
6122
8.39M
  for (i = 0; i < REF_FRAMES; ++i) {
6123
7.45M
    x->warp_sample_info[i].num = -1;
6124
7.45M
  }
6125
6126
  // Ref frames that are selected by square partition blocks.
6127
932k
  int picked_ref_frames_mask = 0;
6128
932k
  if (sf->inter_sf.prune_ref_frame_for_rect_partitions &&
6129
932k
      mbmi->partition != PARTITION_NONE) {
6130
    // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
6131
    // partition blocks. prune_ref_frame_for_rect_partitions >=2
6132
    // implies prune for vert, horiz and extended partition blocks.
6133
6.37k
    if ((mbmi->partition != PARTITION_VERT &&
6134
10
         mbmi->partition != PARTITION_HORZ) ||
6135
6.37k
        sf->inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
6136
3.43k
      picked_ref_frames_mask =
6137
3.43k
          fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
6138
3.43k
    }
6139
6.37k
  }
6140
6141
#if CONFIG_COLLECT_COMPONENT_TIMING
6142
  start_timing(cpi, set_params_rd_pick_inter_mode_time);
6143
#endif
6144
  // Skip ref frames that were never selected by square blocks. A zero
  // picked_ref_frames_mask means no pruning information, so nothing is skipped.
6145
932k
  const int skip_ref_frame_mask =
6146
932k
      picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
6147
932k
  mode_skip_mask_t mode_skip_mask;
6148
932k
  unsigned int ref_costs_single[REF_FRAMES];
6149
932k
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
6150
932k
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
6151
  // init params, set frame modes, speed features
6152
932k
  set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
6153
932k
                                skip_ref_frame_mask, ref_costs_single,
6154
932k
                                ref_costs_comp, yv12_mb);
6155
#if CONFIG_COLLECT_COMPONENT_TIMING
6156
  end_timing(cpi, set_params_rd_pick_inter_mode_time);
6157
#endif
6158
6159
932k
  int64_t best_est_rd = INT64_MAX;
6160
932k
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
6161
  // If do_tx_search is 0, only estimated RD should be computed.
6162
  // If do_tx_search is 1, all modes have TX search performed.
6163
932k
  const int do_tx_search =
6164
932k
      !((sf->inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
6165
897k
        (sf->inter_sf.inter_mode_rd_model_estimation == 2 &&
6166
689k
         num_pels_log2_lookup[bsize] > 8));
6167
932k
  InterModesInfo *inter_modes_info = x->inter_modes_info;
6168
932k
  inter_modes_info->num = 0;
6169
6170
  // Temporary buffers used by handle_inter_mode().
6171
932k
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
6172
6173
  // The best RD found for the reference frame, among single reference modes.
6174
  // Note that the 0-th element will contain a cut-off that is later used
6175
  // to determine if we should skip a compound mode.
6176
932k
  int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
6177
932k
                                       INT64_MAX, INT64_MAX, INT64_MAX,
6178
932k
                                       INT64_MAX, INT64_MAX };
6179
6180
  // Prepared stats used later to check if we could skip intra mode eval.
  // Both stay at -1 unless calculate_cost_from_tpl_data() below updates them.
6181
932k
  int64_t inter_cost = -1;
6182
932k
  int64_t intra_cost = -1;
6183
  // Need to tweak the threshold for hdres speed 0 & 1.
6184
932k
  const int mi_row = xd->mi_row;
6185
932k
  const int mi_col = xd->mi_col;
6186
6187
  // Obtain the relevant tpl stats for pruning inter modes
  // NOTE(review): inter_cost_info_from_tpl is only zeroed and filled inside
  // the prune_inter_modes_based_on_tpl branch below, yet its address is always
  // passed to handle_inter_mode() - confirm the callee reads it only when that
  // speed feature is on.
6188
932k
  PruneInfoFromTpl inter_cost_info_from_tpl;
6189
932k
#if !CONFIG_REALTIME_ONLY
6190
932k
  if (sf->inter_sf.prune_inter_modes_based_on_tpl) {
6191
    // x->tpl_keep_ref_frame[id] = 1 => no pruning in
6192
    // prune_ref_by_selective_ref_frame()
6193
    // x->tpl_keep_ref_frame[id] = 0  => ref frame can be pruned in
6194
    // prune_ref_by_selective_ref_frame()
6195
    // Populating valid_refs[idx] = 1 ensures that
6196
    // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
6197
    // pruned ref frame.
6198
368k
    int valid_refs[INTER_REFS_PER_FRAME];
6199
2.94M
    for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
6200
2.58M
      const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
6201
2.58M
      valid_refs[frame - 1] =
6202
2.58M
          x->tpl_keep_ref_frame[frame] ||
6203
2.58M
          !prune_ref_by_selective_ref_frame(
6204
2.58M
              cpi, x, refs, cm->cur_frame->ref_display_order_hint);
6205
2.58M
    }
6206
368k
    av1_zero(inter_cost_info_from_tpl);
6207
368k
    get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
6208
368k
                              &inter_cost_info_from_tpl);
6209
368k
  }
6210
  // The TPL costs for intra pruning are not computed when the smaller frame
  // dimension exceeds 480 and cpi->speed <= 1.
6211
932k
  const int do_pruning =
6212
932k
      (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
6213
932k
  if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
6214
932k
      cpi->oxcf.algo_cfg.enable_tpl_model)
6215
932k
    calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
6216
932k
                                 &intra_cost);
6217
932k
#endif  // !CONFIG_REALTIME_ONLY
6218
6219
  // Initialize best mode stats for winner mode processing.
6220
932k
  const int max_winner_mode_count =
6221
932k
      winner_mode_count_allowed[sf->winner_mode_sf.multi_winner_mode_type];
6222
932k
  zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
6223
932k
  x->winner_mode_count = 0;
6224
932k
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
6225
932k
                          NULL, bsize, best_rd_so_far,
6226
932k
                          sf->winner_mode_sf.multi_winner_mode_type, 0);
6227
6228
932k
  int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
6229
932k
  if (sf->inter_sf.prune_inter_modes_if_skippable) {
6230
    // Higher multiplication factor values for lower quantizers.
6231
932k
    mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
6232
932k
  }
6233
6234
  // Initialize arguments for mode loop speed features
6235
932k
  InterModeSFArgs sf_args = { &args.skip_motion_mode,
6236
932k
                              &mode_skip_mask,
6237
932k
                              &search_state,
6238
932k
                              skip_ref_frame_mask,
6239
932k
                              0,
6240
932k
                              mode_thresh_mul_fact,
6241
932k
                              0,
6242
932k
                              0 };
6243
932k
  int64_t best_inter_yrd = INT64_MAX;
6244
6245
  // This is the main loop of this function. It loops over all possible inter
6246
  // modes and calls handle_inter_mode() to compute the RD for each.
6247
  // Here midx is just an iterator index that should not be used by itself
6248
  // except to keep track of the number of modes searched. It should be used
6249
  // with av1_default_mode_order to get the enum that defines the mode, which
6250
  // can be used with av1_mode_defs to get the prediction mode and the ref
6251
  // frames.
6252
  // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
6253
  // good speedup for real time case. If we decide to use compound mode in real
6254
  // time, maybe we can modify av1_default_mode_order table.
6255
932k
  THR_MODES mode_start = THR_INTER_MODE_START;
6256
932k
  THR_MODES mode_end = THR_INTER_MODE_END;
6257
932k
  const CurrentFrame *const current_frame = &cm->current_frame;
6258
932k
  if (current_frame->reference_mode == SINGLE_REFERENCE) {
6259
0
    mode_start = SINGLE_REF_MODE_START;
6260
0
    mode_end = SINGLE_REF_MODE_END;
6261
0
  }
6262
6263
145M
  for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
6264
    // Get the actual prediction mode we are trying in this iteration
6265
144M
    const THR_MODES mode_enum = av1_default_mode_order[midx];
6266
144M
    const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
6267
144M
    const PREDICTION_MODE this_mode = mode_def->mode;
6268
144M
    const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
6269
6270
144M
    const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
6271
144M
    const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
6272
144M
    const int is_single_pred =
6273
144M
        ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
6274
144M
    const int comp_pred = second_ref_frame > INTRA_FRAME;
6275
6276
144M
    init_mbmi(mbmi, this_mode, ref_frames, cm);
6277
6278
144M
    txfm_info->skip_txfm = 0;
6279
144M
    sf_args.num_single_modes_processed += is_single_pred;
6280
144M
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
6281
#if CONFIG_COLLECT_COMPONENT_TIMING
6282
    start_timing(cpi, skip_inter_mode_time);
6283
#endif
6284
    // Apply speed features to decide if this inter mode can be skipped
6285
144M
    const int is_skip_inter_mode = skip_inter_mode(
6286
144M
        cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var);
6287
#if CONFIG_COLLECT_COMPONENT_TIMING
6288
    end_timing(cpi, skip_inter_mode_time);
6289
#endif
6290
144M
    if (is_skip_inter_mode) continue;
6291
6292
    // Select prediction reference frames.
6293
11.9M
    for (i = 0; i < num_planes; i++) {
6294
7.73M
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
6295
7.73M
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
6296
7.73M
    }
6297
6298
4.18M
    mbmi->angle_delta[PLANE_TYPE_Y] = 0;
6299
4.18M
    mbmi->angle_delta[PLANE_TYPE_UV] = 0;
6300
4.18M
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
6301
4.18M
    mbmi->ref_mv_idx = 0;
6302
6303
4.18M
    const int64_t ref_best_rd = search_state.best_rd;
6304
4.18M
    RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
6305
4.18M
    av1_init_rd_stats(&rd_stats);
6306
6307
4.18M
    const int ref_frame_cost = comp_pred
6308
4.18M
                                   ? ref_costs_comp[ref_frame][second_ref_frame]
6309
4.18M
                                   : ref_costs_single[ref_frame];
6310
4.18M
    const int compmode_cost =
6311
18.4E
        is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
6312
4.18M
    const int real_compmode_cost =
6313
4.18M
        cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
6314
4.25M
            ? compmode_cost
6315
18.4E
            : 0;
6316
    // Point to variables that are maintained between loop iterations
6317
4.18M
    args.single_newmv = search_state.single_newmv;
6318
4.18M
    args.single_newmv_rate = search_state.single_newmv_rate;
6319
4.18M
    args.single_newmv_valid = search_state.single_newmv_valid;
6320
4.18M
    args.single_comp_cost = real_compmode_cost;
6321
4.18M
    args.ref_frame_cost = ref_frame_cost;
6322
4.18M
    args.best_pred_sse = search_state.best_pred_sse;
6323
4.18M
    args.skip_ifs = skip_interp_filter_search(cpi, is_single_pred);
6324
4.18M
    int64_t skip_rd[2] = { search_state.best_skip_rd[0],
6325
4.18M
                           search_state.best_skip_rd[1] };
6326
4.18M
    int64_t this_yrd = INT64_MAX;
6327
#if CONFIG_COLLECT_COMPONENT_TIMING
6328
    start_timing(cpi, handle_inter_mode_time);
6329
#endif
6330
4.18M
    int64_t this_rd = handle_inter_mode(
6331
4.18M
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
6332
4.18M
        ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
6333
4.18M
        inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
6334
4.18M
        &this_yrd);
6335
#if CONFIG_COLLECT_COMPONENT_TIMING
6336
    end_timing(cpi, handle_inter_mode_time);
6337
#endif
6338
4.25M
    if (current_frame->reference_mode != SINGLE_REFERENCE) {
6339
4.25M
      if (!args.skip_ifs &&
6340
2.56M
          sf->inter_sf.prune_comp_search_by_single_result > 0 &&
6341
2.56M
          is_inter_singleref_mode(this_mode)) {
6342
2.56M
        collect_single_states(x, &search_state, mbmi);
6343
2.56M
      }
6344
6345
4.25M
      if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
6346
4.25M
          is_inter_singleref_mode(this_mode))
6347
4.25M
        update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
6348
4.25M
    }
6349
    // handle_inter_mode() returned INT64_MAX for this mode, so there is no RD
    // result to record (presumably the mode was pruned or found invalid -
    // confirm against handle_inter_mode()).
6350
4.18M
    if (this_rd == INT64_MAX) continue;
6351
6352
1.88M
    if (mbmi->skip_txfm) {
6353
175k
      rd_stats_y.rate = 0;
6354
175k
      rd_stats_uv.rate = 0;
6355
175k
    }
6356
6357
1.95M
    if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
6358
1.95M
        this_rd < ref_frame_rd[ref_frame]) {
6359
1.92M
      ref_frame_rd[ref_frame] = this_rd;
6360
1.92M
    }
6361
6362
1.88M
    adjust_cost(cpi, x, &this_rd);
6363
1.88M
    adjust_rdcost(cpi, x, &rd_stats);
6364
6365
    // Did this mode help, i.e., is it the new best mode
6366
1.89M
    if (this_rd < search_state.best_rd) {
6367
1.89M
      assert(IMPLIES(comp_pred,
6368
1.89M
                     cm->current_frame.reference_mode != SINGLE_REFERENCE));
6369
1.89M
      search_state.best_pred_sse = x->pred_sse[ref_frame];
6370
1.89M
      best_inter_yrd = this_yrd;
6371
1.89M
      update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
6372
1.89M
                          &rd_stats_uv, mode_enum, x, do_tx_search);
6373
1.89M
      if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
6374
      // skip_rd[0] is the best total rd for a skip mode so far.
6375
      // skip_rd[1] is the best total rd for a skip mode so far in luma.
6376
      // When do_tx_search = 1, both skip_rd[0] and skip_rd[1] are updated.
6377
      // When do_tx_search = 0, skip_rd[1] is updated.
6378
1.89M
      search_state.best_skip_rd[1] = skip_rd[1];
6379
1.89M
    }
6380
1.88M
    if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6381
      // Add this mode to motion mode candidate list for motion mode search
6382
      // if using motion_mode_for_winner_cand speed feature
6383
725k
      handle_winner_cand(mbmi, &best_motion_mode_cands,
6384
725k
                         max_winner_motion_mode_cand, this_rd,
6385
725k
                         &motion_mode_cand, args.skip_motion_mode);
6386
725k
    }
6387
6388
    /* keep record of best compound/single-only prediction */
6389
1.88M
    record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
6390
1.88M
                         x->rdmult, &search_state, compmode_cost);
6391
1.88M
  }
6392
6393
#if CONFIG_COLLECT_COMPONENT_TIMING
6394
  start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6395
#endif
6396
932k
  if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6397
    // For the single ref winner candidates, evaluate other motion modes (non
6398
    // simple translation).
6399
369k
    evaluate_motion_mode_for_winner_candidates(
6400
369k
        cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
6401
369k
        &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
6402
369k
        &search_state, &best_inter_yrd);
6403
369k
  }
6404
#if CONFIG_COLLECT_COMPONENT_TIMING
6405
  end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6406
#endif
6407
6408
#if CONFIG_COLLECT_COMPONENT_TIMING
6409
  start_timing(cpi, do_tx_search_time);
6410
#endif
6411
932k
  if (do_tx_search != 1) {
6412
    // A full tx search has not yet been done, do tx search for
6413
    // top mode candidates
6414
44.5k
    tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
6415
44.5k
                                    yv12_mb, mi_row, mi_col, &search_state,
6416
44.5k
                                    rd_cost, ctx, &best_inter_yrd);
6417
44.5k
  }
6418
#if CONFIG_COLLECT_COMPONENT_TIMING
6419
  end_timing(cpi, do_tx_search_time);
6420
#endif
6421
6422
#if CONFIG_COLLECT_COMPONENT_TIMING
6423
  start_timing(cpi, handle_intra_mode_time);
6424
#endif
6425
  // Gate intra mode evaluation if best of inter is skip except when source
6426
  // variance is extremely low and also based on max intra bsize.
6427
932k
  skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost,
6428
932k
                                 intra_cost);
6429
6430
932k
  const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
6431
932k
  search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
6432
932k
                                   &sf_args, intra_ref_frame_cost,
6433
932k
                                   best_inter_yrd);
6434
#if CONFIG_COLLECT_COMPONENT_TIMING
6435
  end_timing(cpi, handle_intra_mode_time);
6436
#endif
6437
6438
#if CONFIG_COLLECT_COMPONENT_TIMING
6439
  start_timing(cpi, refine_winner_mode_tx_time);
6440
#endif
6441
932k
  int winner_mode_count =
6442
932k
      sf->winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
6443
  // In effect only when fast tx search speed features are enabled.
6444
932k
  refine_winner_mode_tx(
6445
932k
      cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
6446
932k
      &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
6447
932k
      search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
6448
#if CONFIG_COLLECT_COMPONENT_TIMING
6449
  end_timing(cpi, refine_winner_mode_tx_time);
6450
#endif
6451
6452
  // Initialize default mode evaluation params
6453
932k
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);
6454
6455
  // Only try palette mode when the best mode so far is an intra mode.
6456
932k
  const int try_palette =
6457
932k
      cpi->oxcf.tool_cfg.enable_palette &&
6458
932k
      av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
6459
0
      !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
6460
932k
  RD_STATS this_rd_cost;
6461
932k
  int this_skippable = 0;
6462
932k
  if (try_palette) {
6463
#if CONFIG_COLLECT_COMPONENT_TIMING
6464
    start_timing(cpi, av1_search_palette_mode_time);
6465
#endif
6466
0
    this_skippable = av1_search_palette_mode(
6467
0
        &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
6468
0
        ctx, &this_rd_cost, search_state.best_rd);
6469
#if CONFIG_COLLECT_COMPONENT_TIMING
6470
    end_timing(cpi, av1_search_palette_mode_time);
6471
#endif
6472
0
    if (this_rd_cost.rdcost < search_state.best_rd) {
6473
0
      search_state.best_mode_index = THR_DC;
6474
0
      mbmi->mv[0].as_int = 0;
6475
0
      rd_cost->rate = this_rd_cost.rate;
6476
0
      rd_cost->dist = this_rd_cost.dist;
6477
0
      rd_cost->rdcost = this_rd_cost.rdcost;
6478
0
      search_state.best_rd = rd_cost->rdcost;
6479
0
      search_state.best_mbmode = *mbmi;
6480
0
      search_state.best_skip2 = 0;
6481
0
      search_state.best_mode_skippable = this_skippable;
6482
0
      memcpy(ctx->blk_skip, txfm_info->blk_skip,
6483
0
             sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
6484
0
      av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
6485
0
    }
6486
0
  }
6487
6488
932k
  search_state.best_mbmode.skip_mode = 0;
6489
932k
  if (cm->current_frame.skip_mode_info.skip_mode_flag &&
6490
279k
      cpi->oxcf.algo_cfg.sharpness != 3 && is_comp_ref_allowed(bsize)) {
6491
279k
    const struct segmentation *const seg = &cm->seg;
6492
279k
    unsigned char segment_id = mbmi->segment_id;
6493
279k
    if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
6494
279k
      rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
6495
279k
    }
6496
279k
  }
6497
6498
  // Make sure that the ref_mv_idx is only nonzero when we're
6499
  // using a mode which can support ref_mv_idx
6500
932k
  if (search_state.best_mbmode.ref_mv_idx != 0 &&
6501
32.2k
      !(search_state.best_mbmode.mode == NEWMV ||
6502
8.71k
        search_state.best_mbmode.mode == NEW_NEWMV ||
6503
8.71k
        have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
6504
0
    search_state.best_mbmode.ref_mv_idx = 0;
6505
0
  }
6506
  // No valid mode was found, or the best RD does not improve on the caller's
  // bound: report an invalid RD cost and leave the block's mode info alone.
6507
932k
  if (search_state.best_mode_index == THR_INVALID ||
6508
927k
      search_state.best_rd >= best_rd_so_far) {
6509
5.51k
    rd_cost->rate = INT_MAX;
6510
5.51k
    rd_cost->rdcost = INT64_MAX;
6511
5.51k
    return;
6512
5.51k
  }
6513
6514
927k
  const InterpFilter interp_filter = features->interp_filter;
6515
927k
  assert((interp_filter == SWITCHABLE) ||
6516
927k
         (interp_filter ==
6517
927k
          search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
6518
927k
         !is_inter_block(&search_state.best_mbmode));
6519
927k
  assert((interp_filter == SWITCHABLE) ||
6520
927k
         (interp_filter ==
6521
927k
          search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
6522
927k
         !is_inter_block(&search_state.best_mbmode));
6523
6524
927k
  if (!cpi->rc.is_src_frame_alt_ref && sf->inter_sf.adaptive_rd_thresh) {
6525
927k
    av1_update_rd_thresh_fact(
6526
927k
        cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
6527
927k
        search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
6528
927k
  }
6529
6530
  // Commit the winning macroblock mode info to the current block.
6531
927k
  *mbmi = search_state.best_mbmode;
6532
927k
  txfm_info->skip_txfm |= search_state.best_skip2;
6533
6534
  // Note: this section is needed since the mode may have been forced to
6535
  // GLOBALMV by the all-zero mode handling of ref-mv.
6536
927k
  if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
6537
    // Correct the interp filters for GLOBALMV
6538
1.04k
    if (is_nontrans_global_motion(xd, xd->mi[0])) {
6539
1.04k
      int_interpfilters filters =
6540
1.04k
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
6541
1.04k
      assert(mbmi->interp_filters.as_int == filters.as_int);
6542
1.04k
      (void)filters;
6543
1.04k
    }
6544
1.04k
  }
6545
6546
927k
  txfm_info->skip_txfm |= search_state.best_mode_skippable;
6547
6548
927k
  assert(search_state.best_mode_index != THR_INVALID);
6549
6550
#if CONFIG_INTERNAL_STATS
6551
  store_coding_context(x, ctx, search_state.best_mode_index,
6552
                       search_state.best_mode_skippable);
6553
#else
6554
927k
  store_coding_context(x, ctx, search_state.best_mode_skippable);
6555
927k
#endif  // CONFIG_INTERNAL_STATS
6556
6557
927k
  if (mbmi->palette_mode_info.palette_size[1] > 0) {
6558
0
    assert(try_palette);
6559
0
    av1_restore_uv_color_map(cpi, x);
6560
0
  }
6561
927k
}
6562
6563
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
6564
                                        TileDataEnc *tile_data, MACROBLOCK *x,
6565
                                        int mi_row, int mi_col,
6566
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
6567
                                        PICK_MODE_CONTEXT *ctx,
6568
0
                                        int64_t best_rd_so_far) {
6569
0
  const AV1_COMMON *const cm = &cpi->common;
6570
0
  const FeatureFlags *const features = &cm->features;
6571
0
  MACROBLOCKD *const xd = &x->e_mbd;
6572
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
6573
0
  unsigned char segment_id = mbmi->segment_id;
6574
0
  const int comp_pred = 0;
6575
0
  int i;
6576
0
  unsigned int ref_costs_single[REF_FRAMES];
6577
0
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
6578
0
  const ModeCosts *mode_costs = &x->mode_costs;
6579
0
  const int *comp_inter_cost =
6580
0
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
6581
0
  InterpFilter best_filter = SWITCHABLE;
6582
0
  int64_t this_rd = INT64_MAX;
6583
0
  int rate2 = 0;
6584
0
  const int64_t distortion2 = 0;
6585
0
  (void)mi_row;
6586
0
  (void)mi_col;
6587
0
  (void)tile_data;
6588
6589
0
  av1_collect_neighbors_ref_counts(xd);
6590
6591
0
  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
6592
0
                           ref_costs_comp);
6593
6594
0
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
6595
0
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;
6596
6597
0
  rd_cost->rate = INT_MAX;
6598
6599
0
  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
6600
6601
0
  mbmi->palette_mode_info.palette_size[0] = 0;
6602
0
  mbmi->palette_mode_info.palette_size[1] = 0;
6603
0
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
6604
0
  mbmi->mode = GLOBALMV;
6605
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
6606
0
  mbmi->uv_mode = UV_DC_PRED;
6607
0
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
6608
0
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
6609
0
  else
6610
0
    mbmi->ref_frame[0] = LAST_FRAME;
6611
0
  mbmi->ref_frame[1] = NONE_FRAME;
6612
0
  mbmi->mv[0].as_int =
6613
0
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
6614
0
                           features->allow_high_precision_mv, bsize, mi_col,
6615
0
                           mi_row, features->cur_frame_force_integer_mv)
6616
0
          .as_int;
6617
0
  mbmi->tx_size = max_txsize_lookup[bsize];
6618
0
  x->txfm_search_info.skip_txfm = 1;
6619
6620
0
  mbmi->ref_mv_idx = 0;
6621
6622
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
6623
0
  av1_count_overlappable_neighbors(cm, xd);
6624
0
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
6625
0
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
6626
0
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
6627
    // Select the samples according to motion vector difference
6628
0
    if (mbmi->num_proj_ref > 1) {
6629
0
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
6630
0
                                             mbmi->num_proj_ref, bsize);
6631
0
    }
6632
0
  }
6633
6634
0
  const InterpFilter interp_filter = features->interp_filter;
6635
0
  set_default_interp_filters(mbmi, interp_filter);
6636
6637
0
  if (interp_filter != SWITCHABLE) {
6638
0
    best_filter = interp_filter;
6639
0
  } else {
6640
0
    best_filter = EIGHTTAP_REGULAR;
6641
0
    if (av1_is_interp_needed(xd)) {
6642
0
      int rs;
6643
0
      int best_rs = INT_MAX;
6644
0
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
6645
0
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
6646
0
        rs = av1_get_switchable_rate(x, xd, interp_filter,
6647
0
                                     cm->seq_params->enable_dual_filter);
6648
0
        if (rs < best_rs) {
6649
0
          best_rs = rs;
6650
0
          best_filter = mbmi->interp_filters.as_filters.y_filter;
6651
0
        }
6652
0
      }
6653
0
    }
6654
0
  }
6655
  // Set the appropriate filter
6656
0
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
6657
0
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
6658
0
                                   cm->seq_params->enable_dual_filter);
6659
6660
0
  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
6661
0
    rate2 += comp_inter_cost[comp_pred];
6662
6663
  // Estimate the reference frame signaling cost and add it
6664
  // to the rolling cost variable.
6665
0
  rate2 += ref_costs_single[LAST_FRAME];
6666
0
  this_rd = RDCOST(x->rdmult, rate2, distortion2);
6667
6668
0
  rd_cost->rate = rate2;
6669
0
  rd_cost->dist = distortion2;
6670
0
  rd_cost->rdcost = this_rd;
6671
6672
0
  if (this_rd >= best_rd_so_far) {
6673
0
    rd_cost->rate = INT_MAX;
6674
0
    rd_cost->rdcost = INT64_MAX;
6675
0
    return;
6676
0
  }
6677
6678
0
  assert((interp_filter == SWITCHABLE) ||
6679
0
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));
6680
6681
0
  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
6682
0
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
6683
0
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
6684
0
                              THR_GLOBALMV, THR_INTER_MODE_START,
6685
0
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
6686
0
  }
6687
6688
#if CONFIG_INTERNAL_STATS
6689
  store_coding_context(x, ctx, THR_GLOBALMV, 0);
6690
#else
6691
0
  store_coding_context(x, ctx, 0);
6692
0
#endif  // CONFIG_INTERNAL_STATS
6693
0
}
6694
6695
/*!\cond */
6696
struct calc_target_weighted_pred_ctxt {
6697
  const OBMCBuffer *obmc_buffer;
6698
  const uint8_t *tmp;
6699
  int tmp_stride;
6700
  int overlap;
6701
};
6702
/*!\endcond */
6703
6704
static inline void calc_target_weighted_pred_above(
6705
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6706
0
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6707
0
  (void)nb_mi;
6708
0
  (void)num_planes;
6709
0
  (void)rel_mi_row;
6710
0
  (void)dir;
6711
6712
0
  struct calc_target_weighted_pred_ctxt *ctxt =
6713
0
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6714
6715
0
  const int bw = xd->width << MI_SIZE_LOG2;
6716
0
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6717
6718
0
  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
6719
0
  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
6720
0
  const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
6721
0
  const int is_hbd = is_cur_buf_hbd(xd);
6722
6723
0
  if (!is_hbd) {
6724
0
    for (int row = 0; row < ctxt->overlap; ++row) {
6725
0
      const uint8_t m0 = mask1d[row];
6726
0
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6727
0
      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6728
0
        wsrc[col] = m1 * tmp[col];
6729
0
        mask[col] = m0;
6730
0
      }
6731
0
      wsrc += bw;
6732
0
      mask += bw;
6733
0
      tmp += ctxt->tmp_stride;
6734
0
    }
6735
0
  } else {
6736
0
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6737
6738
0
    for (int row = 0; row < ctxt->overlap; ++row) {
6739
0
      const uint8_t m0 = mask1d[row];
6740
0
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6741
0
      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6742
0
        wsrc[col] = m1 * tmp16[col];
6743
0
        mask[col] = m0;
6744
0
      }
6745
0
      wsrc += bw;
6746
0
      mask += bw;
6747
0
      tmp16 += ctxt->tmp_stride;
6748
0
    }
6749
0
  }
6750
0
}
6751
6752
static inline void calc_target_weighted_pred_left(
6753
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6754
0
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6755
0
  (void)nb_mi;
6756
0
  (void)num_planes;
6757
0
  (void)rel_mi_col;
6758
0
  (void)dir;
6759
6760
0
  struct calc_target_weighted_pred_ctxt *ctxt =
6761
0
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6762
6763
0
  const int bw = xd->width << MI_SIZE_LOG2;
6764
0
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6765
6766
0
  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
6767
0
  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
6768
0
  const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
6769
0
  const int is_hbd = is_cur_buf_hbd(xd);
6770
6771
0
  if (!is_hbd) {
6772
0
    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6773
0
      for (int col = 0; col < ctxt->overlap; ++col) {
6774
0
        const uint8_t m0 = mask1d[col];
6775
0
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6776
0
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6777
0
                    (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6778
0
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6779
0
      }
6780
0
      wsrc += bw;
6781
0
      mask += bw;
6782
0
      tmp += ctxt->tmp_stride;
6783
0
    }
6784
0
  } else {
6785
0
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6786
6787
0
    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6788
0
      for (int col = 0; col < ctxt->overlap; ++col) {
6789
0
        const uint8_t m0 = mask1d[col];
6790
0
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6791
0
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6792
0
                    (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6793
0
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6794
0
      }
6795
0
      wsrc += bw;
6796
0
      mask += bw;
6797
0
      tmp16 += ctxt->tmp_stride;
6798
0
    }
6799
0
  }
6800
0
}
6801
6802
// This function has a structure similar to av1_build_obmc_inter_prediction
6803
//
6804
// The OBMC predictor is computed as:
6805
//
6806
//  PObmc(x,y) =
6807
//    AOM_BLEND_A64(Mh(x),
6808
//                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
6809
//                  PLeft(x, y))
6810
//
6811
// Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
6812
// rounding, this can be written as:
6813
//
6814
//  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
6815
//    Mh(x) * Mv(y) * P(x,y) +
6816
//      Mh(x) * Cv(y) * Pabove(x,y) +
6817
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6818
//
6819
// Where :
6820
//
6821
//  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
6822
//  Ch(y) = AOM_BLEND_A64_MAX_ALPHA - Mh(y)
6823
//
6824
// This function computes 'wsrc' and 'mask' as:
6825
//
6826
//  wsrc(x, y) =
6827
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
6828
//      Mh(x) * Cv(y) * Pabove(x,y) +
6829
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6830
//
6831
//  mask(x, y) = Mh(x) * Mv(y)
6832
//
6833
// These can then be used to efficiently approximate the error for any
6834
// predictor P in the context of the provided neighbouring predictors by
6835
// computing:
6836
//
6837
//  error(x, y) =
6838
//    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
6839
//
6840
static inline void calc_target_weighted_pred(
6841
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
6842
    const uint8_t *above, int above_stride, const uint8_t *left,
6843
0
    int left_stride) {
6844
0
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
6845
0
  const int bw = xd->width << MI_SIZE_LOG2;
6846
0
  const int bh = xd->height << MI_SIZE_LOG2;
6847
0
  const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
6848
0
  int32_t *mask_buf = obmc_buffer->mask;
6849
0
  int32_t *wsrc_buf = obmc_buffer->wsrc;
6850
6851
0
  const int is_hbd = is_cur_buf_hbd(xd);
6852
0
  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
6853
6854
  // plane 0 should not be sub-sampled
6855
0
  assert(xd->plane[0].subsampling_x == 0);
6856
0
  assert(xd->plane[0].subsampling_y == 0);
6857
6858
0
  av1_zero_array(wsrc_buf, bw * bh);
6859
0
  for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
6860
6861
  // handle above row
6862
0
  if (xd->up_available) {
6863
0
    const int overlap =
6864
0
        AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
6865
0
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
6866
0
                                                   above_stride, overlap };
6867
0
    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
6868
0
                                  max_neighbor_obmc[mi_size_wide_log2[bsize]],
6869
0
                                  calc_target_weighted_pred_above, &ctxt);
6870
0
  }
6871
6872
0
  for (int i = 0; i < bw * bh; ++i) {
6873
0
    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6874
0
    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6875
0
  }
6876
6877
  // handle left column
6878
0
  if (xd->left_available) {
6879
0
    const int overlap =
6880
0
        AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
6881
0
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
6882
0
                                                   left_stride, overlap };
6883
0
    foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
6884
0
                                 max_neighbor_obmc[mi_size_high_log2[bsize]],
6885
0
                                 calc_target_weighted_pred_left, &ctxt);
6886
0
  }
6887
6888
0
  if (!is_hbd) {
6889
0
    const uint8_t *src = x->plane[0].src.buf;
6890
6891
0
    for (int row = 0; row < bh; ++row) {
6892
0
      for (int col = 0; col < bw; ++col) {
6893
0
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6894
0
      }
6895
0
      wsrc_buf += bw;
6896
0
      src += x->plane[0].src.stride;
6897
0
    }
6898
0
  } else {
6899
0
    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
6900
6901
0
    for (int row = 0; row < bh; ++row) {
6902
0
      for (int col = 0; col < bw; ++col) {
6903
0
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6904
0
      }
6905
0
      wsrc_buf += bw;
6906
0
      src += x->plane[0].src.stride;
6907
0
    }
6908
0
  }
6909
0
}