Coverage Report

Created: 2025-06-22 08:04

/src/aom/av1/encoder/rdopt.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <math.h>
14
#include <stdbool.h>
15
16
#include "config/aom_config.h"
17
#include "config/aom_dsp_rtcd.h"
18
#include "config/av1_rtcd.h"
19
20
#include "aom_dsp/aom_dsp_common.h"
21
#include "aom_dsp/blend.h"
22
#include "aom_mem/aom_mem.h"
23
#include "aom_ports/aom_timer.h"
24
#include "aom_ports/mem.h"
25
26
#include "av1/common/av1_common_int.h"
27
#include "av1/common/cfl.h"
28
#include "av1/common/blockd.h"
29
#include "av1/common/common.h"
30
#include "av1/common/common_data.h"
31
#include "av1/common/entropy.h"
32
#include "av1/common/entropymode.h"
33
#include "av1/common/idct.h"
34
#include "av1/common/mvref_common.h"
35
#include "av1/common/obmc.h"
36
#include "av1/common/pred_common.h"
37
#include "av1/common/quant_common.h"
38
#include "av1/common/reconinter.h"
39
#include "av1/common/reconintra.h"
40
#include "av1/common/scan.h"
41
#include "av1/common/seg_common.h"
42
#include "av1/common/txb_common.h"
43
#include "av1/common/warped_motion.h"
44
45
#include "av1/encoder/aq_variance.h"
46
#include "av1/encoder/av1_quantize.h"
47
#include "av1/encoder/cost.h"
48
#include "av1/encoder/compound_type.h"
49
#include "av1/encoder/encodemb.h"
50
#include "av1/encoder/encodemv.h"
51
#include "av1/encoder/encoder.h"
52
#include "av1/encoder/encodetxb.h"
53
#include "av1/encoder/hybrid_fwd_txfm.h"
54
#include "av1/encoder/interp_search.h"
55
#include "av1/encoder/intra_mode_search.h"
56
#include "av1/encoder/intra_mode_search_utils.h"
57
#include "av1/encoder/mcomp.h"
58
#include "av1/encoder/ml.h"
59
#include "av1/encoder/mode_prune_model_weights.h"
60
#include "av1/encoder/model_rd.h"
61
#include "av1/encoder/motion_search_facade.h"
62
#include "av1/encoder/palette.h"
63
#include "av1/encoder/pustats.h"
64
#include "av1/encoder/random.h"
65
#include "av1/encoder/ratectrl.h"
66
#include "av1/encoder/rd.h"
67
#include "av1/encoder/rdopt.h"
68
#include "av1/encoder/reconinter_enc.h"
69
#include "av1/encoder/tokenize.h"
70
#include "av1/encoder/tpl_model.h"
71
#include "av1/encoder/tx_search.h"
72
#include "av1/encoder/var_based_part.h"
73
74
0
#define LAST_NEW_MV_INDEX 6
75
76
// Mode_threshold multiplication factor table for prune_inter_modes_if_skippable
77
// The values are kept in Q12 format and equation used to derive is
78
// (2.5 - ((float)x->qindex / MAXQ) * 1.5)
79
0
#define MODE_THRESH_QBITS 12
80
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
81
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
82
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
83
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
84
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
85
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
86
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
87
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
88
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
89
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
90
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
91
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
92
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
93
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
94
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
95
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
96
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
97
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
98
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
99
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
100
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
101
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
102
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
103
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
104
  4144,  4120,  4096
105
};
106
107
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
108
  THR_NEARESTMV,
109
  THR_NEARESTL2,
110
  THR_NEARESTL3,
111
  THR_NEARESTB,
112
  THR_NEARESTA2,
113
  THR_NEARESTA,
114
  THR_NEARESTG,
115
116
  THR_NEWMV,
117
  THR_NEWL2,
118
  THR_NEWL3,
119
  THR_NEWB,
120
  THR_NEWA2,
121
  THR_NEWA,
122
  THR_NEWG,
123
124
  THR_NEARMV,
125
  THR_NEARL2,
126
  THR_NEARL3,
127
  THR_NEARB,
128
  THR_NEARA2,
129
  THR_NEARA,
130
  THR_NEARG,
131
132
  THR_GLOBALMV,
133
  THR_GLOBALL2,
134
  THR_GLOBALL3,
135
  THR_GLOBALB,
136
  THR_GLOBALA2,
137
  THR_GLOBALA,
138
  THR_GLOBALG,
139
140
  THR_COMP_NEAREST_NEARESTLA,
141
  THR_COMP_NEAREST_NEARESTL2A,
142
  THR_COMP_NEAREST_NEARESTL3A,
143
  THR_COMP_NEAREST_NEARESTGA,
144
  THR_COMP_NEAREST_NEARESTLB,
145
  THR_COMP_NEAREST_NEARESTL2B,
146
  THR_COMP_NEAREST_NEARESTL3B,
147
  THR_COMP_NEAREST_NEARESTGB,
148
  THR_COMP_NEAREST_NEARESTLA2,
149
  THR_COMP_NEAREST_NEARESTL2A2,
150
  THR_COMP_NEAREST_NEARESTL3A2,
151
  THR_COMP_NEAREST_NEARESTGA2,
152
  THR_COMP_NEAREST_NEARESTLL2,
153
  THR_COMP_NEAREST_NEARESTLL3,
154
  THR_COMP_NEAREST_NEARESTLG,
155
  THR_COMP_NEAREST_NEARESTBA,
156
157
  THR_COMP_NEAR_NEARLB,
158
  THR_COMP_NEW_NEWLB,
159
  THR_COMP_NEW_NEARESTLB,
160
  THR_COMP_NEAREST_NEWLB,
161
  THR_COMP_NEW_NEARLB,
162
  THR_COMP_NEAR_NEWLB,
163
  THR_COMP_GLOBAL_GLOBALLB,
164
165
  THR_COMP_NEAR_NEARLA,
166
  THR_COMP_NEW_NEWLA,
167
  THR_COMP_NEW_NEARESTLA,
168
  THR_COMP_NEAREST_NEWLA,
169
  THR_COMP_NEW_NEARLA,
170
  THR_COMP_NEAR_NEWLA,
171
  THR_COMP_GLOBAL_GLOBALLA,
172
173
  THR_COMP_NEAR_NEARL2A,
174
  THR_COMP_NEW_NEWL2A,
175
  THR_COMP_NEW_NEARESTL2A,
176
  THR_COMP_NEAREST_NEWL2A,
177
  THR_COMP_NEW_NEARL2A,
178
  THR_COMP_NEAR_NEWL2A,
179
  THR_COMP_GLOBAL_GLOBALL2A,
180
181
  THR_COMP_NEAR_NEARL3A,
182
  THR_COMP_NEW_NEWL3A,
183
  THR_COMP_NEW_NEARESTL3A,
184
  THR_COMP_NEAREST_NEWL3A,
185
  THR_COMP_NEW_NEARL3A,
186
  THR_COMP_NEAR_NEWL3A,
187
  THR_COMP_GLOBAL_GLOBALL3A,
188
189
  THR_COMP_NEAR_NEARGA,
190
  THR_COMP_NEW_NEWGA,
191
  THR_COMP_NEW_NEARESTGA,
192
  THR_COMP_NEAREST_NEWGA,
193
  THR_COMP_NEW_NEARGA,
194
  THR_COMP_NEAR_NEWGA,
195
  THR_COMP_GLOBAL_GLOBALGA,
196
197
  THR_COMP_NEAR_NEARL2B,
198
  THR_COMP_NEW_NEWL2B,
199
  THR_COMP_NEW_NEARESTL2B,
200
  THR_COMP_NEAREST_NEWL2B,
201
  THR_COMP_NEW_NEARL2B,
202
  THR_COMP_NEAR_NEWL2B,
203
  THR_COMP_GLOBAL_GLOBALL2B,
204
205
  THR_COMP_NEAR_NEARL3B,
206
  THR_COMP_NEW_NEWL3B,
207
  THR_COMP_NEW_NEARESTL3B,
208
  THR_COMP_NEAREST_NEWL3B,
209
  THR_COMP_NEW_NEARL3B,
210
  THR_COMP_NEAR_NEWL3B,
211
  THR_COMP_GLOBAL_GLOBALL3B,
212
213
  THR_COMP_NEAR_NEARGB,
214
  THR_COMP_NEW_NEWGB,
215
  THR_COMP_NEW_NEARESTGB,
216
  THR_COMP_NEAREST_NEWGB,
217
  THR_COMP_NEW_NEARGB,
218
  THR_COMP_NEAR_NEWGB,
219
  THR_COMP_GLOBAL_GLOBALGB,
220
221
  THR_COMP_NEAR_NEARLA2,
222
  THR_COMP_NEW_NEWLA2,
223
  THR_COMP_NEW_NEARESTLA2,
224
  THR_COMP_NEAREST_NEWLA2,
225
  THR_COMP_NEW_NEARLA2,
226
  THR_COMP_NEAR_NEWLA2,
227
  THR_COMP_GLOBAL_GLOBALLA2,
228
229
  THR_COMP_NEAR_NEARL2A2,
230
  THR_COMP_NEW_NEWL2A2,
231
  THR_COMP_NEW_NEARESTL2A2,
232
  THR_COMP_NEAREST_NEWL2A2,
233
  THR_COMP_NEW_NEARL2A2,
234
  THR_COMP_NEAR_NEWL2A2,
235
  THR_COMP_GLOBAL_GLOBALL2A2,
236
237
  THR_COMP_NEAR_NEARL3A2,
238
  THR_COMP_NEW_NEWL3A2,
239
  THR_COMP_NEW_NEARESTL3A2,
240
  THR_COMP_NEAREST_NEWL3A2,
241
  THR_COMP_NEW_NEARL3A2,
242
  THR_COMP_NEAR_NEWL3A2,
243
  THR_COMP_GLOBAL_GLOBALL3A2,
244
245
  THR_COMP_NEAR_NEARGA2,
246
  THR_COMP_NEW_NEWGA2,
247
  THR_COMP_NEW_NEARESTGA2,
248
  THR_COMP_NEAREST_NEWGA2,
249
  THR_COMP_NEW_NEARGA2,
250
  THR_COMP_NEAR_NEWGA2,
251
  THR_COMP_GLOBAL_GLOBALGA2,
252
253
  THR_COMP_NEAR_NEARLL2,
254
  THR_COMP_NEW_NEWLL2,
255
  THR_COMP_NEW_NEARESTLL2,
256
  THR_COMP_NEAREST_NEWLL2,
257
  THR_COMP_NEW_NEARLL2,
258
  THR_COMP_NEAR_NEWLL2,
259
  THR_COMP_GLOBAL_GLOBALLL2,
260
261
  THR_COMP_NEAR_NEARLL3,
262
  THR_COMP_NEW_NEWLL3,
263
  THR_COMP_NEW_NEARESTLL3,
264
  THR_COMP_NEAREST_NEWLL3,
265
  THR_COMP_NEW_NEARLL3,
266
  THR_COMP_NEAR_NEWLL3,
267
  THR_COMP_GLOBAL_GLOBALLL3,
268
269
  THR_COMP_NEAR_NEARLG,
270
  THR_COMP_NEW_NEWLG,
271
  THR_COMP_NEW_NEARESTLG,
272
  THR_COMP_NEAREST_NEWLG,
273
  THR_COMP_NEW_NEARLG,
274
  THR_COMP_NEAR_NEWLG,
275
  THR_COMP_GLOBAL_GLOBALLG,
276
277
  THR_COMP_NEAR_NEARBA,
278
  THR_COMP_NEW_NEWBA,
279
  THR_COMP_NEW_NEARESTBA,
280
  THR_COMP_NEAREST_NEWBA,
281
  THR_COMP_NEW_NEARBA,
282
  THR_COMP_NEAR_NEWBA,
283
  THR_COMP_GLOBAL_GLOBALBA,
284
285
  THR_DC,
286
  THR_PAETH,
287
  THR_SMOOTH,
288
  THR_SMOOTH_V,
289
  THR_SMOOTH_H,
290
  THR_H_PRED,
291
  THR_V_PRED,
292
  THR_D135_PRED,
293
  THR_D203_PRED,
294
  THR_D157_PRED,
295
  THR_D67_PRED,
296
  THR_D113_PRED,
297
  THR_D45_PRED,
298
};
299
300
/*!\cond */
301
typedef struct SingleInterModeState {
302
  int64_t rd;
303
  MV_REFERENCE_FRAME ref_frame;
304
  int valid;
305
} SingleInterModeState;
306
307
typedef struct InterModeSearchState {
308
  int64_t best_rd;
309
  int64_t best_skip_rd[2];
310
  MB_MODE_INFO best_mbmode;
311
  int best_rate_y;
312
  int best_rate_uv;
313
  int best_mode_skippable;
314
  int best_skip2;
315
  THR_MODES best_mode_index;
316
  int num_available_refs;
317
  int64_t dist_refs[REF_FRAMES];
318
  int dist_order_refs[REF_FRAMES];
319
  int64_t mode_threshold[MAX_MODES];
320
  int64_t best_intra_rd;
321
  unsigned int best_pred_sse;
322
323
  /*!
324
   * \brief Keep track of best intra rd for use in compound mode.
325
   */
326
  int64_t best_pred_rd[REFERENCE_MODES];
327
  // Save a set of single_newmv for each checked ref_mv.
328
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
329
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
330
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
331
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
332
  // The rd of simple translation in single inter modes
333
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
334
  int64_t best_single_rd[REF_FRAMES];
335
  PREDICTION_MODE best_single_mode[REF_FRAMES];
336
337
  // Single search results by [directions][modes][reference frames]
338
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
339
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
340
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
341
                                            [FWD_REFS];
342
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
343
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
344
  IntraModeSearchState intra_search_state;
345
  RD_STATS best_y_rdcost;
346
} InterModeSearchState;
347
/*!\endcond */
348
349
0
// Resets every per-block-size inter mode RD model of the tile: marks it as
// not ready and clears the sample count and all running sums.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  int bsize = 0;
  while (bsize < BLOCK_SIZES_ALL) {
    InterModeRdModel *const model = &tile_data->inter_mode_rd_models[bsize];
    model->ready = 0;
    model->num = 0;
    model->dist_sum = 0;
    model->ld_sum = 0;
    model->sse_sum = 0;
    model->sse_sse_sum = 0;
    model->sse_ld_sum = 0;
    ++bsize;
  }
}
361
362
// Estimates the residue cost and distortion for a block of size bsize from its
// SSE, using the tile's fitted model for that block size.
//
// Returns 0 without touching the outputs when the model is not ready yet;
// otherwise writes *est_residue_cost and *est_dist and returns 1.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  const InterModeRdModel *model = &tile_data->inter_mode_rd_models[bsize];
  if (!model->ready) return 0;

  // Below the mean distortion: zero residue cost, distortion equals the SSE.
  if (sse < model->dist_mean) {
    *est_residue_cost = 0;
    *est_dist = sse;
    return 1;
  }

  const double est_ld = model->a * sse + model->b;
  int cost;
  // Clamp estimated rate cost by INT_MAX / 2.
  // TODO(angiebird@google.com): find better solution than clamping.
  if (fabs(est_ld) < 1e-2) {
    cost = INT_MAX / 2;
  } else {
    const double cost_dbl = ((sse - model->dist_mean) / est_ld);
    if (cost_dbl < 0) {
      cost = 0;
    } else {
      cost = (int)AOMMIN((int64_t)round(cost_dbl), INT_MAX / 2);
    }
  }

  if (cost <= 0) {
    // No usable cost estimate: fall back to zero cost and the SSE itself.
    *est_residue_cost = 0;
    *est_dist = sse;
  } else {
    *est_residue_cost = cost;
    *est_dist = (int64_t)round(model->dist_mean);
  }
  return 1;
}
395
396
0
// Refits the linear model (a, b) of every block size that has collected
// enough samples since the last fit, then clears the running sums.
//
// A model that is not ready needs at least 200 samples for its first fit; a
// ready model needs at least 64 and blends the new batch means into the
// stored means with weight 1 against weight 3 for the old value.
// rdmult is unused.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  (void)rdmult;

  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    if (inter_mode_data_block_idx(bsize) == -1) continue;

    InterModeRdModel *const model = &tile_data->inter_mode_rd_models[bsize];
    const int first_fit_pending = model->ready == 0 && model->num < 200;
    const int update_pending = model->ready == 1 && model->num < 64;
    if (first_fit_pending || update_pending) continue;

    if (model->ready == 0) {
      // First fit: the batch means are taken as-is.
      model->dist_mean = model->dist_sum / model->num;
      model->ld_mean = model->ld_sum / model->num;
      model->sse_mean = model->sse_sum / model->num;
      model->sse_sse_mean = model->sse_sse_sum / model->num;
      model->sse_ld_mean = model->sse_ld_sum / model->num;
    } else {
      // Later fits: weighted average of the old mean and the batch mean.
      const double old_weight = 3;
      model->dist_mean =
          (model->dist_mean * old_weight + (model->dist_sum / model->num)) /
          (old_weight + 1);
      model->ld_mean =
          (model->ld_mean * old_weight + (model->ld_sum / model->num)) /
          (old_weight + 1);
      model->sse_mean =
          (model->sse_mean * old_weight + (model->sse_sum / model->num)) /
          (old_weight + 1);
      model->sse_sse_mean =
          (model->sse_sse_mean * old_weight +
           (model->sse_sse_sum / model->num)) /
          (old_weight + 1);
      model->sse_ld_mean =
          (model->sse_ld_mean * old_weight +
           (model->sse_ld_sum / model->num)) /
          (old_weight + 1);
    }

    // Line fit of ld against sse from the stored means.
    const double mean_ld = model->ld_mean;
    const double mean_sse = model->sse_mean;
    const double rms_sse = sqrt(model->sse_sse_mean);
    const double mean_sse_ld = model->sse_ld_mean;

    model->a = (mean_sse_ld - mean_sse * mean_ld) /
               (rms_sse * rms_sse - mean_sse * mean_sse);
    model->b = mean_ld - model->a * mean_sse;
    model->ready = 1;

    // Start a new batch.
    model->num = 0;
    model->dist_sum = 0;
    model->ld_sum = 0;
    model->sse_sum = 0;
    model->sse_sse_sum = 0;
    model->sse_ld_sum = 0;
  }
}
445
446
static inline void inter_mode_data_push(TileDataEnc *tile_data,
447
                                        BLOCK_SIZE bsize, int64_t sse,
448
0
                                        int64_t dist, int residue_cost) {
449
0
  if (residue_cost == 0 || sse == dist) return;
450
0
  const int block_idx = inter_mode_data_block_idx(bsize);
451
0
  if (block_idx == -1) return;
452
0
  InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
453
0
  if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
454
0
    const double ld = (sse - dist) * 1. / residue_cost;
455
0
    ++rd_model->num;
456
0
    rd_model->dist_sum += dist;
457
0
    rd_model->ld_sum += ld;
458
0
    rd_model->sse_sum += sse;
459
0
    rd_model->sse_sse_sum += (double)sse * (double)sse;
460
0
    rd_model->sse_ld_sum += sse * ld;
461
0
  }
462
0
}
463
464
static inline void inter_modes_info_push(InterModesInfo *inter_modes_info,
465
                                         int mode_rate, int64_t sse, int64_t rd,
466
                                         RD_STATS *rd_cost, RD_STATS *rd_cost_y,
467
                                         RD_STATS *rd_cost_uv,
468
0
                                         const MB_MODE_INFO *mbmi) {
469
0
  const int num = inter_modes_info->num;
470
0
  assert(num < MAX_INTER_MODES);
471
0
  inter_modes_info->mbmi_arr[num] = *mbmi;
472
0
  inter_modes_info->mode_rate_arr[num] = mode_rate;
473
0
  inter_modes_info->sse_arr[num] = sse;
474
0
  inter_modes_info->est_rd_arr[num] = rd;
475
0
  inter_modes_info->rd_cost_arr[num] = *rd_cost;
476
0
  inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
477
0
  inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
478
0
  ++inter_modes_info->num;
479
0
}
480
481
0
static int compare_rd_idx_pair(const void *a, const void *b) {
482
0
  if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
483
    // To avoid inconsistency in qsort() ordering when two elements are equal,
484
    // using idx as tie breaker. Refer aomedia:2928
485
0
    if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
486
0
      return 0;
487
0
    else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
488
0
      return 1;
489
0
    else
490
0
      return -1;
491
0
  } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
492
0
    return 1;
493
0
  } else {
494
0
    return -1;
495
0
  }
496
0
}
497
498
static inline void inter_modes_info_sort(const InterModesInfo *inter_modes_info,
499
0
                                         RdIdxPair *rd_idx_pair_arr) {
500
0
  if (inter_modes_info->num == 0) {
501
0
    return;
502
0
  }
503
0
  for (int i = 0; i < inter_modes_info->num; ++i) {
504
0
    rd_idx_pair_arr[i].idx = i;
505
0
    rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
506
0
  }
507
0
  qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
508
0
        compare_rd_idx_pair);
509
0
}
510
511
// Similar to get_horver_correlation, but also takes into account first
512
// row/column, when computing horizontal/vertical correlation.
513
void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
514
                                       int width, int height, float *hcorr,
515
0
                                       float *vcorr) {
516
  // The following notation is used:
517
  // x - current pixel
518
  // y - left neighbor pixel
519
  // z - top neighbor pixel
520
0
  int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
521
0
  int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
522
0
  int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
523
524
  // First, process horizontal correlation on just the first row
525
0
  x_sum += diff[0];
526
0
  x2_sum += diff[0] * diff[0];
527
0
  x_firstrow += diff[0];
528
0
  x2_firstrow += diff[0] * diff[0];
529
0
  for (int j = 1; j < width; ++j) {
530
0
    const int16_t x = diff[j];
531
0
    const int16_t y = diff[j - 1];
532
0
    x_sum += x;
533
0
    x_firstrow += x;
534
0
    x2_sum += x * x;
535
0
    x2_firstrow += x * x;
536
0
    xy_sum += x * y;
537
0
  }
538
539
  // Process vertical correlation in the first column
540
0
  x_firstcol += diff[0];
541
0
  x2_firstcol += diff[0] * diff[0];
542
0
  for (int i = 1; i < height; ++i) {
543
0
    const int16_t x = diff[i * stride];
544
0
    const int16_t z = diff[(i - 1) * stride];
545
0
    x_sum += x;
546
0
    x_firstcol += x;
547
0
    x2_sum += x * x;
548
0
    x2_firstcol += x * x;
549
0
    xz_sum += x * z;
550
0
  }
551
552
  // Now process horiz and vert correlation through the rest unit
553
0
  for (int i = 1; i < height; ++i) {
554
0
    for (int j = 1; j < width; ++j) {
555
0
      const int16_t x = diff[i * stride + j];
556
0
      const int16_t y = diff[i * stride + j - 1];
557
0
      const int16_t z = diff[(i - 1) * stride + j];
558
0
      x_sum += x;
559
0
      x2_sum += x * x;
560
0
      xy_sum += x * y;
561
0
      xz_sum += x * z;
562
0
    }
563
0
  }
564
565
0
  for (int j = 0; j < width; ++j) {
566
0
    x_finalrow += diff[(height - 1) * stride + j];
567
0
    x2_finalrow +=
568
0
        diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
569
0
  }
570
0
  for (int i = 0; i < height; ++i) {
571
0
    x_finalcol += diff[i * stride + width - 1];
572
0
    x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
573
0
  }
574
575
0
  int64_t xhor_sum = x_sum - x_finalcol;
576
0
  int64_t xver_sum = x_sum - x_finalrow;
577
0
  int64_t y_sum = x_sum - x_firstcol;
578
0
  int64_t z_sum = x_sum - x_firstrow;
579
0
  int64_t x2hor_sum = x2_sum - x2_finalcol;
580
0
  int64_t x2ver_sum = x2_sum - x2_finalrow;
581
0
  int64_t y2_sum = x2_sum - x2_firstcol;
582
0
  int64_t z2_sum = x2_sum - x2_firstrow;
583
584
0
  const float num_hor = (float)(height * (width - 1));
585
0
  const float num_ver = (float)((height - 1) * width);
586
587
0
  const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
588
0
  const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
589
590
0
  const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
591
0
  const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
592
593
0
  const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
594
0
  const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
595
596
0
  if (xhor_var_n > 0 && y_var_n > 0) {
597
0
    *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
598
0
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
599
0
  } else {
600
0
    *hcorr = 1.0;
601
0
  }
602
0
  if (xver_var_n > 0 && z_var_n > 0) {
603
0
    *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
604
0
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
605
0
  } else {
606
0
    *vcorr = 1.0;
607
0
  }
608
0
}
609
610
static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
611
0
                       int64_t *sse_y) {
612
0
  const AV1_COMMON *cm = &cpi->common;
613
0
  const int num_planes = av1_num_planes(cm);
614
0
  const MACROBLOCKD *xd = &x->e_mbd;
615
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
616
0
  int64_t total_sse = 0;
617
0
  for (int plane = 0; plane < num_planes; ++plane) {
618
0
    if (plane && !xd->is_chroma_ref) break;
619
0
    const struct macroblock_plane *const p = &x->plane[plane];
620
0
    const struct macroblockd_plane *const pd = &xd->plane[plane];
621
0
    const BLOCK_SIZE bs =
622
0
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
623
0
    unsigned int sse;
624
625
0
    cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
626
0
                            pd->dst.stride, &sse);
627
0
    total_sse += sse;
628
0
    if (!plane && sse_y) *sse_y = sse;
629
0
  }
630
0
  total_sse <<= 4;
631
0
  return total_sse;
632
0
}
633
634
// Returns the sum of squared differences between coeff and dqcoeff over
// block_size entries, and writes the sum of squared coeff values to *ssz.
//
// The differences and squares are computed in 64-bit arithmetic so that large
// coefficient values cannot overflow a 32-bit int before being accumulated
// (the same approach as av1_highbd_block_error_c). Results are unchanged for
// all inputs whose squares fit in an int.
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t error = 0, sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - (int64_t)dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
648
649
// Returns the sum of squared differences between the 16-bit coeff and dqcoeff
// arrays over block_size entries.
//
// The difference of two int16_t values can be as large as +/-65535, whose
// square does not fit in a 32-bit int, so the square is formed in 64-bit
// arithmetic. Results are unchanged for all inputs that did not overflow.
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t error = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - (int64_t)dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
660
661
#if CONFIG_AV1_HIGHBITDEPTH
662
// High bit-depth variant of the block error: returns the sum of squared
// differences between coeff and dqcoeff and writes the sum of squared coeff
// values to *ssz. Both sums are rounded and shifted right by 2 * (bd - 8).
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = (1 << shift) >> 1;
  int64_t sq_err_sum = 0;
  int64_t sq_coeff_sum = 0;

  for (int idx = 0; idx < block_size; ++idx) {
    const int64_t delta = coeff[idx] - dqcoeff[idx];
    sq_err_sum += delta * delta;
    sq_coeff_sum += (int64_t)coeff[idx] * (int64_t)coeff[idx];
  }

  *ssz = (sq_coeff_sum + rounding) >> shift;
  return (sq_err_sum + rounding) >> shift;
}
681
#endif
682
683
// Returns 1 when the directional intra mode `mode` should be skipped given
// the best intra mode found so far, 0 otherwise. Each of the four listed
// diagonal modes is kept only when the best mode is one of two specific
// modes; every other mode is never skipped here.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
699
700
// Returns the cost of signalling inter prediction mode `mode` under
// mode_context.
//
// Compound modes are looked up directly in inter_compound_mode_cost. Single
// modes are costed as a chain of binary decisions: NEWMV or not, then
// GLOBALMV or not, then NEARESTMV versus the remaining mode, each with its
// own context extracted from mode_context.
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  const int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[newmv_ctx][0];

  const int not_newmv_cost = mode_costs->newmv_mode_cost[newmv_ctx][1];
  const int16_t globalmv_ctx =
      (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV) {
    return not_newmv_cost + mode_costs->zeromv_mode_cost[globalmv_ctx][0];
  }

  const int not_globalmv_cost =
      not_newmv_cost + mode_costs->zeromv_mode_cost[globalmv_ctx][1];
  const int16_t refmv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  return not_globalmv_cost +
         mode_costs->refmv_mode_cost[refmv_ctx][mode != NEARESTMV];
}
730
731
// Returns the single-reference mode that this_mode uses for the reference
// selected by ref_idx: compound_ref0_mode() for index 0, compound_ref1_mode()
// for any non-zero index.
static inline PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
                                              int ref_idx) {
  if (ref_idx == 0) {
    return compound_ref0_mode(this_mode);
  }
  return compound_ref1_mode(this_mode);
}
736
737
static inline void estimate_ref_frame_costs(
738
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
739
    int segment_id, unsigned int *ref_costs_single,
740
0
    unsigned int (*ref_costs_comp)[REF_FRAMES]) {
741
0
  int seg_ref_active =
742
0
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
743
0
  if (seg_ref_active) {
744
0
    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
745
0
    int ref_frame;
746
0
    for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
747
0
      memset(ref_costs_comp[ref_frame], 0,
748
0
             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
749
0
  } else {
750
0
    int intra_inter_ctx = av1_get_intra_inter_context(xd);
751
0
    ref_costs_single[INTRA_FRAME] =
752
0
        mode_costs->intra_inter_cost[intra_inter_ctx][0];
753
0
    unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];
754
755
0
    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
756
0
      ref_costs_single[i] = base_cost;
757
758
0
    const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
759
0
    const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
760
0
    const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
761
0
    const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
762
0
    const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
763
0
    const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);
764
765
    // Determine cost of a single ref frame, where frame types are represented
766
    // by a tree:
767
    // Level 0: add cost whether this ref is a forward or backward ref
768
0
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
769
0
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
770
0
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
771
0
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
772
0
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
773
0
    ref_costs_single[ALTREF2_FRAME] +=
774
0
        mode_costs->single_ref_cost[ctx_p1][0][1];
775
0
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
776
777
    // Level 1: if this ref is forward ref,
778
    // add cost whether it is last/last2 or last3/golden
779
0
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
780
0
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
781
0
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
782
0
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
783
784
    // Level 1: if this ref is backward ref
785
    // then add cost whether this ref is altref or backward ref
786
0
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
787
0
    ref_costs_single[ALTREF2_FRAME] +=
788
0
        mode_costs->single_ref_cost[ctx_p2][1][0];
789
0
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];
790
791
    // Level 2: further add cost whether this ref is last or last2
792
0
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
793
0
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];
794
795
    // Level 2: last3 or golden
796
0
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
797
0
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];
798
799
    // Level 2: bwdref or altref2
800
0
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
801
0
    ref_costs_single[ALTREF2_FRAME] +=
802
0
        mode_costs->single_ref_cost[ctx_p6][5][1];
803
804
0
    if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
805
      // Similar to single ref, determine cost of compound ref frames.
806
      // cost_compound_refs = cost_first_ref + cost_second_ref
807
0
      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
808
0
      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
809
0
      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
810
0
      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
811
0
      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);
812
813
0
      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
814
0
      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };
815
816
0
      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
817
0
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
818
0
              base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
819
0
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
820
0
      ref_bicomp_costs[ALTREF_FRAME] = 0;
821
822
      // cost of first ref frame
823
0
      ref_bicomp_costs[LAST_FRAME] +=
824
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
825
0
      ref_bicomp_costs[LAST2_FRAME] +=
826
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
827
0
      ref_bicomp_costs[LAST3_FRAME] +=
828
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
829
0
      ref_bicomp_costs[GOLDEN_FRAME] +=
830
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
831
832
0
      ref_bicomp_costs[LAST_FRAME] +=
833
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
834
0
      ref_bicomp_costs[LAST2_FRAME] +=
835
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];
836
837
0
      ref_bicomp_costs[LAST3_FRAME] +=
838
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
839
0
      ref_bicomp_costs[GOLDEN_FRAME] +=
840
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];
841
842
      // cost of second ref frame
843
0
      ref_bicomp_costs[BWDREF_FRAME] +=
844
0
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
845
0
      ref_bicomp_costs[ALTREF2_FRAME] +=
846
0
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
847
0
      ref_bicomp_costs[ALTREF_FRAME] +=
848
0
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
849
850
0
      ref_bicomp_costs[BWDREF_FRAME] +=
851
0
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
852
0
      ref_bicomp_costs[ALTREF2_FRAME] +=
853
0
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
854
855
      // cost: if one ref frame is forward ref, the other ref is backward ref
856
0
      int ref0, ref1;
857
0
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
858
0
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
859
0
          ref_costs_comp[ref0][ref1] =
860
0
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
861
0
        }
862
0
      }
863
864
      // cost: if both ref frames are the same side.
865
0
      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
866
0
      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
867
0
      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
868
0
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
869
0
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
870
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
871
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
872
0
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
873
0
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
874
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
875
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
876
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
877
0
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
878
0
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
879
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
880
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
881
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
882
0
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
883
0
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
884
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
885
0
    } else {
886
0
      int ref0, ref1;
887
0
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
888
0
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
889
0
          ref_costs_comp[ref0][ref1] = 512;
890
0
      }
891
0
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
892
0
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
893
0
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
894
0
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
895
0
    }
896
0
  }
897
0
}
898
899
static inline void store_coding_context(
900
#if CONFIG_INTERNAL_STATS
901
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
902
#else
903
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
904
#endif  // CONFIG_INTERNAL_STATS
905
0
    int skippable) {
906
0
  MACROBLOCKD *const xd = &x->e_mbd;
907
908
  // Take a snapshot of the coding context so it can be
909
  // restored if we decide to encode this way
910
0
  ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
911
0
  ctx->skippable = skippable;
912
#if CONFIG_INTERNAL_STATS
913
  ctx->best_mode_index = mode_index;
914
#endif  // CONFIG_INTERNAL_STATS
915
0
  ctx->mic = *xd->mi[0];
916
0
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
917
0
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
918
0
}
919
920
static inline void setup_buffer_ref_mvs_inter(
921
    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
922
0
    BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
923
0
  const AV1_COMMON *cm = &cpi->common;
924
0
  const int num_planes = av1_num_planes(cm);
925
0
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
926
0
      av1_get_scaled_ref_frame(cpi, ref_frame);
927
0
  MACROBLOCKD *const xd = &x->e_mbd;
928
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
929
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
930
0
  const struct scale_factors *const sf =
931
0
      get_ref_scale_factors_const(cm, ref_frame);
932
0
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
933
0
  assert(yv12 != NULL);
934
935
0
  if (scaled_ref_frame) {
936
    // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
937
    // support scaling.
938
0
    av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
939
0
                         num_planes);
940
0
  } else {
941
0
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
942
0
  }
943
944
  // Gets an initial list of candidate vectors from neighbours and orders them
945
0
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
946
0
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
947
0
                   mbmi_ext->mode_context);
948
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
949
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
950
0
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
951
  // Further refinement that is encode side only to test the top few candidates
952
  // in full and choose the best as the center point for subsequent searches.
953
  // The current implementation doesn't support scaling.
954
0
  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
955
0
              ref_frame, block_size);
956
957
  // Go back to unscaled reference.
958
0
  if (scaled_ref_frame) {
959
    // We had temporarily setup pred block based on scaled reference above. Go
960
    // back to unscaled reference now, for subsequent use.
961
0
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
962
0
  }
963
0
}
964
965
0
#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
966
0
#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
967
968
// TODO(jingning): this mv clamping function should be block size dependent.
969
0
static inline void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
970
0
  const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
971
0
                                     xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
972
0
                                     xd->mb_to_top_edge - LEFT_TOP_MARGIN,
973
0
                                     xd->mb_to_bottom_edge +
974
0
                                         RIGHT_BOTTOM_MARGIN };
975
0
  clamp_mv(mv, &mv_limits);
976
0
}
977
978
/* If the current mode shares the same mv with other modes with higher cost,
979
 * skip this mode. */
980
static int skip_repeated_mv(const AV1_COMMON *const cm,
981
                            const MACROBLOCK *const x,
982
                            PREDICTION_MODE this_mode,
983
                            const MV_REFERENCE_FRAME ref_frames[2],
984
0
                            InterModeSearchState *search_state) {
985
0
  const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
986
0
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
987
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
988
0
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
989
0
  PREDICTION_MODE compare_mode = MB_MODE_COUNT;
990
0
  if (!is_comp_pred) {
991
0
    if (this_mode == NEARMV) {
992
0
      if (ref_mv_count == 0) {
993
        // NEARMV has the same motion vector as NEARESTMV
994
0
        compare_mode = NEARESTMV;
995
0
      }
996
0
      if (ref_mv_count == 1 &&
997
0
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
998
        // NEARMV has the same motion vector as GLOBALMV
999
0
        compare_mode = GLOBALMV;
1000
0
      }
1001
0
    }
1002
0
    if (this_mode == GLOBALMV) {
1003
0
      if (ref_mv_count == 0 &&
1004
0
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1005
        // GLOBALMV has the same motion vector as NEARESTMV
1006
0
        compare_mode = NEARESTMV;
1007
0
      }
1008
0
      if (ref_mv_count == 1) {
1009
        // GLOBALMV has the same motion vector as NEARMV
1010
0
        compare_mode = NEARMV;
1011
0
      }
1012
0
    }
1013
1014
0
    if (compare_mode != MB_MODE_COUNT) {
1015
      // Use modelled_rd to check whether compare mode was searched
1016
0
      if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
1017
0
          INT64_MAX) {
1018
0
        const int16_t mode_ctx =
1019
0
            av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
1020
0
        const int compare_cost =
1021
0
            cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
1022
0
        const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);
1023
1024
        // Only skip if the mode cost is larger than compare mode cost
1025
0
        if (this_cost > compare_cost) {
1026
0
          search_state->modelled_rd[this_mode][0][ref_frames[0]] =
1027
0
              search_state->modelled_rd[compare_mode][0][ref_frames[0]];
1028
0
          return 1;
1029
0
        }
1030
0
      }
1031
0
    }
1032
0
  }
1033
0
  return 0;
1034
0
}
1035
1036
static inline int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1037
                                     const AV1_COMMON *cm,
1038
0
                                     const MACROBLOCK *x) {
1039
0
  const MACROBLOCKD *const xd = &x->e_mbd;
1040
0
  *out_mv = in_mv;
1041
0
  lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1042
0
                     cm->features.cur_frame_force_integer_mv);
1043
0
  clamp_mv2(&out_mv->as_mv, xd);
1044
0
  return av1_is_fullmv_in_range(&x->mv_limits,
1045
0
                                get_fullmv_from_mv(&out_mv->as_mv));
1046
0
}
1047
1048
// To use single newmv directly for compound modes, need to clamp the mv to the
1049
// valid mv range. Without this, encoder would generate out of range mv, and
1050
// this is seen in 8k encoding.
1051
static inline void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1052
0
                                     int ref_idx) {
1053
0
  const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1054
0
  SubpelMvLimits mv_limits;
1055
1056
0
  av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1057
0
  clamp_mv(&mv->as_mv, &mv_limits);
1058
0
}
1059
1060
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
1061
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
1062
                            int *const rate_mv, HandleInterModeArgs *const args,
1063
0
                            inter_mode_info *mode_info) {
1064
0
  MACROBLOCKD *const xd = &x->e_mbd;
1065
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
1066
0
  const int is_comp_pred = has_second_ref(mbmi);
1067
0
  const PREDICTION_MODE this_mode = mbmi->mode;
1068
0
  const int refs[2] = { mbmi->ref_frame[0],
1069
0
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
1070
0
  const int ref_mv_idx = mbmi->ref_mv_idx;
1071
1072
0
  if (is_comp_pred) {
1073
0
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
1074
0
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
1075
0
    if (this_mode == NEW_NEWMV) {
1076
0
      if (valid_mv0) {
1077
0
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
1078
0
        clamp_mv_in_range(x, &cur_mv[0], 0);
1079
0
      }
1080
0
      if (valid_mv1) {
1081
0
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
1082
0
        clamp_mv_in_range(x, &cur_mv[1], 1);
1083
0
      }
1084
0
      *rate_mv = 0;
1085
0
      for (int i = 0; i < 2; ++i) {
1086
0
        const int_mv ref_mv = av1_get_ref_mv(x, i);
1087
0
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
1088
0
                                    x->mv_costs->nmv_joint_cost,
1089
0
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1090
0
      }
1091
0
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
1092
0
      if (valid_mv1) {
1093
0
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
1094
0
        clamp_mv_in_range(x, &cur_mv[1], 1);
1095
0
      }
1096
0
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
1097
0
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
1098
0
                                 x->mv_costs->nmv_joint_cost,
1099
0
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1100
0
    } else {
1101
0
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
1102
0
      if (valid_mv0) {
1103
0
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
1104
0
        clamp_mv_in_range(x, &cur_mv[0], 0);
1105
0
      }
1106
0
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
1107
0
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
1108
0
                                 x->mv_costs->nmv_joint_cost,
1109
0
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1110
0
    }
1111
0
  } else {
1112
    // Single ref case.
1113
0
    const int ref_idx = 0;
1114
0
    int search_range = INT_MAX;
1115
1116
0
    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
1117
0
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
1118
0
      int min_mv_diff = INT_MAX;
1119
0
      int best_match = -1;
1120
0
      MV prev_ref_mv[2] = { { 0 } };
1121
0
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
1122
0
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
1123
0
                                                     idx, &x->mbmi_ext)
1124
0
                               .as_mv;
1125
0
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
1126
0
                                       abs(ref_mv.col - prev_ref_mv[idx].col));
1127
1128
0
        if (min_mv_diff > ref_mv_diff) {
1129
0
          min_mv_diff = ref_mv_diff;
1130
0
          best_match = idx;
1131
0
        }
1132
0
      }
1133
1134
0
      if (min_mv_diff < (16 << 3)) {
1135
0
        if (args->single_newmv_valid[best_match][refs[0]]) {
1136
0
          search_range = min_mv_diff;
1137
0
          search_range +=
1138
0
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
1139
0
                         prev_ref_mv[best_match].row),
1140
0
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
1141
0
                         prev_ref_mv[best_match].col));
1142
          // Get full pixel search range.
1143
0
          search_range = (search_range + 4) >> 3;
1144
0
        }
1145
0
      }
1146
0
    }
1147
1148
0
    int_mv best_mv;
1149
0
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
1150
0
                             mode_info, &best_mv, args);
1151
0
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;
1152
1153
0
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
1154
0
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
1155
0
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
1156
0
    cur_mv[0].as_int = best_mv.as_int;
1157
1158
    // Return after single_newmv is set.
1159
0
    if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
1160
0
  }
1161
1162
0
  return 0;
1163
0
}
1164
1165
static inline void update_mode_start_end_index(
1166
    const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
1167
    int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
1168
0
    int interintra_allowed, int eval_motion_mode) {
1169
0
  *mode_index_start = (int)SIMPLE_TRANSLATION;
1170
0
  *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1171
0
  if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1172
0
    if (!eval_motion_mode) {
1173
0
      *mode_index_end = (int)SIMPLE_TRANSLATION;
1174
0
    } else {
1175
      // Set the start index appropriately to process motion modes other than
1176
      // simple translation
1177
0
      *mode_index_start = 1;
1178
0
    }
1179
0
  }
1180
0
  if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
1181
0
    *mode_index_end = SIMPLE_TRANSLATION;
1182
0
}
1183
1184
/*!\brief AV1 motion mode search
1185
 *
1186
 * \ingroup inter_mode_search
1187
 * Function to search over and determine the motion mode. It will update
1188
 * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1189
 * WARPED_CAUSAL and determine any necessary side information for the selected
1190
 * motion mode. It will also perform the full transform search, unless the
1191
 * input parameter do_tx_search indicates to do an estimation of the RD rather
1192
 * than an RD corresponding to a full transform search. It will return the
1193
 * RD for the final motion_mode.
1194
 * Do the RD search for a given inter mode and compute all information relevant
1195
 * to the input mode. It will compute the best MV,
1196
 * compound parameters (if the mode is a compound mode) and interpolation filter
1197
 * parameters.
1198
 *
1199
 * \param[in]     cpi               Top-level encoder structure.
1200
 * \param[in]     tile_data         Pointer to struct holding adaptive
1201
 *                                  data/contexts/models for the tile during
1202
 *                                  encoding.
1203
 * \param[in]     x                 Pointer to struct holding all the data for
1204
 *                                  the current macroblock.
1205
 * \param[in]     bsize             Current block size.
1206
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
1207
 *                                  information.
1208
 * \param[in,out] rd_stats_y        Struct to keep track of the RD information
1209
 *                                  for only the Y plane.
1210
 * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
1211
 *                                  for only the UV planes.
1212
 * \param[in]     args              HandleInterModeArgs struct holding
1213
 *                                  miscellaneous arguments for inter mode
1214
 *                                  search. See the documentation for this
1215
 *                                  struct for a description of each member.
1216
 * \param[in]     ref_best_rd       Best RD found so far for this block.
1217
 *                                  It is used for early termination of this
1218
 *                                  search if the RD exceeds this value.
1219
 * \param[in,out] ref_skip_rd       A length 2 array, where skip_rd[0] is the
1220
 *                                  best total RD for a skip mode so far, and
1221
 *                                  skip_rd[1] is the best RD for a skip mode so
1222
 *                                  far in luma. This is used as a speed feature
1223
 *                                  to skip the transform search if the computed
1224
 *                                  skip RD for the current mode is not better
1225
 *                                  than the best skip_rd so far.
1226
 * \param[in,out] rate_mv           The rate associated with the motion vectors.
1227
 *                                  This will be modified if a motion search is
1228
 *                                  done in the motion mode search.
1229
 * \param[in,out] orig_dst          A prediction buffer to hold a computed
1230
 *                                  prediction. This will eventually hold the
1231
 *                                  final prediction, and the tmp_dst info will
1232
 *                                  be copied here.
1233
 * \param[in,out] best_est_rd       Estimated RD for motion mode search if
1234
 *                                  do_tx_search (see below) is 0.
1235
 * \param[in]     do_tx_search      Parameter to indicate whether or not to do
1236
 *                                  a full transform search. This will compute
1237
 *                                  an estimated RD for the modes without the
1238
 *                                  transform search and later perform the full
1239
 *                                  transform search on the best candidates.
1240
 * \param[in]     inter_modes_info  InterModesInfo struct to hold inter mode
1241
 *                                  information to perform a full transform
1242
 *                                  search only on winning candidates searched
1243
 *                                  with an estimate for transform coding RD.
1244
 * \param[in]     eval_motion_mode  Boolean whether or not to evaluate motion
1245
 *                                  modes other than SIMPLE_TRANSLATION.
1246
 * \param[out]    yrd               Stores the rdcost corresponding to encoding
1247
 *                                  the luma plane.
1248
 * \return Returns INT64_MAX if the determined motion mode is invalid and the
1249
 * current motion mode being tested should be skipped. It returns 0 if the
1250
 * motion mode search is a success.
1251
 */
1252
static int64_t motion_mode_rd(
1253
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
1254
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
1255
    RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
1256
    int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
1257
    int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
1258
0
    int eval_motion_mode, int64_t *yrd) {
1259
0
  const AV1_COMMON *const cm = &cpi->common;
1260
0
  const FeatureFlags *const features = &cm->features;
1261
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
1262
0
  const int num_planes = av1_num_planes(cm);
1263
0
  MACROBLOCKD *xd = &x->e_mbd;
1264
0
  MB_MODE_INFO *mbmi = xd->mi[0];
1265
0
  const int is_comp_pred = has_second_ref(mbmi);
1266
0
  const PREDICTION_MODE this_mode = mbmi->mode;
1267
0
  const int rate2_nocoeff = rd_stats->rate;
1268
0
  int best_xskip_txfm = 0;
1269
0
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
1270
0
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
1271
0
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
1272
0
  const int rate_mv0 = *rate_mv;
1273
0
  const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
1274
0
                                 is_interintra_allowed(mbmi) &&
1275
0
                                 mbmi->compound_idx;
1276
0
  WARP_SAMPLE_INFO *const warp_sample_info =
1277
0
      &x->warp_sample_info[mbmi->ref_frame[0]];
1278
0
  int *pts0 = warp_sample_info->pts;
1279
0
  int *pts_inref0 = warp_sample_info->pts_inref;
1280
1281
0
  assert(mbmi->ref_frame[1] != INTRA_FRAME);
1282
0
  const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
1283
0
  av1_invalid_rd_stats(&best_rd_stats);
1284
0
  mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
1285
0
  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
1286
0
  *yrd = INT64_MAX;
1287
0
  if (features->switchable_motion_mode) {
1288
    // Determine which motion modes to search if more than SIMPLE_TRANSLATION
1289
    // is allowed.
1290
0
    last_motion_mode_allowed = motion_mode_allowed(
1291
0
        xd->global_motion, xd, mbmi, features->allow_warped_motion);
1292
0
  }
1293
1294
0
  if (last_motion_mode_allowed == WARPED_CAUSAL) {
1295
    // Collect projection samples used in least squares approximation of
1296
    // the warped motion parameters if WARPED_CAUSAL is going to be searched.
1297
0
    if (warp_sample_info->num < 0) {
1298
0
      warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
1299
0
    }
1300
0
    mbmi->num_proj_ref = warp_sample_info->num;
1301
0
  }
1302
0
  const int total_samples = mbmi->num_proj_ref;
1303
0
  if (total_samples == 0) {
1304
    // Do not search WARPED_CAUSAL if there are no samples to use to determine
1305
    // warped parameters.
1306
0
    last_motion_mode_allowed = OBMC_CAUSAL;
1307
0
  }
1308
1309
0
  const MB_MODE_INFO base_mbmi = *mbmi;
1310
0
  MB_MODE_INFO best_mbmi;
1311
0
  const int interp_filter = features->interp_filter;
1312
0
  const int switchable_rate =
1313
0
      av1_is_interp_needed(xd)
1314
0
          ? av1_get_switchable_rate(x, xd, interp_filter,
1315
0
                                    cm->seq_params->enable_dual_filter)
1316
0
          : 0;
1317
0
  int64_t best_rd = INT64_MAX;
1318
0
  int best_rate_mv = rate_mv0;
1319
0
  const int mi_row = xd->mi_row;
1320
0
  const int mi_col = xd->mi_col;
1321
0
  int mode_index_start, mode_index_end;
1322
0
  const int txfm_rd_gate_level =
1323
0
      get_txfm_rd_gate_level(cm->seq_params->enable_masked_compound,
1324
0
                             cpi->sf.inter_sf.txfm_rd_gate_level, bsize,
1325
0
                             TX_SEARCH_MOTION_MODE, eval_motion_mode);
1326
1327
  // Modify the start and end index according to speed features. For example,
1328
  // if SIMPLE_TRANSLATION has already been searched according to
1329
  // the motion_mode_for_winner_cand speed feature, update the mode_index_start
1330
  // to avoid searching it again.
1331
0
  update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
1332
0
                              last_motion_mode_allowed, interintra_allowed,
1333
0
                              eval_motion_mode);
1334
  // Main function loop. This loops over all of the possible motion modes and
1335
  // computes RD to determine the best one. This process includes computing
1336
  // any necessary side information for the motion mode and performing the
1337
  // transform search.
1338
0
  for (int mode_index = mode_index_start; mode_index <= mode_index_end;
1339
0
       mode_index++) {
1340
0
    if (args->skip_motion_mode && mode_index) continue;
1341
0
    int tmp_rate2 = rate2_nocoeff;
1342
0
    const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
1343
0
    int tmp_rate_mv = rate_mv0;
1344
1345
0
    *mbmi = base_mbmi;
1346
0
    if (is_interintra_mode) {
1347
      // Only use SIMPLE_TRANSLATION for interintra
1348
0
      mbmi->motion_mode = SIMPLE_TRANSLATION;
1349
0
    } else {
1350
0
      mbmi->motion_mode = (MOTION_MODE)mode_index;
1351
0
      assert(mbmi->ref_frame[1] != INTRA_FRAME);
1352
0
    }
1353
1354
    // Do not search OBMC if the probability of selecting it is below a
1355
    // predetermined threshold for this update_type and block size.
1356
0
    const FRAME_UPDATE_TYPE update_type =
1357
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1358
0
    int use_actual_frame_probs = 1;
1359
0
    int prune_obmc;
1360
#if CONFIG_FPMT_TEST
1361
    use_actual_frame_probs =
1362
        (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
1363
    if (!use_actual_frame_probs) {
1364
      prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
1365
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
1366
    }
1367
#endif
1368
0
    if (use_actual_frame_probs) {
1369
0
      prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
1370
0
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
1371
0
    }
1372
0
    if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
1373
0
        mbmi->motion_mode == OBMC_CAUSAL)
1374
0
      continue;
1375
1376
0
    if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
1377
      // SIMPLE_TRANSLATION mode: no need to recalculate.
1378
      // The prediction is calculated before motion_mode_rd() is called in
1379
      // handle_inter_mode()
1380
0
    } else if (mbmi->motion_mode == OBMC_CAUSAL) {
1381
0
      const uint32_t cur_mv = mbmi->mv[0].as_int;
1382
      // OBMC_CAUSAL not allowed for compound prediction
1383
0
      assert(!is_comp_pred);
1384
0
      if (have_newmv_in_inter_mode(this_mode)) {
1385
0
        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
1386
0
                                 &mbmi->mv[0], NULL);
1387
0
        tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1388
0
      }
1389
0
      if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
1390
        // Build the predictor according to the current motion vector if it has
1391
        // not already been built
1392
0
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1393
0
                                      0, av1_num_planes(cm) - 1);
1394
0
      }
1395
      // Build the inter predictor by blending the predictor corresponding to
1396
      // this MV, and the neighboring blocks using the OBMC model
1397
0
      av1_build_obmc_inter_prediction(
1398
0
          cm, xd, args->above_pred_buf, args->above_pred_stride,
1399
0
          args->left_pred_buf, args->left_pred_stride);
1400
0
#if !CONFIG_REALTIME_ONLY
1401
0
    } else if (mbmi->motion_mode == WARPED_CAUSAL) {
1402
0
      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
1403
0
      mbmi->motion_mode = WARPED_CAUSAL;
1404
0
      mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
1405
0
      mbmi->interp_filters =
1406
0
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1407
1408
0
      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
1409
0
      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
1410
      // Select the samples according to motion vector difference
1411
0
      if (mbmi->num_proj_ref > 1) {
1412
0
        mbmi->num_proj_ref = av1_selectSamples(
1413
0
            &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
1414
0
      }
1415
1416
      // Compute the warped motion parameters with a least squares fit
1417
      //  using the collected samples
1418
0
      if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
1419
0
                               mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
1420
0
                               &mbmi->wm_params, mi_row, mi_col)) {
1421
0
        assert(!is_comp_pred);
1422
0
        if (have_newmv_in_inter_mode(this_mode)) {
1423
          // Refine MV for NEWMV mode
1424
0
          const int_mv mv0 = mbmi->mv[0];
1425
0
          const WarpedMotionParams wm_params0 = mbmi->wm_params;
1426
0
          const int num_proj_ref0 = mbmi->num_proj_ref;
1427
1428
0
          const int_mv ref_mv = av1_get_ref_mv(x, 0);
1429
0
          SUBPEL_MOTION_SEARCH_PARAMS ms_params;
1430
0
          av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
1431
0
                                            &ref_mv.as_mv, NULL);
1432
1433
          // Refine MV in a small range.
1434
0
          av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
1435
0
                               total_samples, cpi->sf.mv_sf.warp_search_method,
1436
0
                               cpi->sf.mv_sf.warp_search_iters);
1437
1438
0
          if (mv0.as_int != mbmi->mv[0].as_int) {
1439
            // Keep the refined MV and WM parameters.
1440
0
            tmp_rate_mv = av1_mv_bit_cost(
1441
0
                &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
1442
0
                x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1443
0
            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1444
0
          } else {
1445
            // Restore the old MV and WM parameters.
1446
0
            mbmi->mv[0] = mv0;
1447
0
            mbmi->wm_params = wm_params0;
1448
0
            mbmi->num_proj_ref = num_proj_ref0;
1449
0
          }
1450
0
        }
1451
1452
        // Build the warped predictor
1453
0
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
1454
0
                                      av1_num_planes(cm) - 1);
1455
0
      } else {
1456
0
        continue;
1457
0
      }
1458
0
#endif  // !CONFIG_REALTIME_ONLY
1459
0
    } else if (is_interintra_mode) {
1460
0
      const int ret =
1461
0
          av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
1462
0
                                      &tmp_rate_mv, &tmp_rate2, orig_dst);
1463
0
      if (ret < 0) continue;
1464
0
    }
1465
1466
    // If we are searching newmv and the mv is the same as refmv, skip the
1467
    // current mode
1468
0
    if (!av1_check_newmv_joint_nonzero(cm, x)) continue;
1469
1470
    // Update rd_stats for the current motion mode
1471
0
    txfm_info->skip_txfm = 0;
1472
0
    rd_stats->dist = 0;
1473
0
    rd_stats->sse = 0;
1474
0
    rd_stats->skip_txfm = 1;
1475
0
    rd_stats->rate = tmp_rate2;
1476
0
    const ModeCosts *mode_costs = &x->mode_costs;
1477
0
    if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
1478
0
    if (interintra_allowed) {
1479
0
      rd_stats->rate +=
1480
0
          mode_costs->interintra_cost[size_group_lookup[bsize]]
1481
0
                                     [mbmi->ref_frame[1] == INTRA_FRAME];
1482
0
    }
1483
0
    if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
1484
0
        (mbmi->ref_frame[1] != INTRA_FRAME)) {
1485
0
      if (last_motion_mode_allowed == WARPED_CAUSAL) {
1486
0
        rd_stats->rate +=
1487
0
            mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
1488
0
      } else {
1489
0
        rd_stats->rate +=
1490
0
            mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
1491
0
      }
1492
0
    }
1493
1494
0
    int64_t this_yrd = INT64_MAX;
1495
1496
0
    if (!do_tx_search) {
1497
      // Avoid doing a transform search here to speed up the overall mode
1498
      // search. It will be done later in the mode search if the current
1499
      // motion mode seems promising.
1500
0
      int64_t curr_sse = -1;
1501
0
      int64_t sse_y = -1;
1502
0
      int est_residue_cost = 0;
1503
0
      int64_t est_dist = 0;
1504
0
      int64_t est_rd = 0;
1505
0
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1506
0
        curr_sse = get_sse(cpi, x, &sse_y);
1507
0
        const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
1508
0
                                                 &est_residue_cost, &est_dist);
1509
0
        (void)has_est_rd;
1510
0
        assert(has_est_rd);
1511
0
      } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
1512
0
                 cpi->sf.rt_sf.use_nonrd_pick_mode) {
1513
0
        model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
1514
0
            cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
1515
0
            NULL, &curr_sse, NULL, NULL, NULL);
1516
0
        sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
1517
0
      }
1518
0
      est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
1519
0
      if (est_rd * 0.80 > *best_est_rd) {
1520
0
        mbmi->ref_frame[1] = ref_frame_1;
1521
0
        continue;
1522
0
      }
1523
0
      const int mode_rate = rd_stats->rate;
1524
0
      rd_stats->rate += est_residue_cost;
1525
0
      rd_stats->dist = est_dist;
1526
0
      rd_stats->rdcost = est_rd;
1527
0
      if (rd_stats->rdcost < *best_est_rd) {
1528
0
        *best_est_rd = rd_stats->rdcost;
1529
0
        assert(sse_y >= 0);
1530
0
        ref_skip_rd[1] = txfm_rd_gate_level
1531
0
                             ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
1532
0
                             : INT64_MAX;
1533
0
      }
1534
0
      if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
1535
0
        if (!is_comp_pred) {
1536
0
          assert(curr_sse >= 0);
1537
0
          inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1538
0
                                rd_stats->rdcost, rd_stats, rd_stats_y,
1539
0
                                rd_stats_uv, mbmi);
1540
0
        }
1541
0
      } else {
1542
0
        assert(curr_sse >= 0);
1543
0
        inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1544
0
                              rd_stats->rdcost, rd_stats, rd_stats_y,
1545
0
                              rd_stats_uv, mbmi);
1546
0
      }
1547
0
      mbmi->skip_txfm = 0;
1548
0
    } else {
1549
      // Perform full transform search
1550
0
      int64_t skip_rd = INT64_MAX;
1551
0
      int64_t skip_rdy = INT64_MAX;
1552
0
      if (txfm_rd_gate_level) {
1553
        // Check if the mode is good enough based on skip RD
1554
0
        int64_t sse_y = INT64_MAX;
1555
0
        int64_t curr_sse = get_sse(cpi, x, &sse_y);
1556
0
        skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
1557
0
        skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
1558
0
        int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
1559
0
                                        txfm_rd_gate_level, 0);
1560
0
        if (!eval_txfm) continue;
1561
0
      }
1562
1563
      // Do transform search
1564
0
      const int mode_rate = rd_stats->rate;
1565
0
      if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
1566
0
                           rd_stats->rate, ref_best_rd)) {
1567
0
        if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
1568
0
          return INT64_MAX;
1569
0
        }
1570
0
        continue;
1571
0
      }
1572
0
      const int skip_ctx = av1_get_skip_txfm_context(xd);
1573
0
      const int y_rate =
1574
0
          rd_stats->skip_txfm
1575
0
              ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
1576
0
              : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
1577
0
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);
1578
1579
0
      const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1580
0
      if (curr_rd < ref_best_rd) {
1581
0
        ref_best_rd = curr_rd;
1582
0
        ref_skip_rd[0] = skip_rd;
1583
0
        ref_skip_rd[1] = skip_rdy;
1584
0
      }
1585
0
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1586
0
        inter_mode_data_push(
1587
0
            tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
1588
0
            rd_stats_y->rate + rd_stats_uv->rate +
1589
0
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
1590
0
      }
1591
0
    }
1592
1593
0
    if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
1594
0
      if (is_nontrans_global_motion(xd, xd->mi[0])) {
1595
0
        mbmi->interp_filters =
1596
0
            av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1597
0
      }
1598
0
    }
1599
1600
0
    const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1601
0
    if (mode_index == 0) {
1602
0
      args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
1603
0
    }
1604
0
    if (mode_index == 0 || tmp_rd < best_rd) {
1605
      // Update best_rd data if this is the best motion mode so far
1606
0
      best_mbmi = *mbmi;
1607
0
      best_rd = tmp_rd;
1608
0
      best_rd_stats = *rd_stats;
1609
0
      best_rd_stats_y = *rd_stats_y;
1610
0
      best_rate_mv = tmp_rate_mv;
1611
0
      *yrd = this_yrd;
1612
0
      if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
1613
0
      memcpy(best_blk_skip, txfm_info->blk_skip,
1614
0
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
1615
0
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
1616
0
      best_xskip_txfm = mbmi->skip_txfm;
1617
0
    }
1618
0
  }
1619
  // Update RD and mbmi stats for selected motion mode
1620
0
  mbmi->ref_frame[1] = ref_frame_1;
1621
0
  *rate_mv = best_rate_mv;
1622
0
  if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
1623
0
    av1_invalid_rd_stats(rd_stats);
1624
0
    restore_dst_buf(xd, *orig_dst, num_planes);
1625
0
    return INT64_MAX;
1626
0
  }
1627
0
  *mbmi = best_mbmi;
1628
0
  *rd_stats = best_rd_stats;
1629
0
  *rd_stats_y = best_rd_stats_y;
1630
0
  if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
1631
0
  memcpy(txfm_info->blk_skip, best_blk_skip,
1632
0
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
1633
0
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
1634
0
  txfm_info->skip_txfm = best_xskip_txfm;
1635
1636
0
  restore_dst_buf(xd, *orig_dst, num_planes);
1637
0
  return 0;
1638
0
}
1639
1640
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
1641
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
1642
0
                            const BUFFER_SET *const orig_dst, int64_t best_rd) {
1643
0
  assert(bsize < BLOCK_SIZES_ALL);
1644
0
  const AV1_COMMON *cm = &cpi->common;
1645
0
  const int num_planes = av1_num_planes(cm);
1646
0
  MACROBLOCKD *const xd = &x->e_mbd;
1647
0
  const int mi_row = xd->mi_row;
1648
0
  const int mi_col = xd->mi_col;
1649
0
  int64_t total_sse = 0;
1650
0
  int64_t this_rd = INT64_MAX;
1651
0
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
1652
0
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];
1653
1654
0
  for (int plane = 0; plane < num_planes; ++plane) {
1655
    // Call av1_enc_build_inter_predictor() for one plane at a time.
1656
0
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1657
0
                                  plane, plane);
1658
0
    const struct macroblockd_plane *const pd = &xd->plane[plane];
1659
0
    const BLOCK_SIZE plane_bsize =
1660
0
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
1661
1662
0
    av1_subtract_plane(x, plane_bsize, plane);
1663
1664
0
    int64_t sse =
1665
0
        av1_pixel_diff_dist(x, plane, 0, 0, plane_bsize, plane_bsize, NULL);
1666
0
    if (is_cur_buf_hbd(xd)) sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
1667
0
    sse <<= 4;
1668
0
    total_sse += sse;
1669
    // When current rd cost is more than the best rd, skip evaluation of
1670
    // remaining planes.
1671
0
    this_rd = RDCOST(x->rdmult, rd_stats->rate, total_sse);
1672
0
    if (this_rd > best_rd) break;
1673
0
  }
1674
1675
0
  rd_stats->dist = rd_stats->sse = total_sse;
1676
0
  rd_stats->rdcost = this_rd;
1677
1678
0
  restore_dst_buf(xd, *orig_dst, num_planes);
1679
0
  return 0;
1680
0
}
1681
1682
// Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the relevant
1683
// mode
1684
// Note(rachelbarker): This speed feature currently does not interact correctly
1685
// with global motion. The issue is that, when global motion is used, GLOBALMV
1686
// produces a different prediction to NEARESTMV/NEARMV even if the motion
1687
// vectors are the same. Thus GLOBALMV should not be pruned in this case.
1688
static inline int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
1689
                                      int ref_idx,
1690
                                      const MV_REFERENCE_FRAME *ref_frame,
1691
0
                                      PREDICTION_MODE single_mode) {
1692
0
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1693
0
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1694
0
  assert(single_mode != NEWMV);
1695
0
  if (single_mode == NEARESTMV) {
1696
0
    return 0;
1697
0
  } else if (single_mode == NEARMV) {
1698
    // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
1699
    // when ref_mv_count = 1, NEARMV is same as GLOBALMV
1700
0
    if (ref_mv_count < 2) return 1;
1701
0
  } else if (single_mode == GLOBALMV) {
1702
    // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
1703
0
    if (ref_mv_count == 0) return 1;
1704
    // when ref_mv_count == 1, NEARMV is same as GLOBALMV
1705
0
    else if (ref_mv_count == 1)
1706
0
      return 0;
1707
1708
0
    int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
1709
    // Check GLOBALMV is matching with any mv in ref_mv_stack
1710
0
    for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
1711
0
      int_mv this_mv;
1712
1713
0
      if (ref_idx == 0)
1714
0
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
1715
0
      else
1716
0
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
1717
1718
0
      if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
1719
0
        return 1;
1720
0
    }
1721
0
  }
1722
0
  return 0;
1723
0
}
1724
1725
static inline int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
1726
                              int ref_idx, int ref_mv_idx,
1727
                              int skip_repeated_ref_mv,
1728
                              const MV_REFERENCE_FRAME *ref_frame,
1729
0
                              const MB_MODE_INFO_EXT *mbmi_ext) {
1730
0
  const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1731
0
  assert(is_inter_singleref_mode(single_mode));
1732
0
  if (single_mode == NEWMV) {
1733
0
    this_mv->as_int = INVALID_MV;
1734
0
  } else if (single_mode == GLOBALMV) {
1735
0
    if (skip_repeated_ref_mv &&
1736
0
        check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1737
0
      return 0;
1738
0
    *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1739
0
  } else {
1740
0
    assert(single_mode == NEARMV || single_mode == NEARESTMV);
1741
0
    const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1742
0
    const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
1743
0
    if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
1744
0
      assert(ref_mv_offset >= 0);
1745
0
      if (ref_idx == 0) {
1746
0
        *this_mv =
1747
0
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
1748
0
      } else {
1749
0
        *this_mv =
1750
0
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
1751
0
      }
1752
0
    } else {
1753
0
      if (skip_repeated_ref_mv &&
1754
0
          check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1755
0
        return 0;
1756
0
      *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1757
0
    }
1758
0
  }
1759
0
  return 1;
1760
0
}
1761
1762
// Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
1763
// population
1764
static inline int skip_nearest_near_mv_using_refmv_weight(
1765
    const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
1766
0
    const int8_t ref_frame_type, PREDICTION_MODE best_mode) {
1767
0
  if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
1768
  // Do not skip the mode if the current block has not yet obtained a valid
1769
  // inter mode.
1770
0
  if (!is_inter_mode(best_mode)) return 0;
1771
1772
0
  const MACROBLOCKD *xd = &x->e_mbd;
1773
  // Do not skip the mode if both the top and left neighboring blocks are not
1774
  // available.
1775
0
  if (!xd->left_available || !xd->up_available) return 0;
1776
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1777
0
  const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
1778
0
  const int ref_mv_count =
1779
0
      AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
1780
1781
0
  if (ref_mv_count == 0) return 0;
1782
  // If ref mv list has at least one nearest candidate do not prune NEARESTMV
1783
0
  if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;
1784
1785
  // Count number of ref mvs populated from nearest candidates
1786
0
  int nearest_refmv_count = 0;
1787
0
  for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
1788
0
    if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
1789
0
  }
1790
1791
  // nearest_refmv_count indicates the closeness of block motion characteristics
1792
  // with respect to its spatial neighbor. Smaller value of nearest_refmv_count
1793
  // w.r.t to ref_mv_count means less correlation with its spatial neighbors.
1794
  // Hence less possibility for NEARESTMV and NEARMV modes becoming the best
1795
  // mode since these modes work well for blocks that shares similar motion
1796
  // characteristics with its neighbor. Thus, NEARMV mode is pruned when
1797
  // nearest_refmv_count is relatively smaller than ref_mv_count and NEARESTMV
1798
  // mode is pruned if none of the ref mvs are populated from nearest candidate.
1799
0
  const int prune_thresh = 1 + (ref_mv_count >= 2);
1800
0
  if (nearest_refmv_count < prune_thresh) return 1;
1801
0
  return 0;
1802
0
}
1803
1804
// This function update the non-new mv for the current prediction mode
1805
static inline int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
1806
                               const AV1_COMMON *cm, const MACROBLOCK *x,
1807
0
                               int skip_repeated_ref_mv) {
1808
0
  const MACROBLOCKD *xd = &x->e_mbd;
1809
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
1810
0
  const int is_comp_pred = has_second_ref(mbmi);
1811
1812
0
  int ret = 1;
1813
0
  for (int i = 0; i < is_comp_pred + 1; ++i) {
1814
0
    int_mv this_mv;
1815
0
    this_mv.as_int = INVALID_MV;
1816
0
    ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
1817
0
                      skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
1818
0
    if (!ret) return 0;
1819
0
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
1820
0
    if (single_mode == NEWMV) {
1821
0
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1822
0
      cur_mv[i] =
1823
0
          (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
1824
0
                         .this_mv
1825
0
                   : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
1826
0
                         .comp_mv;
1827
0
    } else {
1828
0
      ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
1829
0
    }
1830
0
  }
1831
0
  return ret;
1832
0
}
1833
1834
static inline int get_drl_cost(const MB_MODE_INFO *mbmi,
1835
                               const MB_MODE_INFO_EXT *mbmi_ext,
1836
                               const int (*const drl_mode_cost0)[2],
1837
0
                               int8_t ref_frame_type) {
1838
0
  int cost = 0;
1839
0
  if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
1840
0
    for (int idx = 0; idx < 2; ++idx) {
1841
0
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1842
0
        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1843
0
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
1844
0
        if (mbmi->ref_mv_idx == idx) return cost;
1845
0
      }
1846
0
    }
1847
0
    return cost;
1848
0
  }
1849
1850
0
  if (have_nearmv_in_inter_mode(mbmi->mode)) {
1851
0
    for (int idx = 1; idx < 3; ++idx) {
1852
0
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1853
0
        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1854
0
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
1855
0
        if (mbmi->ref_mv_idx == (idx - 1)) return cost;
1856
0
      }
1857
0
    }
1858
0
    return cost;
1859
0
  }
1860
0
  return cost;
1861
0
}
1862
1863
static inline int is_single_newmv_valid(const HandleInterModeArgs *const args,
1864
                                        const MB_MODE_INFO *const mbmi,
1865
0
                                        PREDICTION_MODE this_mode) {
1866
0
  for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
1867
0
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1868
0
    const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
1869
0
    if (single_mode == NEWMV &&
1870
0
        args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
1871
0
      return 0;
1872
0
    }
1873
0
  }
1874
0
  return 1;
1875
0
}
1876
1877
static int get_drl_refmv_count(const MACROBLOCK *const x,
1878
                               const MV_REFERENCE_FRAME *ref_frame,
1879
0
                               PREDICTION_MODE mode) {
1880
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1881
0
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1882
0
  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
1883
0
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1884
0
  const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);
1885
0
  const int has_drl =
1886
0
      (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1);
1887
0
  const int ref_set =
1888
0
      has_drl ? AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv) : 1;
1889
1890
0
  return ref_set;
1891
0
}
1892
1893
// Checks if particular ref_mv_idx should be pruned.
1894
static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
1895
                                         const int qindex,
1896
0
                                         const int ref_mv_idx) {
1897
0
  if (reduce_inter_modes >= 3) return 1;
1898
  // Q-index logic based pruning is enabled only for
1899
  // reduce_inter_modes = 2.
1900
0
  assert(reduce_inter_modes == 2);
1901
  // When reduce_inter_modes=2, pruning happens as below based on q index.
1902
  // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
1903
  // For q index range between 86 and 170: prune if ref_mv_idx == 2.
1904
  // For q index range between 171 and 255: no pruning.
1905
0
  const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
1906
0
  return (ref_mv_idx >= min_prune_ref_mv_idx);
1907
0
}
1908
1909
// Whether this reference motion vector can be skipped, based on initial
1910
// heuristics.
1911
static bool ref_mv_idx_early_breakout(
1912
    const SPEED_FEATURES *const sf,
1913
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
1914
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
1915
0
    int ref_mv_idx) {
1916
0
  MACROBLOCKD *xd = &x->e_mbd;
1917
0
  MB_MODE_INFO *mbmi = xd->mi[0];
1918
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1919
0
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1920
0
  const int is_comp_pred = has_second_ref(mbmi);
1921
0
  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
1922
0
    if (mbmi->ref_frame[0] == LAST2_FRAME ||
1923
0
        mbmi->ref_frame[0] == LAST3_FRAME ||
1924
0
        mbmi->ref_frame[1] == LAST2_FRAME ||
1925
0
        mbmi->ref_frame[1] == LAST3_FRAME) {
1926
0
      const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
1927
0
      if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
1928
0
          REF_CAT_LEVEL) {
1929
0
        return true;
1930
0
      }
1931
0
    }
1932
    // TODO(any): Experiment with reduce_inter_modes for compound prediction
1933
0
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
1934
0
        have_newmv_in_inter_mode(mbmi->mode)) {
1935
0
      if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
1936
0
          mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
1937
0
        const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
1938
0
        const int do_prune = prune_ref_mv_idx_using_qindex(
1939
0
            sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
1940
0
        if (do_prune &&
1941
0
            (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
1942
0
             REF_CAT_LEVEL)) {
1943
0
          return true;
1944
0
        }
1945
0
      }
1946
0
    }
1947
0
  }
1948
1949
0
  mbmi->ref_mv_idx = ref_mv_idx;
1950
0
  if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
1951
0
    return true;
1952
0
  }
1953
0
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
1954
0
  const int drl_cost = get_drl_cost(
1955
0
      mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
1956
0
  est_rd_rate += drl_cost;
1957
0
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
1958
0
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
1959
0
    return true;
1960
0
  }
1961
0
  return false;
1962
0
}
1963
1964
// Compute the estimated RD cost for the motion vector with simple translation.
1965
static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
1966
                                          RD_STATS *rd_stats,
1967
                                          HandleInterModeArgs *args,
1968
                                          int ref_mv_idx, int64_t ref_best_rd,
1969
0
                                          BLOCK_SIZE bsize) {
1970
0
  MACROBLOCKD *xd = &x->e_mbd;
1971
0
  MB_MODE_INFO *mbmi = xd->mi[0];
1972
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1973
0
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1974
0
  const AV1_COMMON *cm = &cpi->common;
1975
0
  const int is_comp_pred = has_second_ref(mbmi);
1976
0
  const ModeCosts *mode_costs = &x->mode_costs;
1977
1978
0
  struct macroblockd_plane *p = xd->plane;
1979
0
  const BUFFER_SET orig_dst = {
1980
0
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
1981
0
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
1982
0
  };
1983
0
  av1_init_rd_stats(rd_stats);
1984
1985
0
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
1986
0
  mbmi->comp_group_idx = 0;
1987
0
  mbmi->compound_idx = 1;
1988
0
  if (mbmi->ref_frame[1] == INTRA_FRAME) {
1989
0
    mbmi->ref_frame[1] = NONE_FRAME;
1990
0
  }
1991
0
  int16_t mode_ctx =
1992
0
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
1993
1994
0
  mbmi->num_proj_ref = 0;
1995
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
1996
0
  mbmi->ref_mv_idx = ref_mv_idx;
1997
1998
0
  rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
1999
0
  const int drl_cost =
2000
0
      get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
2001
0
  rd_stats->rate += drl_cost;
2002
2003
0
  int_mv cur_mv[2];
2004
0
  if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
2005
0
    return INT64_MAX;
2006
0
  }
2007
0
  assert(have_nearmv_in_inter_mode(mbmi->mode));
2008
0
  for (int i = 0; i < is_comp_pred + 1; ++i) {
2009
0
    mbmi->mv[i].as_int = cur_mv[i].as_int;
2010
0
  }
2011
0
  const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
2012
0
  rd_stats->rate += ref_mv_cost;
2013
2014
0
  if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
2015
0
    return INT64_MAX;
2016
0
  }
2017
2018
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
2019
0
  mbmi->num_proj_ref = 0;
2020
0
  if (is_comp_pred) {
2021
    // Only compound_average
2022
0
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2023
0
    mbmi->comp_group_idx = 0;
2024
0
    mbmi->compound_idx = 1;
2025
0
  }
2026
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
2027
2028
0
  const int mi_row = xd->mi_row;
2029
0
  const int mi_col = xd->mi_col;
2030
0
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
2031
0
                                AOM_PLANE_Y, AOM_PLANE_Y);
2032
0
  int est_rate;
2033
0
  int64_t est_dist;
2034
0
  model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
2035
0
                                  NULL, NULL, NULL, NULL, NULL);
2036
0
  return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
2037
0
}
2038
2039
// Represents a set of integers, from 0 to sizeof(int) * 8 - 1, as bits in
// an integer. 0 for the i-th bit means that integer is excluded, 1 means
// it is included.

// Adds `index` to the set stored in *mask. Setting a bit that is already set
// leaves the mask unchanged. `index` must lie in [0, sizeof(int) * 8 - 1].
// The bit is set on the unsigned representation because left-shifting a
// signed 1 into the sign bit is undefined behaviour.
static inline void mask_set_bit(int *mask, int index) {
  assert(index >= 0 && index < (int)(sizeof(*mask) * 8));
  *mask = (int)((unsigned int)*mask | (1u << index));
}
2043
2044
0
// Returns true if bit `index` of `mask` is set, false otherwise.
// `index` must lie in [0, sizeof(int) * 8 - 1]. The test is done on the
// unsigned representation so that a mask with its top bit set (a negative
// int) does not rely on implementation-defined signed right shifts.
static inline bool mask_check_bit(int mask, int index) {
  assert(index >= 0 && index < (int)(sizeof(mask) * 8));
  return (((unsigned int)mask >> index) & 0x1u) != 0;
}
2047
2048
// Before performing the full MV search in handle_inter_mode, do a simple
2049
// translation search and see if we can eliminate any motion vectors.
2050
// Returns an integer where, if the i-th bit is set, it means that the i-th
2051
// motion vector should be searched. This is only set for NEAR_MV.
2052
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
2053
                                RD_STATS *rd_stats,
2054
                                HandleInterModeArgs *const args,
2055
                                int64_t ref_best_rd, BLOCK_SIZE bsize,
2056
0
                                const int ref_set) {
2057
  // If the number of ref mv count is equal to 1, do not prune the same. It
2058
  // is better to evaluate the same than to prune it.
2059
0
  if (ref_set == 1) return 1;
2060
0
  AV1_COMMON *const cm = &cpi->common;
2061
0
  const MACROBLOCKD *const xd = &x->e_mbd;
2062
0
  const MB_MODE_INFO *const mbmi = xd->mi[0];
2063
0
  const PREDICTION_MODE this_mode = mbmi->mode;
2064
2065
  // Only search indices if they have some chance of being good.
2066
0
  int good_indices = 0;
2067
0
  for (int i = 0; i < ref_set; ++i) {
2068
0
    if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
2069
0
                                  ref_best_rd, i)) {
2070
0
      continue;
2071
0
    }
2072
0
    mask_set_bit(&good_indices, i);
2073
0
  }
2074
2075
  // Only prune in NEARMV mode, if the speed feature is set, and the block size
2076
  // is large enough. If these conditions are not met, return all good indices
2077
  // found so far.
2078
0
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
2079
0
    return good_indices;
2080
0
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
2081
0
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
2082
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
2083
  // so b/2384 can be resolved.
2084
0
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
2085
0
      (mbmi->ref_frame[1] > 0 &&
2086
0
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
2087
0
    return good_indices;
2088
0
  }
2089
2090
  // Calculate the RD cost for the motion vectors using simple translation.
2091
0
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
2092
0
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
2093
    // If this index is bad, ignore it.
2094
0
    if (!mask_check_bit(good_indices, ref_mv_idx)) {
2095
0
      continue;
2096
0
    }
2097
0
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
2098
0
        cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize);
2099
0
  }
2100
  // Find the index with the best RD cost.
2101
0
  int best_idx = 0;
2102
0
  for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
2103
0
    if (idx_rdcost[i] < idx_rdcost[best_idx]) {
2104
0
      best_idx = i;
2105
0
    }
2106
0
  }
2107
  // Only include indices that are good and within a % of the best.
2108
0
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
2109
  // If the simple translation cost is not within this multiple of the
2110
  // best RD, skip it. Note that the cutoff is derived experimentally.
2111
0
  const double ref_dth = 5;
2112
0
  int result = 0;
2113
0
  for (int i = 0; i < ref_set; ++i) {
2114
0
    if (mask_check_bit(good_indices, i) &&
2115
0
        (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
2116
0
        (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
2117
0
      mask_set_bit(&result, i);
2118
0
    }
2119
0
  }
2120
0
  return result;
2121
0
}
2122
2123
/*!\brief Motion mode information for inter mode search speedup.
2124
 *
2125
 * Used in a speed feature to search motion modes other than
2126
 * SIMPLE_TRANSLATION only on winning candidates.
2127
 *
 * Entries of this type are stored in the motion_mode_cand array of
 * motion_mode_best_st_candidate.
 */
2128
typedef struct motion_mode_candidate {
2129
  /*!
2130
   * Mode info for the motion mode candidate.
2131
   */
2132
  MB_MODE_INFO mbmi;
2133
  /*!
2134
   * Rate describing the cost of the motion vectors for this candidate.
2135
   */
2136
  int rate_mv;
2137
  /*!
2138
   * Rate before motion mode search and transform coding is applied.
2139
   */
2140
  int rate2_nocoeff;
2141
  /*!
2142
   * An integer value 0 or 1 which indicates whether or not to skip the motion
2143
   * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
2144
   * candidate.
2145
   */
2146
  int skip_motion_mode;
2147
  /*!
2148
   * Total RD cost for this candidate.
2149
   */
2150
  int64_t rd_cost;
2151
} motion_mode_candidate;
2152
2153
/*!\cond */
2154
// Fixed-capacity list of motion_mode_candidate entries.
typedef struct motion_mode_best_st_candidate {
2155
  // Candidate storage; holds at most MAX_WINNER_MOTION_MODES entries.
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
2156
  // NOTE(review): presumably the number of entries of motion_mode_cand that
  // are in use -- confirm against the code that fills this list.
  int num_motion_mode_cand;
2157
} motion_mode_best_st_candidate;
2158
2159
// Checks if the current reference frame matches with neighbouring block's
2160
// (top/left) reference frames
2161
static inline int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
2162
0
                                               MB_MODE_INFO *nb_mbmi) {
2163
0
  MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
2164
0
                                          nb_mbmi->ref_frame[1] };
2165
0
  MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
2166
0
                                           cur_mbmi->ref_frame[1] };
2167
0
  const int is_cur_comp_pred = has_second_ref(cur_mbmi);
2168
0
  int match_found = 0;
2169
2170
0
  for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
2171
0
    if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
2172
0
        (cur_ref_frames[i] == nb_ref_frames[1]))
2173
0
      match_found = 1;
2174
0
  }
2175
0
  return match_found;
2176
0
}
2177
2178
static inline int find_ref_match_in_above_nbs(const int total_mi_cols,
2179
0
                                              MACROBLOCKD *xd) {
2180
0
  if (!xd->up_available) return 1;
2181
0
  const int mi_col = xd->mi_col;
2182
0
  MB_MODE_INFO **cur_mbmi = xd->mi;
2183
  // prev_row_mi points into the mi array, starting at the beginning of the
2184
  // previous row.
2185
0
  MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
2186
0
  const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
2187
0
  uint8_t mi_step;
2188
0
  for (int above_mi_col = mi_col; above_mi_col < end_col;
2189
0
       above_mi_col += mi_step) {
2190
0
    MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
2191
0
    mi_step = mi_size_wide[above_mi[0]->bsize];
2192
0
    int match_found = 0;
2193
0
    if (is_inter_block(*above_mi))
2194
0
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
2195
0
    if (match_found) return 1;
2196
0
  }
2197
0
  return 0;
2198
0
}
2199
2200
static inline int find_ref_match_in_left_nbs(const int total_mi_rows,
2201
0
                                             MACROBLOCKD *xd) {
2202
0
  if (!xd->left_available) return 1;
2203
0
  const int mi_row = xd->mi_row;
2204
0
  MB_MODE_INFO **cur_mbmi = xd->mi;
2205
  // prev_col_mi points into the mi array, starting at the top of the
2206
  // previous column
2207
0
  MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
2208
0
  const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
2209
0
  uint8_t mi_step;
2210
0
  for (int left_mi_row = mi_row; left_mi_row < end_row;
2211
0
       left_mi_row += mi_step) {
2212
0
    MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
2213
0
    mi_step = mi_size_high[left_mi[0]->bsize];
2214
0
    int match_found = 0;
2215
0
    if (is_inter_block(*left_mi))
2216
0
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
2217
0
    if (match_found) return 1;
2218
0
  }
2219
0
  return 0;
2220
0
}
2221
/*!\endcond */
2222
2223
/*! \brief Struct used to hold TPL data to
2224
 * narrow down parts of the inter mode search.
2225
 */
2226
typedef struct {
2227
  /*!
2228
   * The best inter cost out of all of the reference frames.
2229
   */
2230
  int64_t best_inter_cost;
2231
  /*!
2232
   * The inter cost for each reference frame.
2233
   */
2234
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
2235
} PruneInfoFromTpl;
2236
2237
#if !CONFIG_REALTIME_ONLY
2238
// TODO(Remya): Check if get_tpl_stats_b() can be reused
2239
static inline void get_block_level_tpl_stats(
2240
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
2241
0
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
2242
0
  AV1_COMMON *const cm = &cpi->common;
2243
2244
0
  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
2245
0
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
2246
0
  const int tpl_idx = cpi->gf_frame_index;
2247
0
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
2248
0
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
2249
0
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
2250
0
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
2251
0
  const int mi_wide = mi_size_wide[bsize];
2252
0
  const int mi_high = mi_size_high[bsize];
2253
0
  const int tpl_stride = tpl_frame->stride;
2254
0
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
2255
0
  const int mi_col_sr =
2256
0
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
2257
0
  const int mi_col_end_sr =
2258
0
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
2259
0
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
2260
2261
0
  const int row_step = step;
2262
0
  const int col_step_sr =
2263
0
      coded_to_superres_mi(step, cm->superres_scale_denominator);
2264
0
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
2265
0
       row += row_step) {
2266
0
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
2267
0
         col += col_step_sr) {
2268
0
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
2269
0
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];
2270
2271
      // Sums up the inter cost of corresponding ref frames
2272
0
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2273
0
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
2274
0
            this_stats->pred_error[ref_idx];
2275
0
      }
2276
0
    }
2277
0
  }
2278
2279
  // Computes the best inter cost (minimum inter_cost)
2280
0
  int64_t best_inter_cost = INT64_MAX;
2281
0
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2282
0
    const int64_t cur_inter_cost =
2283
0
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
2284
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
2285
    // calculating the minimum inter_cost
2286
0
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
2287
0
        valid_refs[ref_idx])
2288
0
      best_inter_cost = cur_inter_cost;
2289
0
  }
2290
0
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
2291
0
}
2292
#endif
2293
2294
static inline int prune_modes_based_on_tpl_stats(
2295
    PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
2296
0
    const PREDICTION_MODE this_mode, int prune_mode_level) {
2297
0
  const int have_newmv = have_newmv_in_inter_mode(this_mode);
2298
0
  if ((prune_mode_level < 2) && have_newmv) return 0;
2299
2300
0
  const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
2301
0
  if (best_inter_cost == INT64_MAX) return 0;
2302
2303
0
  const int prune_level = prune_mode_level - 1;
2304
0
  int64_t cur_inter_cost;
2305
2306
0
  const int is_globalmv =
2307
0
      (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
2308
0
  const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
2309
2310
  // Thresholds used for pruning:
2311
  // Lower value indicates aggressive pruning and higher value indicates
2312
  // conservative pruning which is set based on ref_mv_idx and speed feature.
2313
  // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. prune_index
2314
  // 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
2315
0
  static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
2316
0
    { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
2317
0
  };
2318
2319
0
  const int is_comp_pred = (refs[1] > INTRA_FRAME);
2320
0
  if (!is_comp_pred) {
2321
0
    cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2322
0
  } else {
2323
0
    const int64_t inter_cost_ref0 =
2324
0
        inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2325
0
    const int64_t inter_cost_ref1 =
2326
0
        inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
2327
    // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
2328
    // more aggressive pruning
2329
0
    cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
2330
0
  }
2331
2332
  // Prune the mode if cur_inter_cost is greater than threshold times
2333
  // best_inter_cost
2334
0
  if (cur_inter_cost >
2335
0
      ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
2336
0
        best_inter_cost) >>
2337
0
       2))
2338
0
    return 1;
2339
0
  return 0;
2340
0
}
2341
2342
/*!\brief High level function to select parameters for compound mode.
2343
 *
2344
 * \ingroup inter_mode_search
2345
 * The main search functionality is done in the call to av1_compound_type_rd().
2346
 *
2347
 * \param[in]     cpi               Top-level encoder structure.
2348
 * \param[in]     x                 Pointer to struct holding all the data for
2349
 *                                  the current macroblock.
2350
 * \param[in]     args              HandleInterModeArgs struct holding
2351
 *                                  miscellaneous arguments for inter mode
2352
 *                                  search. See the documentation for this
2353
 *                                  struct for a description of each member.
2354
 * \param[in]     ref_best_rd       Best RD found so far for this block.
2355
 *                                  It is used for early termination of this
2356
 *                                  search if the RD exceeds this value.
2357
 * \param[in,out] cur_mv            Current motion vector.
2358
 * \param[in]     bsize             Current block size.
2359
 * \param[in,out] compmode_interinter_cost  RD of the selected interinter
2360
 *                                  compound mode.
2361
 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2362
 *                                  allocated buffers for the compound
2363
 *                                  predictors and masks in the compound type
2364
 *                                  search.
2365
 * \param[in,out] orig_dst          A prediction buffer to hold a computed
2366
 *                                  prediction. This will eventually hold the
2367
 *                                  final prediction, and the tmp_dst info will
2368
 *                                  be copied here.
2369
 * \param[in]     tmp_dst           A temporary prediction buffer to hold a
2370
 *                                  computed prediction.
2371
 * \param[in,out] rate_mv           The rate associated with the motion vectors.
2372
 *                                  This will be modified if a motion search is
2373
 *                                  done in the motion mode search.
2374
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
2375
 *                                  information.
2376
 * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is the
2377
 *                                  best total RD for a skip mode so far, and
2378
 *                                  skip_rd[1] is the best RD for a skip mode so
2379
 *                                  far in luma. This is used as a speed feature
2380
 *                                  to skip the transform search if the computed
2381
 *                                  skip RD for the current mode is not better
2382
 *                                  than the best skip_rd so far.
2383
 * \param[in,out] skip_build_pred   Indicates whether or not to build the inter
2384
 *                                  predictor. If this is 0, the inter predictor
2385
 *                                  has already been built and thus we can avoid
2386
 *                                  repeating computation.
2387
 * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2388
 * a viable candidate.
2389
 */
2390
static int process_compound_inter_mode(
2391
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
2392
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
2393
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
2394
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
2395
0
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
2396
0
  MACROBLOCKD *xd = &x->e_mbd;
2397
0
  MB_MODE_INFO *mbmi = xd->mi[0];
2398
0
  const AV1_COMMON *cm = &cpi->common;
2399
0
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
2400
0
                                   cm->seq_params->enable_masked_compound;
2401
0
  int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
2402
0
                         (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);
2403
2404
0
  const int num_planes = av1_num_planes(cm);
2405
0
  const int mi_row = xd->mi_row;
2406
0
  const int mi_col = xd->mi_col;
2407
0
  int is_luma_interp_done = 0;
2408
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
2409
2410
0
  int64_t best_rd_compound;
2411
0
  int64_t rd_thresh;
2412
0
  const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
2413
0
  const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
2414
0
  rd_thresh = get_rd_thresh_from_best_rd(ref_best_rd, (1 << comp_type_rd_shift),
2415
0
                                         comp_type_rd_scale);
2416
  // Select compound type and any parameters related to that type
2417
  // (for example, the mask parameters if it is a masked mode) and compute
2418
  // the RD
2419
0
  *compmode_interinter_cost = av1_compound_type_rd(
2420
0
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
2421
0
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
2422
0
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
2423
0
  if (ref_best_rd < INT64_MAX &&
2424
0
      (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
2425
0
          ref_best_rd) {
2426
0
    restore_dst_buf(xd, *orig_dst, num_planes);
2427
0
    return 1;
2428
0
  }
2429
2430
  // Build only uv predictor for COMPOUND_AVERAGE.
2431
  // Note there is no need to call av1_enc_build_inter_predictor
2432
  // for luma if COMPOUND_AVERAGE is selected because it is the first
2433
  // candidate in av1_compound_type_rd, which means it used the dst_buf
2434
  // rather than the tmp_buf.
2435
0
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
2436
0
    if (num_planes > 1) {
2437
0
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
2438
0
                                    AOM_PLANE_U, num_planes - 1);
2439
0
    }
2440
0
    *skip_build_pred = 1;
2441
0
  }
2442
0
  return 0;
2443
0
}
2444
2445
// Speed feature to prune out MVs that are similar to previous MVs if they
2446
// don't achieve the best RD advantage.
2447
static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2448
                                   int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2449
0
                                   MB_MODE_INFO *mbmi, int pruning_factor) {
2450
0
  int i;
2451
0
  const int is_comp_pred = has_second_ref(mbmi);
2452
0
  const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
2453
2454
  // Skip the evaluation if an MV match is found.
2455
0
  if (ref_mv_idx > 0) {
2456
0
    for (int idx = 0; idx < ref_mv_idx; ++idx) {
2457
0
      if (save_mv[idx][0].as_int == INVALID_MV) continue;
2458
2459
0
      int mv_diff = 0;
2460
0
      for (i = 0; i < 1 + is_comp_pred; ++i) {
2461
0
        mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2462
0
                   abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2463
0
      }
2464
2465
      // If this mode is not the best one, and current MV is similar to
2466
      // previous stored MV, terminate this ref_mv_idx evaluation.
2467
0
      if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2468
0
    }
2469
0
  }
2470
2471
0
  if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2472
0
    for (i = 0; i < is_comp_pred + 1; ++i)
2473
0
      save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2474
0
  }
2475
2476
0
  return 0;
2477
0
}
2478
2479
/*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2480
 *
2481
 * \ingroup inter_mode_search
2482
 *
2483
 * Compares the sse of zero mv and the best sse found in single new_mv. If the
2484
 * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
2485
 * Else returns 0.
2486
 *
2487
 * Note that the sse used here comes from single_motion_search. So it is
2488
 * interpolated with the filter in motion search, not the actual interpolation
2489
 * filter used in encoding.
2490
 *
2491
 * \param[in]     fn_ptr            A table of function pointers to compute SSE.
2492
 * \param[in]     x                 Pointer to struct holding all the data for
2493
 *                                  the current macroblock.
2494
 * \param[in]     bsize             The current block_size.
2495
 * \param[in]     args              The args to handle_inter_mode, used to track
2496
 *                                  the best SSE.
2497
 * \param[in]    prune_zero_mv_with_sse  The argument holds speed feature
2498
 *                                       prune_zero_mv_with_sse value
2499
 * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2500
 */
2501
static inline int prune_zero_mv_with_sse(const aom_variance_fn_ptr_t *fn_ptr,
2502
                                         const MACROBLOCK *x, BLOCK_SIZE bsize,
2503
                                         const HandleInterModeArgs *args,
2504
0
                                         int prune_zero_mv_with_sse) {
2505
0
  const MACROBLOCKD *xd = &x->e_mbd;
2506
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
2507
2508
0
  const int is_comp_pred = has_second_ref(mbmi);
2509
0
  const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
2510
2511
0
  for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2512
0
    if (xd->global_motion[refs[idx]].wmtype != IDENTITY) {
2513
      // Pruning logic only works for IDENTITY type models
2514
      // Note: In theory we could apply similar logic for TRANSLATION
2515
      // type models, but we do not code these due to a spec bug
2516
      // (see comments in gm_get_motion_vector() in av1/common/mv.h)
2517
0
      assert(xd->global_motion[refs[idx]].wmtype != TRANSLATION);
2518
0
      return 0;
2519
0
    }
2520
2521
    // Don't prune if we have invalid data
2522
0
    assert(mbmi->mv[idx].as_int == 0);
2523
0
    if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
2524
0
      return 0;
2525
0
    }
2526
0
  }
2527
2528
  // Sum up the sse of ZEROMV and best NEWMV
2529
0
  unsigned int this_sse_sum = 0;
2530
0
  unsigned int best_sse_sum = 0;
2531
0
  for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2532
0
    const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
2533
0
    const struct macroblockd_plane *pd = xd->plane;
2534
0
    const struct buf_2d *src_buf = &p->src;
2535
0
    const struct buf_2d *ref_buf = &pd->pre[idx];
2536
0
    const uint8_t *src = src_buf->buf;
2537
0
    const uint8_t *ref = ref_buf->buf;
2538
0
    const int src_stride = src_buf->stride;
2539
0
    const int ref_stride = ref_buf->stride;
2540
2541
0
    unsigned int this_sse;
2542
0
    fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
2543
0
    this_sse_sum += this_sse;
2544
2545
0
    const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
2546
0
    best_sse_sum += best_sse;
2547
0
  }
2548
2549
0
  const double mul = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25;
2550
0
  if ((double)this_sse_sum > (mul * (double)best_sse_sum)) {
2551
0
    return 1;
2552
0
  }
2553
2554
0
  return 0;
2555
0
}
2556
2557
/*!\brief Searches for interpolation filter in realtime mode during winner eval
2558
 *
2559
 * \ingroup inter_mode_search
2560
 *
2561
 * Does a simple interpolation filter search during winner mode evaluation. This
2562
 * is currently only used by realtime mode as \ref
2563
 * av1_interpolation_filter_search is not called during realtime encoding.
2564
 *
2565
 * This function only searches over two possible filters. EIGHTTAP_REGULAR is
2566
 * always searched. For lowres clips (<= 240p), MULTITAP_SHARP is also searched. For
2567
 * higher-res clips (> 240p), EIGHTTAP_SMOOTH is also searched.
2568
 *
2569
 * \param[in]     cpi               Pointer to the compressor. Used for feature
2570
 *                                  flags.
2571
 * \param[in,out] x                 Pointer to macroblock. This is primarily
2572
 *                                  used to access the buffers.
2573
 * \param[in]     mi_row            The current row in mi unit (4X4 pixels).
2574
 * \param[in]     mi_col            The current col in mi unit (4X4 pixels).
2575
 * \param[in]     bsize             The current block_size.
2576
 * \return Returns true if a predictor is built in xd->dst, false otherwise.
2577
 */
2578
static inline bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
2579
                                      int mi_row, int mi_col,
2580
0
                                      BLOCK_SIZE bsize) {
2581
0
  static const InterpFilters filters_ref_set[3] = {
2582
0
    { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
2583
0
    { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
2584
0
    { MULTITAP_SHARP, MULTITAP_SHARP }
2585
0
  };
2586
2587
0
  const AV1_COMMON *const cm = &cpi->common;
2588
0
  MACROBLOCKD *const xd = &x->e_mbd;
2589
0
  MB_MODE_INFO *const mi = xd->mi[0];
2590
0
  int64_t best_cost = INT64_MAX;
2591
0
  int best_filter_index = -1;
2592
  // dst_bufs[0] sores the new predictor, and dist_bifs[1] stores the best
2593
0
  const int num_planes = av1_num_planes(cm);
2594
0
  const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
2595
0
  assert(is_inter_mode(mi->mode));
2596
0
  assert(mi->motion_mode == SIMPLE_TRANSLATION);
2597
0
  assert(!is_inter_compound_mode(mi->mode));
2598
2599
0
  if (!av1_is_interp_needed(xd)) {
2600
0
    return false;
2601
0
  }
2602
2603
0
  struct macroblockd_plane *pd = xd->plane;
2604
0
  const BUFFER_SET orig_dst = {
2605
0
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
2606
0
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
2607
0
  };
2608
0
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
2609
0
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
2610
0
                                 tmp_buf + 2 * MAX_SB_SQUARE },
2611
0
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
2612
0
  const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };
2613
2614
0
  for (int i = 0; i < 3; ++i) {
2615
0
    if (is_240p_or_lesser) {
2616
0
      if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
2617
0
        continue;
2618
0
      }
2619
0
    } else {
2620
0
      if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
2621
0
        continue;
2622
0
      }
2623
0
    }
2624
0
    int64_t cost;
2625
0
    RD_STATS tmp_rd = { 0 };
2626
2627
0
    mi->interp_filters.as_filters = filters_ref_set[i];
2628
0
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
2629
2630
0
    model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
2631
0
                       ? MODELRD_LEGACY
2632
0
                       : MODELRD_TYPE_INTERP_FILTER](
2633
0
        cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
2634
0
        &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);
2635
2636
0
    tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
2637
0
                                           cm->seq_params->enable_dual_filter);
2638
0
    cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
2639
0
    if (cost < best_cost) {
2640
0
      best_filter_index = i;
2641
0
      best_cost = cost;
2642
0
      swap_dst_buf(xd, dst_bufs, num_planes);
2643
0
    }
2644
0
  }
2645
0
  assert(best_filter_index >= 0);
2646
2647
0
  mi->interp_filters.as_filters = filters_ref_set[best_filter_index];
2648
2649
0
  const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];
2650
2651
0
  if (is_best_pred_in_orig) {
2652
0
    swap_dst_buf(xd, dst_bufs, num_planes);
2653
0
  } else {
2654
    // Note that xd->pd's bufers are kept in sync with dst_bufs[0]. So if
2655
    // is_best_pred_in_orig is false, that means the current buffer is the
2656
    // original one.
2657
0
    assert(&orig_dst == dst_bufs[0]);
2658
0
    assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
2659
0
    const int width = block_size_wide[bsize];
2660
0
    const int height = block_size_high[bsize];
2661
0
#if CONFIG_AV1_HIGHBITDEPTH
2662
0
    const bool is_hbd = is_cur_buf_hbd(xd);
2663
0
    if (is_hbd) {
2664
0
      aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
2665
0
                               tmp_dst.stride[AOM_PLANE_Y],
2666
0
                               CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
2667
0
                               orig_dst.stride[AOM_PLANE_Y], width, height);
2668
0
    } else {
2669
0
      aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
2670
0
                        orig_dst.plane[AOM_PLANE_Y],
2671
0
                        orig_dst.stride[AOM_PLANE_Y], width, height);
2672
0
    }
2673
#else
2674
    aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
2675
                      orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
2676
                      width, height);
2677
#endif
2678
0
  }
2679
2680
  // Build the YUV predictor.
2681
0
  if (num_planes > 1) {
2682
0
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
2683
0
                                  AOM_PLANE_U, AOM_PLANE_V);
2684
0
  }
2685
2686
0
  return true;
2687
0
}
2688
2689
/*!\brief AV1 inter mode RD computation
2690
 *
2691
 * \ingroup inter_mode_search
2692
 * Do the RD search for a given inter mode and compute all information relevant
2693
 * to the input mode. It will compute the best MV,
2694
 * compound parameters (if the mode is a compound mode) and interpolation filter
2695
 * parameters.
2696
 *
2697
 * \param[in]     cpi               Top-level encoder structure.
2698
 * \param[in]     tile_data         Pointer to struct holding adaptive
2699
 *                                  data/contexts/models for the tile during
2700
 *                                  encoding.
2701
 * \param[in]     x                 Pointer to structure holding all the data
2702
 *                                  for the current macroblock.
2703
 * \param[in]     bsize             Current block size.
2704
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
2705
 *                                  information.
2706
 * \param[in,out] rd_stats_y        Struct to keep track of the RD information
2707
 *                                  for only the Y plane.
2708
 * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
2709
 *                                  for only the UV planes.
2710
 * \param[in]     args              HandleInterModeArgs struct holding
2711
 *                                  miscellaneous arguments for inter mode
2712
 *                                  search. See the documentation for this
2713
 *                                  struct for a description of each member.
2714
 * \param[in]     ref_best_rd       Best RD found so far for this block.
2715
 *                                  It is used for early termination of this
2716
 *                                  search if the RD exceeds this value.
2717
 * \param[in]     tmp_buf           Temporary buffer used to hold predictors
2718
 *                                  built in this search.
2719
 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2720
 *                                  allocated buffers for the compound
2721
 *                                  predictors and masks in the compound type
2722
 *                                  search.
2723
 * \param[in,out] best_est_rd       Estimated RD for motion mode search if
2724
 *                                  do_tx_search (see below) is 0.
2725
 * \param[in]     do_tx_search      Parameter to indicate whether or not to do
2726
 *                                  a full transform search. This will compute
2727
 *                                  an estimated RD for the modes without the
2728
 *                                  transform search and later perform the full
2729
 *                                  transform search on the best candidates.
2730
 * \param[in,out] inter_modes_info  InterModesInfo struct to hold inter mode
2731
 *                                  information to perform a full transform
2732
 *                                  search only on winning candidates searched
2733
 *                                  with an estimate for transform coding RD.
2734
 * \param[in,out] motion_mode_cand  A motion_mode_candidate struct to store
2735
 *                                  motion mode information used in a speed
2736
 *                                  feature to search motion modes other than
2737
 *                                  SIMPLE_TRANSLATION only on winning
2738
 *                                  candidates.
2739
 * \param[in,out] skip_rd           A length 2 array, where skip_rd[0] is the
2740
 *                                  best total RD for a skip mode so far, and
2741
 *                                  skip_rd[1] is the best RD for a skip mode so
2742
 *                                  far in luma. This is used as a speed feature
2743
 *                                  to skip the transform search if the computed
2744
 *                                  skip RD for the current mode is not better
2745
 *                                  than the best skip_rd so far.
2746
 * \param[in]     inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
2747
 *                                         narrow down the search based on data
2748
 *                                         collected in the TPL model.
2749
 * \param[out]    yrd               Stores the rdcost corresponding to encoding
2750
 *                                  the luma plane.
2751
 *
2752
 * \return The RD cost for the mode being searched.
2753
 */
2754
static int64_t handle_inter_mode(
2755
    AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
2756
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
2757
    RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
2758
    uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
2759
    int64_t *best_est_rd, const int do_tx_search,
2760
    InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
2761
    int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
2762
0
    int64_t *yrd) {
2763
0
  const AV1_COMMON *cm = &cpi->common;
2764
0
  const int num_planes = av1_num_planes(cm);
2765
0
  MACROBLOCKD *xd = &x->e_mbd;
2766
0
  MB_MODE_INFO *mbmi = xd->mi[0];
2767
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2768
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
2769
0
  const int is_comp_pred = has_second_ref(mbmi);
2770
0
  const PREDICTION_MODE this_mode = mbmi->mode;
2771
2772
#if CONFIG_REALTIME_ONLY
2773
  const int prune_modes_based_on_tpl = 0;
2774
#else   // CONFIG_REALTIME_ONLY
2775
0
  const TplParams *const tpl_data = &cpi->ppi->tpl_data;
2776
0
  const int prune_modes_based_on_tpl =
2777
0
      cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
2778
0
      av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
2779
0
#endif  // CONFIG_REALTIME_ONLY
2780
0
  int i;
2781
  // Reference frames for this mode
2782
0
  const int refs[2] = { mbmi->ref_frame[0],
2783
0
                        (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2784
0
  int rate_mv = 0;
2785
0
  int64_t rd = INT64_MAX;
2786
  // Do first prediction into the destination buffer. Do the next
2787
  // prediction into a temporary buffer. Then keep track of which one
2788
  // of these currently holds the best predictor, and use the other
2789
  // one for future predictions. In the end, copy from tmp_buf to
2790
  // dst if necessary.
2791
0
  struct macroblockd_plane *pd = xd->plane;
2792
0
  const BUFFER_SET orig_dst = {
2793
0
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
2794
0
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
2795
0
  };
2796
0
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
2797
0
                                 tmp_buf + 2 * MAX_SB_SQUARE },
2798
0
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
2799
2800
0
  int64_t ret_val = INT64_MAX;
2801
0
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2802
0
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
2803
0
  int64_t best_rd = INT64_MAX;
2804
0
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
2805
0
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
2806
0
  int64_t best_yrd = INT64_MAX;
2807
0
  MB_MODE_INFO best_mbmi = *mbmi;
2808
0
  int best_xskip_txfm = 0;
2809
0
  int64_t newmv_ret_val = INT64_MAX;
2810
0
  inter_mode_info mode_info[MAX_REF_MV_SEARCH];
2811
2812
  // Do not prune the mode based on inter cost from tpl if the current ref frame
2813
  // is the winner ref in neighbouring blocks.
2814
0
  int ref_match_found_in_above_nb = 0;
2815
0
  int ref_match_found_in_left_nb = 0;
2816
0
  if (prune_modes_based_on_tpl) {
2817
0
    ref_match_found_in_above_nb =
2818
0
        find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
2819
0
    ref_match_found_in_left_nb =
2820
0
        find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
2821
0
  }
2822
2823
  // First, perform a simple translation search for each of the indices. If
2824
  // an index performs well, it will be fully searched in the main loop
2825
  // of this function.
2826
0
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
2827
  // Save MV results from first 2 ref_mv_idx.
2828
0
  int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
2829
0
  int best_ref_mv_idx = -1;
2830
0
  const int idx_mask =
2831
0
      ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set);
2832
0
  const int16_t mode_ctx =
2833
0
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
2834
0
  const ModeCosts *mode_costs = &x->mode_costs;
2835
0
  const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
2836
0
  const int base_rate =
2837
0
      args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;
2838
2839
0
  for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
2840
0
    save_mv[i][0].as_int = INVALID_MV;
2841
0
    save_mv[i][1].as_int = INVALID_MV;
2842
0
  }
2843
0
  args->start_mv_cnt = 0;
2844
2845
  // Main loop of this function. This will  iterate over all of the ref mvs
2846
  // in the dynamic reference list and do the following:
2847
  //    1.) Get the current MV. Create newmv MV if necessary
2848
  //    2.) Search compound type and parameters if applicable
2849
  //    3.) Do interpolation filter search
2850
  //    4.) Build the inter predictor
2851
  //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
2852
  //        WARPED_CAUSAL)
2853
  //    6.) Update stats if best so far
2854
0
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
2855
0
    mbmi->ref_mv_idx = ref_mv_idx;
2856
2857
0
    mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
2858
0
    mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
2859
0
    const int drl_cost = get_drl_cost(
2860
0
        mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
2861
0
    mode_info[ref_mv_idx].drl_cost = drl_cost;
2862
0
    mode_info[ref_mv_idx].skip = 0;
2863
2864
0
    if (!mask_check_bit(idx_mask, ref_mv_idx)) {
2865
      // MV did not perform well in simple translation search. Skip it.
2866
0
      continue;
2867
0
    }
2868
0
    if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
2869
0
        !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
2870
      // Skip mode if TPL model indicates it will not be beneficial.
2871
0
      if (prune_modes_based_on_tpl_stats(
2872
0
              inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
2873
0
              cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
2874
0
        continue;
2875
0
    }
2876
0
    av1_init_rd_stats(rd_stats);
2877
2878
    // Initialize compound mode data
2879
0
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2880
0
    mbmi->comp_group_idx = 0;
2881
0
    mbmi->compound_idx = 1;
2882
0
    if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
2883
2884
0
    mbmi->num_proj_ref = 0;
2885
0
    mbmi->motion_mode = SIMPLE_TRANSLATION;
2886
2887
    // Compute cost for signalling this DRL index
2888
0
    rd_stats->rate = base_rate;
2889
0
    rd_stats->rate += drl_cost;
2890
2891
0
    int rs = 0;
2892
0
    int compmode_interinter_cost = 0;
2893
2894
0
    int_mv cur_mv[2];
2895
2896
    // TODO(Cherma): Extend this speed feature to support compound mode
2897
0
    int skip_repeated_ref_mv =
2898
0
        is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
2899
    // Generate the current mv according to the prediction mode
2900
0
    if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
2901
0
      continue;
2902
0
    }
2903
2904
    // The above call to build_cur_mv does not handle NEWMV modes. Build
2905
    // the mv here if we have NEWMV for any predictors.
2906
0
    if (have_newmv_in_inter_mode(this_mode)) {
2907
#if CONFIG_COLLECT_COMPONENT_TIMING
2908
      start_timing(cpi, handle_newmv_time);
2909
#endif
2910
0
      newmv_ret_val =
2911
0
          handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
2912
#if CONFIG_COLLECT_COMPONENT_TIMING
2913
      end_timing(cpi, handle_newmv_time);
2914
#endif
2915
2916
0
      if (newmv_ret_val != 0) continue;
2917
2918
0
      if (is_inter_singleref_mode(this_mode) &&
2919
0
          cur_mv[0].as_int != INVALID_MV) {
2920
0
        const MV_REFERENCE_FRAME ref = refs[0];
2921
0
        const unsigned int this_sse = x->pred_sse[ref];
2922
0
        if (this_sse < args->best_single_sse_in_refs[ref]) {
2923
0
          args->best_single_sse_in_refs[ref] = this_sse;
2924
0
        }
2925
2926
0
        if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
2927
0
          const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1;
2928
0
          const int pix_idx = num_pels_log2_lookup[bsize] - 4;
2929
0
          const double scale_factor[3][11] = {
2930
0
            { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
2931
0
            { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
2932
0
            { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
2933
0
          };
2934
0
          assert(pix_idx >= 0);
2935
0
          assert(th_idx <= 2);
2936
0
          if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse)
2937
0
            continue;
2938
0
        }
2939
0
      }
2940
2941
0
      rd_stats->rate += rate_mv;
2942
0
    }
2943
    // Copy the motion vector for this mode into mbmi struct
2944
0
    for (i = 0; i < is_comp_pred + 1; ++i) {
2945
0
      mbmi->mv[i].as_int = cur_mv[i].as_int;
2946
0
    }
2947
2948
0
    if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
2949
0
        mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
2950
0
      continue;
2951
0
    }
2952
2953
    // Skip the rest of the search if prune_ref_mv_idx_search speed feature
2954
    // is enabled, and the current MV is similar to a previous one.
2955
0
    if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
2956
0
        prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
2957
0
                                cpi->sf.inter_sf.prune_ref_mv_idx_search))
2958
0
      continue;
2959
2960
0
    if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
2961
0
        (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
2962
0
      if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args,
2963
0
                                 cpi->sf.gm_sf.prune_zero_mv_with_sse)) {
2964
0
        continue;
2965
0
      }
2966
0
    }
2967
2968
0
    int skip_build_pred = 0;
2969
0
    const int mi_row = xd->mi_row;
2970
0
    const int mi_col = xd->mi_col;
2971
2972
    // Handle a compound predictor, continue if it is determined this
2973
    // cannot be the best compound mode
2974
0
    if (is_comp_pred) {
2975
#if CONFIG_COLLECT_COMPONENT_TIMING
2976
      start_timing(cpi, compound_type_rd_time);
2977
#endif
2978
0
      const int not_best_mode = process_compound_inter_mode(
2979
0
          cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
2980
0
          rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
2981
0
          &skip_build_pred);
2982
#if CONFIG_COLLECT_COMPONENT_TIMING
2983
      end_timing(cpi, compound_type_rd_time);
2984
#endif
2985
0
      if (not_best_mode) continue;
2986
0
    }
2987
2988
0
    if (!args->skip_ifs) {
2989
#if CONFIG_COLLECT_COMPONENT_TIMING
2990
      start_timing(cpi, interpolation_filter_search_time);
2991
#endif
2992
      // Determine the interpolation filter for this mode
2993
0
      ret_val = av1_interpolation_filter_search(
2994
0
          x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
2995
0
          &skip_build_pred, args, ref_best_rd);
2996
#if CONFIG_COLLECT_COMPONENT_TIMING
2997
      end_timing(cpi, interpolation_filter_search_time);
2998
#endif
2999
0
      if (args->modelled_rd != NULL && !is_comp_pred) {
3000
0
        args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
3001
0
      }
3002
0
      if (ret_val != 0) {
3003
0
        restore_dst_buf(xd, orig_dst, num_planes);
3004
0
        continue;
3005
0
      } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
3006
0
                 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
3007
0
        restore_dst_buf(xd, orig_dst, num_planes);
3008
0
        continue;
3009
0
      }
3010
3011
      // Compute modelled RD if enabled
3012
0
      if (args->modelled_rd != NULL) {
3013
0
        if (is_comp_pred) {
3014
0
          const int mode0 = compound_ref0_mode(this_mode);
3015
0
          const int mode1 = compound_ref1_mode(this_mode);
3016
0
          const int64_t mrd =
3017
0
              AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
3018
0
                     args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
3019
0
          if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
3020
0
            restore_dst_buf(xd, orig_dst, num_planes);
3021
0
            continue;
3022
0
          }
3023
0
        }
3024
0
      }
3025
0
    }
3026
3027
0
    rd_stats->rate += compmode_interinter_cost;
3028
0
    if (skip_build_pred != 1) {
3029
      // Build this inter predictor if it has not been previously built
3030
0
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
3031
0
                                    av1_num_planes(cm) - 1);
3032
0
    }
3033
3034
#if CONFIG_COLLECT_COMPONENT_TIMING
3035
    start_timing(cpi, motion_mode_rd_time);
3036
#endif
3037
0
    int rate2_nocoeff = rd_stats->rate;
3038
    // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
3039
    // OBMC_CAUSAL or WARPED_CAUSAL
3040
0
    int64_t this_yrd;
3041
0
    ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
3042
0
                             rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
3043
0
                             &orig_dst, best_est_rd, do_tx_search,
3044
0
                             inter_modes_info, 0, &this_yrd);
3045
#if CONFIG_COLLECT_COMPONENT_TIMING
3046
    end_timing(cpi, motion_mode_rd_time);
3047
#endif
3048
0
    assert(
3049
0
        IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));
3050
3051
0
    if (ret_val != INT64_MAX) {
3052
0
      int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
3053
0
      const THR_MODES mode_enum = get_prediction_mode_idx(
3054
0
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3055
      // Collect mode stats for multiwinner mode processing
3056
0
      store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
3057
0
                              rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
3058
0
                              cpi->sf.winner_mode_sf.multi_winner_mode_type,
3059
0
                              do_tx_search);
3060
0
      if (tmp_rd < best_rd) {
3061
0
        best_yrd = this_yrd;
3062
        // Update the best rd stats if we found the best mode so far
3063
0
        best_rd_stats = *rd_stats;
3064
0
        best_rd_stats_y = *rd_stats_y;
3065
0
        best_rd_stats_uv = *rd_stats_uv;
3066
0
        best_rd = tmp_rd;
3067
0
        best_mbmi = *mbmi;
3068
0
        best_xskip_txfm = txfm_info->skip_txfm;
3069
0
        memcpy(best_blk_skip, txfm_info->blk_skip,
3070
0
               sizeof(best_blk_skip[0]) * xd->height * xd->width);
3071
0
        av1_copy_array(best_tx_type_map, xd->tx_type_map,
3072
0
                       xd->height * xd->width);
3073
0
        motion_mode_cand->rate_mv = rate_mv;
3074
0
        motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
3075
0
      }
3076
3077
0
      if (tmp_rd < ref_best_rd) {
3078
0
        ref_best_rd = tmp_rd;
3079
0
        best_ref_mv_idx = ref_mv_idx;
3080
0
      }
3081
0
    }
3082
0
    restore_dst_buf(xd, orig_dst, num_planes);
3083
0
  }
3084
3085
0
  if (best_rd == INT64_MAX) return INT64_MAX;
3086
3087
  // re-instate status of the best choice
3088
0
  *rd_stats = best_rd_stats;
3089
0
  *rd_stats_y = best_rd_stats_y;
3090
0
  *rd_stats_uv = best_rd_stats_uv;
3091
0
  *yrd = best_yrd;
3092
0
  *mbmi = best_mbmi;
3093
0
  txfm_info->skip_txfm = best_xskip_txfm;
3094
0
  assert(IMPLIES(mbmi->comp_group_idx == 1,
3095
0
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
3096
0
  memcpy(txfm_info->blk_skip, best_blk_skip,
3097
0
         sizeof(best_blk_skip[0]) * xd->height * xd->width);
3098
0
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
3099
3100
0
  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
3101
3102
0
  return rd_stats->rdcost;
3103
0
}
3104
3105
/*!\brief Search for the best intrabc predictor
3106
 *
3107
 * \ingroup intra_mode_search
3108
 * \callergraph
3109
 * This function performs a motion search to find the best intrabc predictor.
3110
 *
3111
 * \returns Returns the best overall rdcost (including the non-intrabc modes
3112
 * search before this function).
3113
 */
3114
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
3115
                                       PICK_MODE_CONTEXT *ctx,
3116
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
3117
0
                                       int64_t best_rd) {
3118
0
  const AV1_COMMON *const cm = &cpi->common;
3119
0
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
3120
0
      !cpi->sf.mv_sf.use_intrabc || cpi->sf.rt_sf.use_nonrd_pick_mode)
3121
0
    return INT64_MAX;
3122
0
  const int num_planes = av1_num_planes(cm);
3123
3124
0
  MACROBLOCKD *const xd = &x->e_mbd;
3125
0
  const TileInfo *tile = &xd->tile;
3126
0
  MB_MODE_INFO *mbmi = xd->mi[0];
3127
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3128
3129
0
  const int mi_row = xd->mi_row;
3130
0
  const int mi_col = xd->mi_col;
3131
0
  const int w = block_size_wide[bsize];
3132
0
  const int h = block_size_high[bsize];
3133
0
  const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
3134
0
  const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
3135
3136
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3137
0
  const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
3138
0
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
3139
0
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3140
0
                   mbmi_ext->mode_context);
3141
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3142
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3143
0
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
3144
0
  int_mv nearestmv, nearmv;
3145
0
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
3146
0
                                   0);
3147
3148
0
  if (nearestmv.as_int == INVALID_MV) {
3149
0
    nearestmv.as_int = 0;
3150
0
  }
3151
0
  if (nearmv.as_int == INVALID_MV) {
3152
0
    nearmv.as_int = 0;
3153
0
  }
3154
3155
0
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
3156
0
  if (dv_ref.as_int == 0) {
3157
0
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
3158
0
  }
3159
  // Ref DV should not have sub-pel.
3160
0
  assert((dv_ref.as_mv.col & 7) == 0);
3161
0
  assert((dv_ref.as_mv.row & 7) == 0);
3162
0
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
3163
3164
0
  struct buf_2d yv12_mb[MAX_MB_PLANE];
3165
0
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
3166
0
  for (int i = 0; i < num_planes; ++i) {
3167
0
    xd->plane[i].pre[0] = yv12_mb[i];
3168
0
  }
3169
3170
0
  enum IntrabcMotionDirection {
3171
0
    IBC_MOTION_ABOVE,
3172
0
    IBC_MOTION_LEFT,
3173
0
    IBC_MOTION_DIRECTIONS
3174
0
  };
3175
3176
0
  MB_MODE_INFO best_mbmi = *mbmi;
3177
0
  RD_STATS best_rdstats = *rd_stats;
3178
0
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
3179
0
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
3180
0
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3181
3182
0
  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
3183
0
  const SEARCH_METHODS search_method =
3184
0
      av1_get_default_mv_search_method(x, &cpi->sf.mv_sf, bsize);
3185
0
  const search_site_config *lookahead_search_sites =
3186
0
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
3187
0
  const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
3188
0
  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
3189
0
                                     &dv_ref.as_mv, start_mv,
3190
0
                                     lookahead_search_sites, search_method,
3191
0
                                     /*fine_search_interval=*/0);
3192
0
  const IntraBCMVCosts *const dv_costs = x->dv_costs;
3193
0
  av1_set_ms_to_intra_mode(&fullms_params, dv_costs);
3194
3195
0
  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
3196
0
       dir < IBC_MOTION_DIRECTIONS; ++dir) {
3197
0
    switch (dir) {
3198
0
      case IBC_MOTION_ABOVE:
3199
0
        fullms_params.mv_limits.col_min =
3200
0
            (tile->mi_col_start - mi_col) * MI_SIZE;
3201
0
        fullms_params.mv_limits.col_max =
3202
0
            (tile->mi_col_end - mi_col) * MI_SIZE - w;
3203
0
        fullms_params.mv_limits.row_min =
3204
0
            (tile->mi_row_start - mi_row) * MI_SIZE;
3205
0
        fullms_params.mv_limits.row_max =
3206
0
            (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
3207
0
        break;
3208
0
      case IBC_MOTION_LEFT:
3209
0
        fullms_params.mv_limits.col_min =
3210
0
            (tile->mi_col_start - mi_col) * MI_SIZE;
3211
0
        fullms_params.mv_limits.col_max =
3212
0
            (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
3213
        // TODO(aconverse@google.com): Minimize the overlap between above and
3214
        // left areas.
3215
0
        fullms_params.mv_limits.row_min =
3216
0
            (tile->mi_row_start - mi_row) * MI_SIZE;
3217
0
        int bottom_coded_mi_edge =
3218
0
            AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
3219
0
        fullms_params.mv_limits.row_max =
3220
0
            (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
3221
0
        break;
3222
0
      default: assert(0);
3223
0
    }
3224
0
    assert(fullms_params.mv_limits.col_min >= fullms_params.mv_limits.col_min);
3225
0
    assert(fullms_params.mv_limits.col_max <= fullms_params.mv_limits.col_max);
3226
0
    assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min);
3227
0
    assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max);
3228
3229
0
    av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);
3230
3231
0
    if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
3232
0
        fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
3233
0
      continue;
3234
0
    }
3235
3236
0
    const int step_param = cpi->mv_search_params.mv_step_param;
3237
0
    IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
3238
0
    int_mv best_mv, best_hash_mv;
3239
0
    FULLPEL_MV_STATS best_mv_stats;
3240
3241
0
    int bestsme =
3242
0
        av1_full_pixel_search(start_mv, &fullms_params, step_param, NULL,
3243
0
                              &best_mv.as_fullmv, &best_mv_stats, NULL);
3244
0
    const int hashsme = av1_intrabc_hash_search(
3245
0
        cpi, xd, &fullms_params, intrabc_hash_info, &best_hash_mv.as_fullmv);
3246
0
    if (hashsme < bestsme) {
3247
0
      best_mv = best_hash_mv;
3248
0
      bestsme = hashsme;
3249
0
    }
3250
3251
0
    if (bestsme == INT_MAX) continue;
3252
0
    const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
3253
0
    if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
3254
0
                                get_fullmv_from_mv(&dv)))
3255
0
      continue;
3256
0
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
3257
0
                         cm->seq_params->mib_size_log2))
3258
0
      continue;
3259
3260
    // DV should not have sub-pel.
3261
0
    assert((dv.col & 7) == 0);
3262
0
    assert((dv.row & 7) == 0);
3263
0
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
3264
0
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
3265
0
    mbmi->use_intrabc = 1;
3266
0
    mbmi->mode = DC_PRED;
3267
0
    mbmi->uv_mode = UV_DC_PRED;
3268
0
    mbmi->motion_mode = SIMPLE_TRANSLATION;
3269
0
    mbmi->mv[0].as_mv = dv;
3270
0
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
3271
0
    mbmi->skip_txfm = 0;
3272
0
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3273
0
                                  av1_num_planes(cm) - 1);
3274
3275
    // TODO(aconverse@google.com): The full motion field defining discount
3276
    // in MV_COST_WEIGHT is too large. Explore other values.
3277
0
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
3278
0
                                        dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
3279
0
    const int rate_mode = x->mode_costs.intrabc_cost[1];
3280
0
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
3281
0
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
3282
0
                         &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
3283
0
      continue;
3284
0
    rd_stats_yuv.rdcost =
3285
0
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
3286
0
    if (rd_stats_yuv.rdcost < best_rd) {
3287
0
      best_rd = rd_stats_yuv.rdcost;
3288
0
      best_mbmi = *mbmi;
3289
0
      best_rdstats = rd_stats_yuv;
3290
0
      memcpy(best_blk_skip, txfm_info->blk_skip,
3291
0
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3292
0
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
3293
0
    }
3294
0
  }
3295
0
  *mbmi = best_mbmi;
3296
0
  *rd_stats = best_rdstats;
3297
0
  memcpy(txfm_info->blk_skip, best_blk_skip,
3298
0
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3299
0
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
3300
#if CONFIG_RD_DEBUG
3301
  mbmi->rd_stats = *rd_stats;
3302
#endif
3303
0
  return best_rd;
3304
0
}
3305
3306
// TODO(chiyotsai@google.com): We are using struct $struct_name instead of their
3307
// typedef here because Doxygen doesn't know about the typedefs yet. So using
3308
// the typedef will prevent doxygen from finding this function and generating
3309
// the callgraph. Once documents for AV1_COMP and MACROBLOCK are added to
3310
// doxygen, we can revert back to using the typedefs.
3311
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
3312
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
3313
0
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3314
0
  const AV1_COMMON *const cm = &cpi->common;
3315
0
  MACROBLOCKD *const xd = &x->e_mbd;
3316
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
3317
0
  const int num_planes = av1_num_planes(cm);
3318
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3319
0
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3320
0
  uint8_t y_skip_txfm = 0, uv_skip_txfm = 0;
3321
0
  int64_t dist_y = 0, dist_uv = 0;
3322
3323
0
  ctx->rd_stats.skip_txfm = 0;
3324
0
  mbmi->ref_frame[0] = INTRA_FRAME;
3325
0
  mbmi->ref_frame[1] = NONE_FRAME;
3326
0
  mbmi->use_intrabc = 0;
3327
0
  mbmi->mv[0].as_int = 0;
3328
0
  mbmi->skip_mode = 0;
3329
3330
0
  const int64_t intra_yrd =
3331
0
      av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
3332
0
                                 &y_skip_txfm, bsize, best_rd, ctx);
3333
3334
  // Initialize default mode evaluation params
3335
0
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);
3336
3337
0
  if (intra_yrd < best_rd) {
3338
    // Search intra modes for uv planes if needed
3339
0
    if (num_planes > 1) {
3340
      // Set up the tx variables for reproducing the y predictions in case we
3341
      // need it for chroma-from-luma.
3342
0
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
3343
0
        memcpy(txfm_info->blk_skip, ctx->blk_skip,
3344
0
               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
3345
0
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
3346
0
      }
3347
0
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
3348
0
      av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3349
0
                                  &dist_uv, &uv_skip_txfm, bsize,
3350
0
                                  max_uv_tx_size);
3351
0
    }
3352
3353
    // Intra block is always coded as non-skip
3354
0
    rd_cost->rate =
3355
0
        rate_y + rate_uv +
3356
0
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
3357
0
    rd_cost->dist = dist_y + dist_uv;
3358
0
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
3359
0
    rd_cost->skip_txfm = 0;
3360
0
  } else {
3361
0
    rd_cost->rate = INT_MAX;
3362
0
  }
3363
3364
0
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
3365
0
    best_rd = rd_cost->rdcost;
3366
0
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
3367
0
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
3368
0
    memcpy(ctx->blk_skip, txfm_info->blk_skip,
3369
0
           sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
3370
0
    assert(rd_cost->rate != INT_MAX);
3371
0
  }
3372
0
  if (rd_cost->rate == INT_MAX) return;
3373
3374
0
  ctx->mic = *xd->mi[0];
3375
0
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
3376
0
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
3377
0
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3378
0
}
3379
3380
// Forward declaration so that code in this file can reference
// calc_target_weighted_pred() ahead of its definition.
static inline void calc_target_weighted_pred(
3381
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
3382
    const uint8_t *above, int above_stride, const uint8_t *left,
3383
    int left_stride);
3384
3385
static inline void rd_pick_skip_mode(
3386
    RD_STATS *rd_cost, InterModeSearchState *search_state,
3387
    const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3388
0
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3389
0
  const AV1_COMMON *const cm = &cpi->common;
3390
0
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3391
0
  const int num_planes = av1_num_planes(cm);
3392
0
  MACROBLOCKD *const xd = &x->e_mbd;
3393
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
3394
3395
0
  x->compound_idx = 1;  // COMPOUND_AVERAGE
3396
0
  RD_STATS skip_mode_rd_stats;
3397
0
  av1_invalid_rd_stats(&skip_mode_rd_stats);
3398
3399
0
  if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3400
0
      skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3401
0
    return;
3402
0
  }
3403
3404
0
  const MV_REFERENCE_FRAME ref_frame =
3405
0
      LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3406
0
  const MV_REFERENCE_FRAME second_ref_frame =
3407
0
      LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3408
0
  const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3409
0
  const THR_MODES mode_index =
3410
0
      get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3411
3412
0
  if (mode_index == THR_INVALID) {
3413
0
    return;
3414
0
  }
3415
3416
0
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3417
0
       cpi->sf.inter_sf.disable_onesided_comp) &&
3418
0
      cpi->all_one_sided_refs) {
3419
0
    return;
3420
0
  }
3421
3422
0
  mbmi->mode = this_mode;
3423
0
  mbmi->uv_mode = UV_DC_PRED;
3424
0
  mbmi->ref_frame[0] = ref_frame;
3425
0
  mbmi->ref_frame[1] = second_ref_frame;
3426
0
  const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3427
0
  if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3428
0
    MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3429
0
    if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3430
0
        mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3431
0
      return;
3432
0
    }
3433
0
    av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3434
0
                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3435
0
                     mbmi_ext->mode_context);
3436
    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3437
    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3438
0
    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3439
0
  }
3440
3441
0
  assert(this_mode == NEAREST_NEARESTMV);
3442
0
  if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3443
0
    return;
3444
0
  }
3445
3446
0
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
3447
0
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3448
0
  mbmi->comp_group_idx = 0;
3449
0
  mbmi->compound_idx = x->compound_idx;
3450
0
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3451
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
3452
0
  mbmi->ref_mv_idx = 0;
3453
0
  mbmi->skip_mode = mbmi->skip_txfm = 1;
3454
0
  mbmi->palette_mode_info.palette_size[0] = 0;
3455
0
  mbmi->palette_mode_info.palette_size[1] = 0;
3456
3457
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
3458
3459
0
  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3460
0
  for (int i = 0; i < num_planes; i++) {
3461
0
    xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3462
0
    xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3463
0
  }
3464
3465
0
  BUFFER_SET orig_dst;
3466
0
  for (int i = 0; i < num_planes; i++) {
3467
0
    orig_dst.plane[i] = xd->plane[i].dst.buf;
3468
0
    orig_dst.stride[i] = xd->plane[i].dst.stride;
3469
0
  }
3470
3471
  // Compare the use of skip_mode with the best intra/inter mode obtained.
3472
0
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3473
0
  int64_t best_intra_inter_mode_cost = INT64_MAX;
3474
0
  if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3475
0
    const ModeCosts *mode_costs = &x->mode_costs;
3476
0
    best_intra_inter_mode_cost = RDCOST(
3477
0
        x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3478
0
        rd_cost->dist);
3479
    // Account for non-skip mode rate in total rd stats
3480
0
    rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3481
0
    av1_rd_cost_update(x->rdmult, rd_cost);
3482
0
  }
3483
3484
  // Obtain the rdcost for skip_mode.
3485
0
  skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst,
3486
0
               best_intra_inter_mode_cost);
3487
3488
0
  if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3489
0
      (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3490
0
    assert(mode_index != THR_INVALID);
3491
0
    search_state->best_mbmode.skip_mode = 1;
3492
0
    search_state->best_mbmode = *mbmi;
3493
0
    memset(search_state->best_mbmode.inter_tx_size,
3494
0
           search_state->best_mbmode.tx_size,
3495
0
           sizeof(search_state->best_mbmode.inter_tx_size));
3496
0
    set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3497
0
                  search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3498
0
                  xd);
3499
0
    search_state->best_mode_index = mode_index;
3500
3501
    // Update rd_cost
3502
0
    rd_cost->rate = skip_mode_rd_stats.rate;
3503
0
    rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3504
0
    rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3505
3506
0
    search_state->best_rd = rd_cost->rdcost;
3507
0
    search_state->best_skip2 = 1;
3508
0
    search_state->best_mode_skippable = 1;
3509
3510
0
    x->txfm_search_info.skip_txfm = 1;
3511
0
  }
3512
0
}
3513
3514
// Get winner mode stats of given mode index
3515
static inline MB_MODE_INFO *get_winner_mode_stats(
3516
    MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3517
    int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3518
    RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3519
    THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3520
0
    int mode_idx) {
3521
0
  MB_MODE_INFO *winner_mbmi;
3522
0
  if (multi_winner_mode_type) {
3523
0
    assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3524
0
    WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3525
0
    winner_mbmi = &winner_mode_stat->mbmi;
3526
3527
0
    *winner_rd_cost = &winner_mode_stat->rd_cost;
3528
0
    *winner_rate_y = winner_mode_stat->rate_y;
3529
0
    *winner_rate_uv = winner_mode_stat->rate_uv;
3530
0
    *winner_mode_index = winner_mode_stat->mode_index;
3531
0
  } else {
3532
0
    winner_mbmi = best_mbmode;
3533
0
    *winner_rd_cost = best_rd_cost;
3534
0
    *winner_rate_y = best_rate_y;
3535
0
    *winner_rate_uv = best_rate_uv;
3536
0
    *winner_mode_index = *best_mode_index;
3537
0
  }
3538
0
  return winner_mbmi;
3539
0
}
3540
3541
// speed feature: fast intra/inter transform type search
3542
// Used for speed >= 2
3543
// When this speed feature is on, in rd mode search, only DCT is used.
3544
// After the mode is determined, this function is called, to select
3545
// transform types and get accurate rdcost.
3546
static inline void refine_winner_mode_tx(
3547
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
3548
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
3549
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
3550
0
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
3551
0
  const AV1_COMMON *const cm = &cpi->common;
3552
0
  MACROBLOCKD *const xd = &x->e_mbd;
3553
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
3554
0
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
3555
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3556
0
  int64_t best_rd;
3557
0
  const int num_planes = av1_num_planes(cm);
3558
3559
0
  if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
3560
0
                                         rd_cost->skip_txfm))
3561
0
    return;
3562
3563
  // Set params for winner mode evaluation
3564
0
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
3565
3566
  // No best mode identified so far
3567
0
  if (*best_mode_index == THR_INVALID) return;
3568
3569
0
  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
3570
0
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
3571
0
    RD_STATS *winner_rd_stats = NULL;
3572
0
    int winner_rate_y = 0, winner_rate_uv = 0;
3573
0
    THR_MODES winner_mode_index = 0;
3574
3575
    // TODO(any): Combine best mode and multi-winner mode processing paths
3576
    // Get winner mode stats for current mode index
3577
0
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
3578
0
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
3579
0
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
3580
0
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);
3581
3582
0
    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
3583
0
        winner_mode_index != THR_INVALID &&
3584
0
        is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
3585
0
                                          rd_cost->skip_txfm)) {
3586
0
      RD_STATS rd_stats = *winner_rd_stats;
3587
0
      int skip_blk = 0;
3588
0
      RD_STATS rd_stats_y, rd_stats_uv;
3589
0
      const int skip_ctx = av1_get_skip_txfm_context(xd);
3590
3591
0
      *mbmi = *winner_mbmi;
3592
3593
0
      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3594
3595
      // Select prediction reference frames.
3596
0
      for (int i = 0; i < num_planes; i++) {
3597
0
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3598
0
        if (has_second_ref(mbmi))
3599
0
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3600
0
      }
3601
3602
0
      if (is_inter_mode(mbmi->mode)) {
3603
0
        const int mi_row = xd->mi_row;
3604
0
        const int mi_col = xd->mi_col;
3605
0
        bool is_predictor_built = false;
3606
0
        const PREDICTION_MODE prediction_mode = mbmi->mode;
3607
        // Do interpolation filter search for realtime mode if applicable.
3608
0
        if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
3609
0
            cpi->oxcf.mode == REALTIME &&
3610
0
            cm->current_frame.reference_mode == SINGLE_REFERENCE &&
3611
0
            is_inter_mode(prediction_mode) &&
3612
0
            mbmi->motion_mode == SIMPLE_TRANSLATION &&
3613
0
            !is_inter_compound_mode(prediction_mode)) {
3614
0
          is_predictor_built =
3615
0
              fast_interp_search(cpi, x, mi_row, mi_col, bsize);
3616
0
        }
3617
0
        if (!is_predictor_built) {
3618
0
          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3619
0
                                        av1_num_planes(cm) - 1);
3620
0
        }
3621
0
        if (mbmi->motion_mode == OBMC_CAUSAL)
3622
0
          av1_build_obmc_inter_predictors_sb(cm, xd);
3623
3624
0
        av1_subtract_plane(x, bsize, 0);
3625
0
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
3626
0
            !xd->lossless[mbmi->segment_id]) {
3627
0
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3628
0
                                              INT64_MAX);
3629
0
          assert(rd_stats_y.rate != INT_MAX);
3630
0
        } else {
3631
0
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3632
0
                                            INT64_MAX);
3633
0
          memset(mbmi->inter_tx_size, mbmi->tx_size,
3634
0
                 sizeof(mbmi->inter_tx_size));
3635
0
          for (int i = 0; i < xd->height * xd->width; ++i)
3636
0
            set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
3637
0
        }
3638
0
      } else {
3639
0
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3640
0
                                          INT64_MAX);
3641
0
      }
3642
3643
0
      if (num_planes > 1) {
3644
0
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
3645
0
      } else {
3646
0
        av1_init_rd_stats(&rd_stats_uv);
3647
0
      }
3648
3649
0
      const ModeCosts *mode_costs = &x->mode_costs;
3650
0
      if (is_inter_mode(mbmi->mode) &&
3651
0
          RDCOST(x->rdmult,
3652
0
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
3653
0
                     rd_stats_uv.rate,
3654
0
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
3655
0
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
3656
0
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
3657
0
        skip_blk = 1;
3658
0
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
3659
0
        rd_stats_uv.rate = 0;
3660
0
        rd_stats_y.dist = rd_stats_y.sse;
3661
0
        rd_stats_uv.dist = rd_stats_uv.sse;
3662
0
      } else {
3663
0
        skip_blk = 0;
3664
0
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
3665
0
      }
3666
0
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
3667
0
                      winner_rate_y - winner_rate_uv;
3668
0
      int64_t this_rd =
3669
0
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
3670
0
      if (best_rd > this_rd) {
3671
0
        *best_mbmode = *mbmi;
3672
0
        *best_mode_index = winner_mode_index;
3673
0
        av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
3674
0
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3675
0
        rd_cost->rate = this_rate;
3676
0
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
3677
0
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
3678
0
        rd_cost->rdcost = this_rd;
3679
0
        best_rd = this_rd;
3680
0
        *best_skip2 = skip_blk;
3681
0
      }
3682
0
    }
3683
0
  }
3684
0
}
3685
3686
/*!\cond */
3687
typedef struct {
3688
  // Mask for each reference frame, specifying which prediction modes to NOT try
3689
  // during search.
3690
  uint32_t pred_modes[REF_FRAMES];
3691
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
3692
  // reference frames (i, j).
3693
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
3694
  // (NONE_FRAME).
3695
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
3696
} mode_skip_mask_t;
3697
/*!\endcond */
3698
3699
// Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
3700
static inline void disable_reference(
3701
0
    MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3702
0
  for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3703
0
    ref_combo[ref][ref2 + 1] = true;
3704
0
  }
3705
0
}
3706
3707
// Update 'ref_combo' mask to disable all inter references except ALTREF.
3708
static inline void disable_inter_references_except_altref(
3709
0
    bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3710
0
  disable_reference(LAST_FRAME, ref_combo);
3711
0
  disable_reference(LAST2_FRAME, ref_combo);
3712
0
  disable_reference(LAST3_FRAME, ref_combo);
3713
0
  disable_reference(GOLDEN_FRAME, ref_combo);
3714
0
  disable_reference(BWDREF_FRAME, ref_combo);
3715
0
  disable_reference(ALTREF2_FRAME, ref_combo);
3716
0
}
3717
3718
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
3719
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
3720
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
3721
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
3722
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
3723
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
3724
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
3725
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
3726
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
3727
};
3728
3729
// Selects which reference combinations default_skip_mask() leaves enabled.
typedef enum {
  REF_SET_FULL,     // Nothing is masked by default.
  REF_SET_REDUCED,  // Only the pairs in reduced_ref_combos are enabled.
  REF_SET_REALTIME  // Only the pairs in real_time_ref_combos are enabled.
} REF_SET;
3730
3731
0
// Initializes 'mask' to its default state for the given reference set.
//  - REF_SET_FULL: the whole mask is cleared, i.e. every mode and every
//    reference combination is available.
//  - REF_SET_REDUCED / REF_SET_REALTIME: all prediction modes are available,
//    all reference combinations are disabled first, and then only the pairs
//    listed in reduced_ref_combos / real_time_ref_combos are re-enabled.
static inline void default_skip_mask(mode_skip_mask_t *mask, REF_SET ref_set) {
  if (ref_set == REF_SET_FULL) {
    // Everything available by default.
    memset(mask, 0, sizeof(*mask));
    return;
  }

  // All modes available by default.
  memset(mask->pred_modes, 0, sizeof(mask->pred_modes));

  // All references disabled first.
  for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
    for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
      mask->ref_combo[ref1][ref2 + 1] = true;
    }
  }

  // Both locals are given defined values up front so that an unexpected
  // 'ref_set' (reachable when assert() is compiled out) enables nothing and
  // never leaves the table pointer indeterminate.
  const MV_REFERENCE_FRAME(*ref_set_combos)[2] = NULL;
  int num_ref_combos = 0;

  // Then enable reduced set of references explicitly.
  switch (ref_set) {
    case REF_SET_REDUCED:
      ref_set_combos = reduced_ref_combos;
      // The cast applies to the element count, not to the byte size.
      num_ref_combos =
          (int)(sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]));
      break;
    case REF_SET_REALTIME:
      ref_set_combos = real_time_ref_combos;
      num_ref_combos =
          (int)(sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]));
      break;
    default: assert(0); break;
  }

  for (int i = 0; i < num_ref_combos; ++i) {
    const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
    mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
  }
}
3768
3769
static inline void init_mode_skip_mask(mode_skip_mask_t *mask,
3770
                                       const AV1_COMP *cpi, MACROBLOCK *x,
3771
0
                                       BLOCK_SIZE bsize) {
3772
0
  const AV1_COMMON *const cm = &cpi->common;
3773
0
  const struct segmentation *const seg = &cm->seg;
3774
0
  MACROBLOCKD *const xd = &x->e_mbd;
3775
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
3776
0
  unsigned char segment_id = mbmi->segment_id;
3777
0
  const SPEED_FEATURES *const sf = &cpi->sf;
3778
0
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
3779
0
  REF_SET ref_set = REF_SET_FULL;
3780
3781
0
  if (sf->rt_sf.use_real_time_ref_set)
3782
0
    ref_set = REF_SET_REALTIME;
3783
0
  else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
3784
0
    ref_set = REF_SET_REDUCED;
3785
3786
0
  default_skip_mask(mask, ref_set);
3787
3788
0
  int min_pred_mv_sad = INT_MAX;
3789
0
  MV_REFERENCE_FRAME ref_frame;
3790
0
  if (ref_set == REF_SET_REALTIME) {
3791
    // For real-time encoding, we only look at a subset of ref frames. So the
3792
    // threshold for pruning should be computed from this subset as well.
3793
0
    const int num_rt_refs =
3794
0
        sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
3795
0
    for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
3796
0
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
3797
0
      if (ref != INTRA_FRAME) {
3798
0
        min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
3799
0
      }
3800
0
    }
3801
0
  } else {
3802
0
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
3803
0
      min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
3804
0
  }
3805
3806
0
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3807
0
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
3808
      // Skip checking missing reference in both single and compound reference
3809
      // modes.
3810
0
      disable_reference(ref_frame, mask->ref_combo);
3811
0
    } else {
3812
      // Skip fixed mv modes for poor references
3813
0
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
3814
0
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3815
0
      }
3816
0
    }
3817
0
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3818
0
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3819
      // Reference not used for the segment.
3820
0
      disable_reference(ref_frame, mask->ref_combo);
3821
0
    }
3822
0
  }
3823
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
3824
  // is disabled for this segment. This is to prevent the possibility that we
3825
  // end up unable to pick any mode.
3826
0
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3827
    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
3828
    // unless ARNR filtering is enabled in which case we want
3829
    // an unfiltered alternative. We allow near/nearest as well
3830
    // because they may result in zero-zero MVs but be cheaper.
3831
0
    if (cpi->rc.is_src_frame_alt_ref &&
3832
0
        (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
3833
0
      disable_inter_references_except_altref(mask->ref_combo);
3834
3835
0
      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
3836
0
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
3837
0
      int_mv near_mv, nearest_mv, global_mv;
3838
0
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
3839
0
                  &x->mbmi_ext);
3840
0
      get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
3841
0
      get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
3842
3843
0
      if (near_mv.as_int != global_mv.as_int)
3844
0
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
3845
0
      if (nearest_mv.as_int != global_mv.as_int)
3846
0
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
3847
0
    }
3848
0
  }
3849
3850
0
  if (cpi->rc.is_src_frame_alt_ref) {
3851
0
    if (inter_sf->alt_ref_search_fp &&
3852
0
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
3853
0
      mask->pred_modes[ALTREF_FRAME] = 0;
3854
0
      disable_inter_references_except_altref(mask->ref_combo);
3855
0
      disable_reference(INTRA_FRAME, mask->ref_combo);
3856
0
    }
3857
0
  }
3858
3859
0
  if (inter_sf->alt_ref_search_fp) {
3860
0
    if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
3861
0
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
3862
      // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
3863
      // those are past frames
3864
0
      MV_REFERENCE_FRAME start_frame =
3865
0
          inter_sf->alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
3866
0
      for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
3867
0
        if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
3868
0
            0) {
3869
          // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
3870
          // to the relative dist of LAST_FRAME.
3871
0
          if (inter_sf->alt_ref_search_fp == 1 &&
3872
0
              (abs(cpi->ref_frame_dist_info
3873
0
                       .ref_relative_dist[ref_frame - LAST_FRAME]) >
3874
0
               1.5 * abs(cpi->ref_frame_dist_info
3875
0
                             .ref_relative_dist[LAST_FRAME - LAST_FRAME]))) {
3876
0
            continue;
3877
0
          }
3878
0
          if (x->pred_mv_sad[ref_frame] > sad_thresh)
3879
0
            mask->pred_modes[ref_frame] |= INTER_ALL;
3880
0
        }
3881
0
      }
3882
0
    }
3883
0
  }
3884
3885
0
  if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
3886
0
    if (x->best_pred_mv_sad[0] < INT_MAX) {
3887
0
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
3888
0
      const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };
3889
3890
      // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
3891
0
      for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
3892
0
        ref_frame = prune_ref_list[ref_idx];
3893
0
        if (x->pred_mv_sad[ref_frame] > sad_thresh)
3894
0
          mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3895
0
      }
3896
0
    }
3897
0
  }
3898
3899
0
  if (bsize > sf->part_sf.max_intra_bsize) {
3900
0
    disable_reference(INTRA_FRAME, mask->ref_combo);
3901
0
  }
3902
3903
0
  if (!cpi->oxcf.tool_cfg.enable_global_motion) {
3904
0
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3905
0
      mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
3906
0
      mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
3907
0
    }
3908
0
  }
3909
3910
0
  mask->pred_modes[INTRA_FRAME] |=
3911
0
      ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
3912
3913
  // Prune reference frames which are not the closest to the current
3914
  // frame and with large pred_mv_sad.
3915
0
  if (inter_sf->prune_single_ref) {
3916
0
    assert(inter_sf->prune_single_ref > 0 && inter_sf->prune_single_ref < 3);
3917
0
    const double prune_threshes[2] = { 1.20, 1.05 };
3918
3919
0
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3920
0
      const RefFrameDistanceInfo *const ref_frame_dist_info =
3921
0
          &cpi->ref_frame_dist_info;
3922
0
      const int is_closest_ref =
3923
0
          (ref_frame == ref_frame_dist_info->nearest_past_ref) ||
3924
0
          (ref_frame == ref_frame_dist_info->nearest_future_ref);
3925
3926
0
      if (!is_closest_ref) {
3927
0
        const int dir =
3928
0
            (ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] < 0)
3929
0
                ? 0
3930
0
                : 1;
3931
0
        if (x->best_pred_mv_sad[dir] < INT_MAX &&
3932
0
            x->pred_mv_sad[ref_frame] >
3933
0
                prune_threshes[inter_sf->prune_single_ref - 1] *
3934
0
                    x->best_pred_mv_sad[dir])
3935
0
          mask->pred_modes[ref_frame] |= INTER_SINGLE_ALL;
3936
0
      }
3937
0
    }
3938
0
  }
3939
0
}
3940
3941
static inline void init_neighbor_pred_buf(const OBMCBuffer *const obmc_buffer,
3942
                                          HandleInterModeArgs *const args,
3943
0
                                          int is_hbd) {
3944
0
  if (is_hbd) {
3945
0
    const int len = sizeof(uint16_t);
3946
0
    args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
3947
0
    args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
3948
0
                                                 (MAX_SB_SQUARE >> 1) * len);
3949
0
    args->above_pred_buf[2] =
3950
0
        CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
3951
0
    args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
3952
0
    args->left_pred_buf[1] =
3953
0
        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
3954
0
    args->left_pred_buf[2] =
3955
0
        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
3956
0
  } else {
3957
0
    args->above_pred_buf[0] = obmc_buffer->above_pred;
3958
0
    args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
3959
0
    args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
3960
0
    args->left_pred_buf[0] = obmc_buffer->left_pred;
3961
0
    args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
3962
0
    args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
3963
0
  }
3964
0
}
3965
3966
static inline int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
3967
0
                                  MV_REFERENCE_FRAME ref_frame) {
3968
0
  const AV1_COMMON *const cm = &cpi->common;
3969
0
  MV_REFERENCE_FRAME rf[2];
3970
0
  av1_set_ref_frame(rf, ref_frame);
3971
3972
0
  if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
3973
3974
0
  if (prune_ref_by_selective_ref_frame(cpi, x, rf,
3975
0
                                       cm->cur_frame->ref_display_order_hint)) {
3976
0
    return 1;
3977
0
  }
3978
3979
0
  return 0;
3980
0
}
3981
3982
static inline int is_ref_frame_used_by_compound_ref(int ref_frame,
3983
0
                                                    int skip_ref_frame_mask) {
3984
0
  for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
3985
0
    if (!(skip_ref_frame_mask & (1 << r))) {
3986
0
      const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
3987
0
      if (rf[0] == ref_frame || rf[1] == ref_frame) {
3988
0
        return 1;
3989
0
      }
3990
0
    }
3991
0
  }
3992
0
  return 0;
3993
0
}
3994
3995
static inline int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
3996
0
                                             const MB_MODE_INFO *mi_cache) {
3997
0
  if (!mi_cache) {
3998
0
    return 0;
3999
0
  }
4000
4001
0
  if (ref_frame < REF_FRAMES) {
4002
0
    return (ref_frame == mi_cache->ref_frame[0] ||
4003
0
            ref_frame == mi_cache->ref_frame[1]);
4004
0
  }
4005
4006
  // if we are here, then the current mode is compound.
4007
0
  MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
4008
0
  return ref_frame == cached_ref_type;
4009
0
}
4010
4011
// Please add/modify parameter setting in this function, making it consistent
4012
// and easy to read and maintain.
4013
static inline void set_params_rd_pick_inter_mode(
4014
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
4015
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
4016
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
4017
0
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
4018
0
  const AV1_COMMON *const cm = &cpi->common;
4019
0
  MACROBLOCKD *const xd = &x->e_mbd;
4020
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
4021
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
4022
0
  unsigned char segment_id = mbmi->segment_id;
4023
4024
0
  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
4025
0
  av1_collect_neighbors_ref_counts(xd);
4026
0
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
4027
0
                           ref_costs_comp);
4028
4029
0
  const int mi_row = xd->mi_row;
4030
0
  const int mi_col = xd->mi_col;
4031
0
  x->best_pred_mv_sad[0] = INT_MAX;
4032
0
  x->best_pred_mv_sad[1] = INT_MAX;
4033
4034
0
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
4035
0
       ++ref_frame) {
4036
0
    x->pred_mv_sad[ref_frame] = INT_MAX;
4037
0
    mbmi_ext->mode_context[ref_frame] = 0;
4038
0
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
4039
0
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
4040
      // Skip the ref frame if the mask says skip and the ref is not used by
4041
      // compound ref.
4042
0
      if (skip_ref_frame_mask & (1 << ref_frame) &&
4043
0
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
4044
0
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4045
0
        continue;
4046
0
      }
4047
0
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
4048
0
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
4049
0
    }
4050
0
    if (cpi->sf.inter_sf.alt_ref_search_fp ||
4051
0
        cpi->sf.inter_sf.prune_single_ref ||
4052
0
        cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
4053
      // Store the best pred_mv_sad across all past frames
4054
0
      if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
4055
0
          0)
4056
0
        x->best_pred_mv_sad[0] =
4057
0
            AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
4058
0
      else
4059
        // Store the best pred_mv_sad across all future frames
4060
0
        x->best_pred_mv_sad[1] =
4061
0
            AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
4062
0
    }
4063
0
  }
4064
4065
0
  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
4066
    // No second reference on RT ref set, so no need to initialize
4067
0
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
4068
0
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
4069
0
      mbmi_ext->mode_context[ref_frame] = 0;
4070
0
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
4071
0
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
4072
0
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
4073
0
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
4074
0
        continue;
4075
0
      }
4076
4077
0
      if (skip_ref_frame_mask & (1 << ref_frame) &&
4078
0
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4079
0
        continue;
4080
0
      }
4081
      // Ref mv list population is not required, when compound references are
4082
      // pruned.
4083
0
      if (prune_ref_frame(cpi, x, ref_frame)) continue;
4084
4085
0
      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
4086
0
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
4087
0
                       mbmi_ext->mode_context);
4088
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
4089
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
4090
0
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
4091
0
    }
4092
0
  }
4093
4094
0
  av1_count_overlappable_neighbors(cm, xd);
4095
0
  const FRAME_UPDATE_TYPE update_type =
4096
0
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
4097
0
  int use_actual_frame_probs = 1;
4098
0
  int prune_obmc;
4099
#if CONFIG_FPMT_TEST
4100
  use_actual_frame_probs =
4101
      (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
4102
  if (!use_actual_frame_probs) {
4103
    prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
4104
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
4105
  }
4106
#endif
4107
0
  if (use_actual_frame_probs) {
4108
0
    prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
4109
0
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
4110
0
  }
4111
0
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
4112
0
    if (check_num_overlappable_neighbors(mbmi) &&
4113
0
        is_motion_variation_allowed_bsize(bsize)) {
4114
0
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4115
0
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4116
0
                                       MAX_SB_SIZE >> 1 };
4117
0
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4118
0
                                        MAX_SB_SIZE >> 1 };
4119
0
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4120
0
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
4121
0
                                          dst_width1, dst_height1,
4122
0
                                          args->above_pred_stride);
4123
0
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
4124
0
                                         dst_width2, dst_height2,
4125
0
                                         args->left_pred_stride);
4126
0
      const int num_planes = av1_num_planes(cm);
4127
0
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
4128
0
                           mi_col, 0, num_planes);
4129
0
      calc_target_weighted_pred(
4130
0
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
4131
0
          args->left_pred_buf[0], args->left_pred_stride[0]);
4132
0
    }
4133
0
  }
4134
4135
0
  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);
4136
4137
  // Set params for mode evaluation
4138
0
  set_mode_eval_params(cpi, x, MODE_EVAL);
4139
4140
0
  x->comp_rd_stats_idx = 0;
4141
4142
0
  for (int idx = 0; idx < REF_FRAMES; idx++) {
4143
0
    args->best_single_sse_in_refs[idx] = INT32_MAX;
4144
0
  }
4145
0
}
4146
4147
static inline void init_single_inter_mode_search_state(
4148
0
    InterModeSearchState *search_state) {
4149
0
  for (int dir = 0; dir < 2; ++dir) {
4150
0
    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4151
0
      for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
4152
0
        SingleInterModeState *state;
4153
4154
0
        state = &search_state->single_state[dir][mode][ref_frame];
4155
0
        state->ref_frame = NONE_FRAME;
4156
0
        state->rd = INT64_MAX;
4157
4158
0
        state = &search_state->single_state_modelled[dir][mode][ref_frame];
4159
0
        state->ref_frame = NONE_FRAME;
4160
0
        state->rd = INT64_MAX;
4161
4162
0
        search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
4163
0
      }
4164
0
    }
4165
0
  }
4166
4167
0
  for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4168
0
    search_state->best_single_rd[ref_frame] = INT64_MAX;
4169
0
    search_state->best_single_mode[ref_frame] = PRED_MODE_INVALID;
4170
0
  }
4171
0
  av1_zero(search_state->single_state_cnt);
4172
0
  av1_zero(search_state->single_state_modelled_cnt);
4173
0
}
4174
4175
static inline void init_inter_mode_search_state(
4176
    InterModeSearchState *search_state, const AV1_COMP *cpi,
4177
0
    const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
4178
0
  init_intra_mode_search_state(&search_state->intra_search_state);
4179
0
  av1_invalid_rd_stats(&search_state->best_y_rdcost);
4180
4181
0
  search_state->best_rd = best_rd_so_far;
4182
0
  search_state->best_skip_rd[0] = INT64_MAX;
4183
0
  search_state->best_skip_rd[1] = INT64_MAX;
4184
4185
0
  av1_zero(search_state->best_mbmode);
4186
4187
0
  search_state->best_rate_y = INT_MAX;
4188
4189
0
  search_state->best_rate_uv = INT_MAX;
4190
4191
0
  search_state->best_mode_skippable = 0;
4192
4193
0
  search_state->best_skip2 = 0;
4194
4195
0
  search_state->best_mode_index = THR_INVALID;
4196
4197
0
  const MACROBLOCKD *const xd = &x->e_mbd;
4198
0
  const MB_MODE_INFO *const mbmi = xd->mi[0];
4199
0
  const unsigned char segment_id = mbmi->segment_id;
4200
4201
0
  search_state->num_available_refs = 0;
4202
0
  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
4203
0
  memset(search_state->dist_order_refs, -1,
4204
0
         sizeof(search_state->dist_order_refs));
4205
4206
0
  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
4207
0
    search_state->mode_threshold[i] = 0;
4208
0
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
4209
0
  for (int i = LAST_NEW_MV_INDEX + 1; i < SINGLE_REF_MODE_END; ++i)
4210
0
    search_state->mode_threshold[i] =
4211
0
        ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4212
0
        RD_THRESH_FAC_FRAC_BITS;
4213
4214
0
  search_state->best_intra_rd = INT64_MAX;
4215
4216
0
  search_state->best_pred_sse = UINT_MAX;
4217
4218
0
  av1_zero(search_state->single_newmv);
4219
0
  av1_zero(search_state->single_newmv_rate);
4220
0
  av1_zero(search_state->single_newmv_valid);
4221
0
  for (int i = SINGLE_INTER_MODE_START; i < SINGLE_INTER_MODE_END; ++i) {
4222
0
    for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4223
0
      for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4224
0
        search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4225
0
        search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4226
0
      }
4227
0
    }
4228
0
  }
4229
4230
0
  for (int i = 0; i < REFERENCE_MODES; ++i) {
4231
0
    search_state->best_pred_rd[i] = INT64_MAX;
4232
0
  }
4233
4234
0
  if (cpi->common.current_frame.reference_mode != SINGLE_REFERENCE) {
4235
0
    for (int i = SINGLE_REF_MODE_END; i < THR_INTER_MODE_END; ++i)
4236
0
      search_state->mode_threshold[i] =
4237
0
          ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4238
0
          RD_THRESH_FAC_FRAC_BITS;
4239
4240
0
    for (int i = COMP_INTER_MODE_START; i < COMP_INTER_MODE_END; ++i) {
4241
0
      for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4242
0
        for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4243
0
          search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4244
0
          search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4245
0
        }
4246
0
      }
4247
0
    }
4248
4249
0
    init_single_inter_mode_search_state(search_state);
4250
0
  }
4251
0
}
4252
4253
static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
4254
                           const MV_REFERENCE_FRAME *ref_frame,
4255
0
                           const PREDICTION_MODE this_mode) {
4256
0
  if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
4257
0
    return true;
4258
0
  }
4259
4260
0
  return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
4261
0
}
4262
4263
static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
4264
                                      BLOCK_SIZE bsize,
4265
                                      PREDICTION_MODE curr_mode,
4266
0
                                      const MV_REFERENCE_FRAME *ref_frames) {
4267
0
  const int comp_pred = ref_frames[1] > INTRA_FRAME;
4268
0
  if (comp_pred) {
4269
0
    if (!is_comp_ref_allowed(bsize)) return 1;
4270
0
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]])) {
4271
0
      return 1;
4272
0
    }
4273
4274
0
    const AV1_COMMON *const cm = &cpi->common;
4275
0
    if (frame_is_intra_only(cm)) return 1;
4276
4277
0
    const CurrentFrame *const current_frame = &cm->current_frame;
4278
0
    if (current_frame->reference_mode == SINGLE_REFERENCE) return 1;
4279
4280
0
    const struct segmentation *const seg = &cm->seg;
4281
0
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
4282
    // Do not allow compound prediction if the segment level reference frame
4283
    // feature is in use as in this case there can only be one reference.
4284
0
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
4285
0
  }
4286
4287
0
  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
4288
    // Mode must be compatible
4289
0
    if (!is_interintra_allowed_bsize(bsize)) return 1;
4290
0
    if (!is_interintra_allowed_mode(curr_mode)) return 1;
4291
0
  }
4292
4293
0
  return 0;
4294
0
}
4295
4296
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
4297
0
                                        BLOCK_SIZE bsize, int mib_size) {
4298
0
  const int sb_size_mask = mib_size - 1;
4299
0
  const MACROBLOCKD *const xd = &x->e_mbd;
4300
0
  const int mi_row = xd->mi_row;
4301
0
  const int mi_col = xd->mi_col;
4302
0
  const int mi_row_in_sb = mi_row & sb_size_mask;
4303
0
  const int mi_col_in_sb = mi_col & sb_size_mask;
4304
0
  const int mi_w = mi_size_wide[bsize];
4305
0
  const int mi_h = mi_size_high[bsize];
4306
0
  int picked_ref_frames_mask = 0;
4307
0
  for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_h; ++i) {
4308
0
    for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_w; ++j) {
4309
0
      picked_ref_frames_mask |= x->picked_ref_frames_mask[i * 32 + j];
4310
0
    }
4311
0
  }
4312
0
  return picked_ref_frames_mask;
4313
0
}
4314
4315
// Check if reference frame pair of the current block matches with the given
4316
// block.
4317
static inline int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4318
0
                                       const MV_REFERENCE_FRAME *ref_frames) {
4319
0
  return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4320
0
          (ref_frames[1] == mbmi->ref_frame[1]));
4321
0
}
4322
4323
// Case 1: return 0, means don't skip this mode
4324
// Case 2: return 1, means skip this mode completely
4325
// Case 3: return 2, means skip compound only, but still try single motion modes
4326
static int inter_mode_search_order_independent_skip(
4327
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
4328
    InterModeSearchState *search_state, int skip_ref_frame_mask,
4329
0
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
4330
0
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
4331
0
    return 1;
4332
0
  }
4333
4334
0
  const int ref_type = av1_ref_frame_type(ref_frame);
4335
0
  if (!cpi->sf.rt_sf.use_real_time_ref_set)
4336
0
    if (prune_ref_frame(cpi, x, ref_type)) return 1;
4337
4338
  // This is only used in motion vector unit test.
4339
0
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
4340
0
      ref_frame[0] == INTRA_FRAME)
4341
0
    return 1;
4342
4343
0
  const AV1_COMMON *const cm = &cpi->common;
4344
0
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
4345
0
    return 1;
4346
0
  }
4347
4348
  // Reuse the prediction mode in cache
4349
0
  if (x->use_mb_mode_cache) {
4350
0
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
4351
0
    const PREDICTION_MODE cached_mode = cached_mi->mode;
4352
0
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
4353
0
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;
4354
4355
    // If the cached mode is intra, then we just need to match the mode.
4356
0
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
4357
0
      return 1;
4358
0
    }
4359
4360
    // If the cached mode is single inter mode, then we match the mode and
4361
    // reference frame.
4362
0
    if (cached_mode_is_single) {
4363
0
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
4364
0
        return 1;
4365
0
      }
4366
0
    } else {
4367
      // If the cached mode is compound, then we need to consider several cases.
4368
0
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
4369
0
      if (mode_is_single) {
4370
        // If the mode is single, we know the modes can't match. But we might
4371
        // still want to search it if compound mode depends on the current mode.
4372
0
        int skip_motion_mode_only = 0;
4373
0
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
4374
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
4375
0
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
4376
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
4377
0
        } else if (cached_mode == NEW_NEWMV) {
4378
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
4379
0
                                   ref_frame[0] == cached_frame[1]);
4380
0
        }
4381
4382
0
        return 1 + skip_motion_mode_only;
4383
0
      } else {
4384
        // If both modes are compound, then everything must match.
4385
0
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
4386
0
            ref_frame[1] != cached_frame[1]) {
4387
0
          return 1;
4388
0
        }
4389
0
      }
4390
0
    }
4391
0
  }
4392
4393
0
  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
4394
  // If no valid mode has been found so far in PARTITION_NONE when finding a
4395
  // valid partition is required, do not skip mode.
4396
0
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
4397
0
      x->must_find_valid_partition)
4398
0
    return 0;
4399
4400
0
  const SPEED_FEATURES *const sf = &cpi->sf;
4401
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
4402
  // frames
4403
0
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
4404
0
      (mode == NEAR_NEARMV || mode == NEARMV)) {
4405
0
    const MACROBLOCKD *const xd = &x->e_mbd;
4406
0
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
4407
0
        xd->up_available) {
4408
0
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
4409
0
                                                    { 1, 1, 0 },
4410
0
                                                    { 2, 1, 0 } };
4411
0
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;
4412
4413
0
      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
4414
0
             qindex_sub_range < 3);
4415
0
      const int num_ref_frame_pair_match_thresh =
4416
0
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
4417
0
                    [qindex_sub_range];
4418
4419
0
      assert(num_ref_frame_pair_match_thresh <= 2 &&
4420
0
             num_ref_frame_pair_match_thresh >= 0);
4421
0
      int num_ref_frame_pair_match = 0;
4422
4423
0
      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
4424
0
      num_ref_frame_pair_match +=
4425
0
          match_ref_frame_pair(xd->above_mbmi, ref_frame);
4426
4427
      // Pruning based on ref frame pair match with neighbors.
4428
0
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
4429
0
    }
4430
0
  }
4431
4432
0
  int skip_motion_mode = 0;
4433
0
  if (mbmi->partition != PARTITION_NONE) {
4434
0
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
4435
0
    if (ref_type <= ALTREF_FRAME && skip_ref) {
4436
      // Since the compound ref modes depends on the motion estimation result of
4437
      // two single ref modes (best mv of single ref modes as the start point),
4438
      // if current single ref mode is marked skip, we need to check if it will
4439
      // be used in compound ref modes.
4440
0
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
4441
        // Found a not skipped compound ref mode which contains current
4442
        // single ref. So this single ref can't be skipped completely
4443
        // Just skip its motion mode search, still try its simple
4444
        // transition mode.
4445
0
        skip_motion_mode = 1;
4446
0
        skip_ref = 0;
4447
0
      }
4448
0
    }
4449
    // If we are reusing the prediction from cache, and the current frame is
4450
    // required by the cache, then we cannot prune it.
4451
0
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
4452
0
      skip_ref = 0;
4453
      // If the cache only needs the current reference type for compound
4454
      // prediction, then we can skip motion mode search.
4455
0
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
4456
0
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
4457
0
    }
4458
0
    if (skip_ref) return 1;
4459
0
  }
4460
4461
0
  if (ref_frame[0] == INTRA_FRAME) {
4462
0
    if (mode != DC_PRED) {
4463
      // Disable intra modes other than DC_PRED for blocks with low variance
4464
      // Threshold for intra skipping based on source variance
4465
      // TODO(debargha): Specialize the threshold for super block sizes
4466
0
      const unsigned int skip_intra_var_thresh = 64;
4467
0
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
4468
0
          x->source_variance < skip_intra_var_thresh)
4469
0
        return 1;
4470
0
    }
4471
0
  }
4472
4473
0
  if (skip_motion_mode) return 2;
4474
4475
0
  return 0;
4476
0
}
4477
4478
static inline void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4479
                             const MV_REFERENCE_FRAME *ref_frames,
4480
0
                             const AV1_COMMON *cm) {
4481
0
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4482
0
  mbmi->ref_mv_idx = 0;
4483
0
  mbmi->mode = curr_mode;
4484
0
  mbmi->uv_mode = UV_DC_PRED;
4485
0
  mbmi->ref_frame[0] = ref_frames[0];
4486
0
  mbmi->ref_frame[1] = ref_frames[1];
4487
0
  pmi->palette_size[0] = 0;
4488
0
  pmi->palette_size[1] = 0;
4489
0
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
4490
0
  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4491
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
4492
0
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4493
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
4494
0
}
4495
4496
static inline void collect_single_states(MACROBLOCK *x,
4497
                                         InterModeSearchState *search_state,
4498
0
                                         const MB_MODE_INFO *const mbmi) {
4499
0
  int i, j;
4500
0
  const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
4501
0
  const PREDICTION_MODE this_mode = mbmi->mode;
4502
0
  const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
4503
0
  const int mode_offset = INTER_OFFSET(this_mode);
4504
0
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
4505
4506
  // Simple rd
4507
0
  int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
4508
0
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4509
0
    const int64_t rd =
4510
0
        search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
4511
0
    if (rd < simple_rd) simple_rd = rd;
4512
0
  }
4513
4514
  // Insertion sort of single_state
4515
0
  const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
4516
0
  SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
4517
0
  i = search_state->single_state_cnt[dir][mode_offset];
4518
0
  for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
4519
0
    state_s[j] = state_s[j - 1];
4520
0
  state_s[j] = this_state_s;
4521
0
  search_state->single_state_cnt[dir][mode_offset]++;
4522
4523
  // Modelled rd
4524
0
  int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
4525
0
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4526
0
    const int64_t rd =
4527
0
        search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
4528
0
    if (rd < modelled_rd) modelled_rd = rd;
4529
0
  }
4530
4531
  // Insertion sort of single_state_modelled
4532
0
  const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
4533
0
  SingleInterModeState *state_m =
4534
0
      search_state->single_state_modelled[dir][mode_offset];
4535
0
  i = search_state->single_state_modelled_cnt[dir][mode_offset];
4536
0
  for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
4537
0
    state_m[j] = state_m[j - 1];
4538
0
  state_m[j] = this_state_m;
4539
0
  search_state->single_state_modelled_cnt[dir][mode_offset]++;
4540
0
}
4541
4542
static inline void analyze_single_states(const AV1_COMP *cpi,
4543
0
                                         InterModeSearchState *search_state) {
4544
0
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
4545
0
  assert(prune_level >= 1);
4546
0
  int i, j, dir, mode;
4547
4548
0
  for (dir = 0; dir < 2; ++dir) {
4549
0
    int64_t best_rd;
4550
0
    SingleInterModeState(*state)[FWD_REFS];
4551
0
    const int prune_factor = prune_level >= 2 ? 6 : 5;
4552
4553
    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
4554
    // reference frames for all the modes (NEARESTMV and NEARMV may not
4555
    // have same motion vectors). Always keep the best of each mode
4556
    // because it might form the best possible combination with other mode.
4557
0
    state = search_state->single_state[dir];
4558
0
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
4559
0
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
4560
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4561
0
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
4562
0
        if (state[mode][i].rd != INT64_MAX &&
4563
0
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
4564
0
          state[mode][i].valid = 0;
4565
0
        }
4566
0
      }
4567
0
    }
4568
4569
0
    state = search_state->single_state_modelled[dir];
4570
0
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
4571
0
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
4572
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4573
0
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
4574
0
        if (state[mode][i].rd != INT64_MAX &&
4575
0
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
4576
0
          state[mode][i].valid = 0;
4577
0
        }
4578
0
      }
4579
0
    }
4580
0
  }
4581
4582
  // Ordering by simple rd first, then by modelled rd
4583
0
  for (dir = 0; dir < 2; ++dir) {
4584
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4585
0
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
4586
0
      const int state_cnt_m =
4587
0
          search_state->single_state_modelled_cnt[dir][mode];
4588
0
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
4589
0
      SingleInterModeState *state_m =
4590
0
          search_state->single_state_modelled[dir][mode];
4591
0
      int count = 0;
4592
0
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
4593
0
      for (i = 0; i < state_cnt_s; ++i) {
4594
0
        if (state_s[i].rd == INT64_MAX) break;
4595
0
        if (state_s[i].valid) {
4596
0
          search_state->single_rd_order[dir][mode][count++] =
4597
0
              state_s[i].ref_frame;
4598
0
        }
4599
0
      }
4600
0
      if (count >= max_candidates) continue;
4601
4602
0
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
4603
0
        if (state_m[i].rd == INT64_MAX) break;
4604
0
        if (!state_m[i].valid) continue;
4605
0
        const int ref_frame = state_m[i].ref_frame;
4606
0
        int match = 0;
4607
        // Check if existing already
4608
0
        for (j = 0; j < count; ++j) {
4609
0
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
4610
0
            match = 1;
4611
0
            break;
4612
0
          }
4613
0
        }
4614
0
        if (match) continue;
4615
        // Check if this ref_frame is removed in simple rd
4616
0
        int valid = 1;
4617
0
        for (j = 0; j < state_cnt_s; ++j) {
4618
0
          if (ref_frame == state_s[j].ref_frame) {
4619
0
            valid = state_s[j].valid;
4620
0
            break;
4621
0
          }
4622
0
        }
4623
0
        if (valid) {
4624
0
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
4625
0
        }
4626
0
      }
4627
0
    }
4628
0
  }
4629
0
}
4630
4631
static int compound_skip_get_candidates(
4632
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
4633
0
    const int dir, const PREDICTION_MODE mode) {
4634
0
  const int mode_offset = INTER_OFFSET(mode);
4635
0
  const SingleInterModeState *state =
4636
0
      search_state->single_state[dir][mode_offset];
4637
0
  const SingleInterModeState *state_modelled =
4638
0
      search_state->single_state_modelled[dir][mode_offset];
4639
4640
0
  int max_candidates = 0;
4641
0
  for (int i = 0; i < FWD_REFS; ++i) {
4642
0
    if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4643
0
    max_candidates++;
4644
0
  }
4645
4646
0
  int candidates = max_candidates;
4647
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4648
0
    candidates = AOMMIN(2, max_candidates);
4649
0
  }
4650
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4651
0
    if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4652
0
        state[0].ref_frame == state_modelled[0].ref_frame)
4653
0
      candidates = 1;
4654
0
    if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4655
0
  }
4656
4657
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4658
    // Limit the number of candidates to 1 in each direction for compound
4659
    // prediction
4660
0
    candidates = AOMMIN(1, candidates);
4661
0
  }
4662
0
  return candidates;
4663
0
}
4664
4665
static int compound_skip_by_single_states(
4666
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
4667
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
4668
0
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
4669
0
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
4670
0
  const int mode[2] = { compound_ref0_mode(this_mode),
4671
0
                        compound_ref1_mode(this_mode) };
4672
0
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
4673
0
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
4674
0
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
4675
0
  int ref_searched[2] = { 0, 0 };
4676
0
  int ref_mv_match[2] = { 1, 1 };
4677
0
  int i, j;
4678
4679
0
  for (i = 0; i < 2; ++i) {
4680
0
    const SingleInterModeState *state =
4681
0
        search_state->single_state[mode_dir[i]][mode_offset[i]];
4682
0
    const int state_cnt =
4683
0
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
4684
0
    for (j = 0; j < state_cnt; ++j) {
4685
0
      if (state[j].ref_frame == refs[i]) {
4686
0
        ref_searched[i] = 1;
4687
0
        break;
4688
0
      }
4689
0
    }
4690
0
  }
4691
4692
0
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
4693
0
  for (i = 0; i < 2; ++i) {
4694
0
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
4695
0
      continue;
4696
0
    }
4697
0
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
4698
0
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
4699
0
      int_mv single_mv;
4700
0
      int_mv comp_mv;
4701
0
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
4702
0
                  &x->mbmi_ext);
4703
0
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
4704
0
      if (single_mv.as_int != comp_mv.as_int) {
4705
0
        ref_mv_match[i] = 0;
4706
0
        break;
4707
0
      }
4708
0
    }
4709
0
  }
4710
4711
0
  for (i = 0; i < 2; ++i) {
4712
0
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
4713
0
    const int candidates =
4714
0
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
4715
0
    const MV_REFERENCE_FRAME *ref_order =
4716
0
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
4717
0
    int match = 0;
4718
0
    for (j = 0; j < candidates; ++j) {
4719
0
      if (refs[i] == ref_order[j]) {
4720
0
        match = 1;
4721
0
        break;
4722
0
      }
4723
0
    }
4724
0
    if (!match) return 1;
4725
0
  }
4726
4727
0
  return 0;
4728
0
}
4729
4730
// Check if ref frames of current block matches with given block.
4731
static inline void match_ref_frame(const MB_MODE_INFO *const mbmi,
4732
                                   const MV_REFERENCE_FRAME *ref_frames,
4733
0
                                   int *const is_ref_match) {
4734
0
  if (is_inter_block(mbmi)) {
4735
0
    is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
4736
0
    is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
4737
0
    if (has_second_ref(mbmi)) {
4738
0
      is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
4739
0
      is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
4740
0
    }
4741
0
  }
4742
0
}
4743
4744
// Prune compound mode using ref frames of neighbor blocks.
4745
static inline int compound_skip_using_neighbor_refs(
4746
    MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
4747
0
    const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
4748
  // Exclude non-extended compound modes from pruning
4749
0
  if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4750
0
      this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4751
0
    return 0;
4752
4753
0
  if (prune_ext_comp_using_neighbors >= 3) return 1;
4754
4755
0
  int is_ref_match[2] = { 0 };  // 0 - match for forward refs
4756
                                // 1 - match for backward refs
4757
  // Check if ref frames of this block matches with left neighbor.
4758
0
  if (xd->left_available)
4759
0
    match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
4760
4761
  // Check if ref frames of this block matches with above neighbor.
4762
0
  if (xd->up_available)
4763
0
    match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
4764
4765
  // Combine ref frame match with neighbors in forward and backward refs.
4766
0
  const int track_ref_match = is_ref_match[0] + is_ref_match[1];
4767
4768
  // Pruning based on ref frame match with neighbors.
4769
0
  if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
4770
0
  return 1;
4771
0
}
4772
4773
// Update best single mode for the given reference frame based on simple rd.
4774
static inline void update_best_single_mode(InterModeSearchState *search_state,
4775
                                           const PREDICTION_MODE this_mode,
4776
                                           const MV_REFERENCE_FRAME ref_frame,
4777
0
                                           int64_t this_rd) {
4778
0
  if (this_rd < search_state->best_single_rd[ref_frame]) {
4779
0
    search_state->best_single_rd[ref_frame] = this_rd;
4780
0
    search_state->best_single_mode[ref_frame] = this_mode;
4781
0
  }
4782
0
}
4783
4784
// Prune compound mode using best single mode for the same reference.
4785
static inline int skip_compound_using_best_single_mode_ref(
4786
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
4787
    const PREDICTION_MODE *best_single_mode,
4788
0
    int prune_comp_using_best_single_mode_ref) {
4789
  // Exclude non-extended compound modes from pruning
4790
0
  if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4791
0
      this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4792
0
    return 0;
4793
4794
0
  assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
4795
0
  const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
4796
  // Get ref frame direction corresponding to NEWMV
4797
  // 0 - NEWMV corresponding to forward direction
4798
  // 1 - NEWMV corresponding to backward direction
4799
0
  const int newmv_dir = comp_mode_ref0 != NEWMV;
4800
4801
  // Avoid pruning the compound mode when ref frame corresponding to NEWMV
4802
  // have NEWMV as single mode winner.
4803
  // Example: For an extended-compound mode,
4804
  // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
4805
  // - Ref frame corresponding to NEWMV is ALTREF_FRAME
4806
  // - Avoid pruning this mode, if best single mode corresponding to ref frame
4807
  //   ALTREF_FRAME is NEWMV
4808
0
  const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
4809
0
  if (single_mode == NEWMV) return 0;
4810
4811
  // Avoid pruning the compound mode when best single mode is not available
4812
0
  if (prune_comp_using_best_single_mode_ref == 1)
4813
0
    if (single_mode == MB_MODE_COUNT) return 0;
4814
0
  return 1;
4815
0
}
4816
4817
0
// qsort()-style comparator for int64_t values in ascending order.
// `a` and `b` point to the two int64_t elements being compared.
// Returns -1 when *a < *b, 0 when they are equal and 1 when *a > *b.
// The arguments are read through const-qualified pointers so the comparator
// does not cast away the const of its parameters.
static int compare_int64(const void *a, const void *b) {
  const int64_t lhs = *(const int64_t *)a;
  const int64_t rhs = *(const int64_t *)b;
  // Comparison results are 0 or 1, so the difference is -1, 0 or 1 and cannot
  // overflow (unlike subtracting the values themselves).
  return (lhs > rhs) - (lhs < rhs);
}
4828
4829
static inline void update_search_state(
4830
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
4831
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
4832
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
4833
0
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
4834
0
  const MACROBLOCKD *xd = &x->e_mbd;
4835
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
4836
0
  const int skip_ctx = av1_get_skip_txfm_context(xd);
4837
0
  const int skip_txfm =
4838
0
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
4839
0
  const TxfmSearchInfo *txfm_info = &x->txfm_search_info;
4840
4841
0
  search_state->best_rd = new_best_rd_stats->rdcost;
4842
0
  search_state->best_mode_index = new_best_mode;
4843
0
  *best_rd_stats_dst = *new_best_rd_stats;
4844
0
  search_state->best_mbmode = *mbmi;
4845
0
  search_state->best_skip2 = skip_txfm;
4846
0
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
4847
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
4848
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
4849
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
4850
  // These two values will be updated when av1_txfm_search is called.
4851
0
  if (txfm_search_done) {
4852
0
    search_state->best_rate_y =
4853
0
        new_best_rd_stats_y->rate +
4854
0
        x->mode_costs.skip_txfm_cost[skip_ctx]
4855
0
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
4856
0
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
4857
0
  }
4858
0
  search_state->best_y_rdcost = *new_best_rd_stats_y;
4859
0
  memcpy(ctx->blk_skip, txfm_info->blk_skip,
4860
0
         sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
4861
0
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
4862
0
}
4863
4864
// Find the best RD for a reference frame (among single reference modes)
4865
// and store +10% of it in the 0-th element in ref_frame_rd.
4866
0
static inline void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
4867
0
  assert(ref_frame_rd[0] == INT64_MAX);
4868
0
  int64_t ref_copy[REF_FRAMES - 1];
4869
0
  memcpy(ref_copy, ref_frame_rd + 1,
4870
0
         sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
4871
0
  qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
4872
4873
0
  int64_t cutoff = ref_copy[0];
4874
  // The cut-off is within 10% of the best.
4875
0
  if (cutoff != INT64_MAX) {
4876
0
    assert(cutoff < INT64_MAX / 200);
4877
0
    cutoff = (110 * cutoff) / 100;
4878
0
  }
4879
0
  ref_frame_rd[0] = cutoff;
4880
0
}
4881
4882
// Check if either frame is within the cutoff.
4883
static inline bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
4884
                                        MV_REFERENCE_FRAME frame1,
4885
0
                                        MV_REFERENCE_FRAME frame2) {
4886
0
  assert(frame2 > 0);
4887
0
  return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
4888
0
         ref_frame_rd[frame2] <= ref_frame_rd[0];
4889
0
}
4890
4891
static inline void evaluate_motion_mode_for_winner_candidates(
4892
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
4893
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
4894
    PICK_MODE_CONTEXT *const ctx,
4895
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
4896
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
4897
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
4898
0
    InterModeSearchState *const search_state, int64_t *yrd) {
4899
0
  const AV1_COMMON *const cm = &cpi->common;
4900
0
  const int num_planes = av1_num_planes(cm);
4901
0
  MACROBLOCKD *const xd = &x->e_mbd;
4902
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
4903
0
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
4904
0
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;
4905
4906
0
  for (int cand = 0; cand < num_best_cand; cand++) {
4907
0
    RD_STATS rd_stats;
4908
0
    RD_STATS rd_stats_y;
4909
0
    RD_STATS rd_stats_uv;
4910
0
    av1_init_rd_stats(&rd_stats);
4911
0
    av1_init_rd_stats(&rd_stats_y);
4912
0
    av1_init_rd_stats(&rd_stats_uv);
4913
0
    int rate_mv;
4914
4915
0
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
4916
0
    args->skip_motion_mode =
4917
0
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
4918
0
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
4919
0
    rd_stats.rate =
4920
0
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;
4921
4922
    // Continue if the best candidate is compound.
4923
0
    if (!is_inter_singleref_mode(mbmi->mode)) continue;
4924
4925
0
    x->txfm_search_info.skip_txfm = 0;
4926
0
    struct macroblockd_plane *pd = xd->plane;
4927
0
    const BUFFER_SET orig_dst = {
4928
0
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
4929
0
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
4930
0
    };
4931
4932
0
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
4933
    // Initialize motion mode to simple translation
4934
    // Calculation of switchable rate depends on it.
4935
0
    mbmi->motion_mode = 0;
4936
0
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
4937
0
    for (int i = 0; i < num_planes; i++) {
4938
0
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
4939
0
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
4940
0
    }
4941
4942
0
    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
4943
0
                           search_state->best_skip_rd[1] };
4944
0
    int64_t this_yrd = INT64_MAX;
4945
0
    int64_t ret_value = motion_mode_rd(
4946
0
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
4947
0
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
4948
0
        do_tx_search, inter_modes_info, 1, &this_yrd);
4949
4950
0
    if (ret_value != INT64_MAX) {
4951
0
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
4952
0
      const THR_MODES mode_enum = get_prediction_mode_idx(
4953
0
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
4954
      // Collect mode stats for multiwinner mode processing
4955
0
      store_winner_mode_stats(
4956
0
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
4957
0
          mode_enum, NULL, bsize, rd_stats.rdcost,
4958
0
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
4959
0
      if (rd_stats.rdcost < search_state->best_rd) {
4960
0
        *yrd = this_yrd;
4961
0
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
4962
0
                            &rd_stats_uv, mode_enum, x, do_tx_search);
4963
0
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
4964
0
      }
4965
0
    }
4966
0
  }
4967
0
}
4968
4969
/*!\cond */
4970
// Arguments for speed feature pruning of inter mode search
4971
typedef struct {
4972
  // Output flag: skip_inter_mode() stores 1 here when
  // inter_mode_search_order_independent_skip() returns 2, and 0 otherwise.
  int *skip_motion_mode;
4973
  // Mode skip mask; forwarded to inter_mode_search_order_independent_skip(),
  // and its pred_modes[INTRA_FRAME] bits gate the luma intra mode search.
  mode_skip_mask_t *mode_skip_mask;
4974
  // Shared inter mode search state (best RD, thresholds, best mode info).
  InterModeSearchState *search_state;
4975
  // Forwarded unchanged to inter_mode_search_order_independent_skip().
  int skip_ref_frame_mask;
4976
  // Set to 1 once analyze_single_states() has been run for the first
  // compound mode that reaches that point in skip_inter_mode().
  int reach_first_comp_mode;
4977
  // Scale applied to the per-mode threshold (then shifted right by
  // MODE_THRESH_QBITS) while the current best mode is skippable.
  int mode_thresh_mul_fact;
4978
  // Compared against NUM_SINGLE_REF_MODES before the single-reference
  // cutoff is computed. NOTE(review): presumably incremented by the caller.
  int num_single_modes_processed;
4979
  // Set to 1 after find_top_ref() has stored the cutoff in ref_frame_rd[0].
  int prune_cpd_using_sr_stats_ready;
4980
} InterModeSFArgs;
4981
/*!\endcond */
4982
4983
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
4984
                           int64_t *ref_frame_rd, int midx,
4985
0
                           InterModeSFArgs *args, int is_low_temp_var) {
4986
0
  const SPEED_FEATURES *const sf = &cpi->sf;
4987
0
  MACROBLOCKD *const xd = &x->e_mbd;
4988
  // Get the actual prediction mode we are trying in this iteration
4989
0
  const THR_MODES mode_enum = av1_default_mode_order[midx];
4990
0
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
4991
0
  const PREDICTION_MODE this_mode = mode_def->mode;
4992
0
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
4993
0
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
4994
0
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
4995
0
  const int comp_pred = second_ref_frame > INTRA_FRAME;
4996
4997
0
  if (ref_frame == INTRA_FRAME) return 1;
4998
4999
0
  const FRAME_UPDATE_TYPE update_type =
5000
0
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
5001
0
  if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
5002
0
      comp_pred) {
5003
0
    return 1;
5004
0
  }
5005
5006
  // This is for real time encoding.
5007
0
  if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
5008
0
      this_mode != NEARESTMV)
5009
0
    return 1;
5010
5011
  // Check if this mode should be skipped because it is incompatible with the
5012
  // current frame
5013
0
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
5014
0
    return 1;
5015
0
  const int ret = inter_mode_search_order_independent_skip(
5016
0
      cpi, x, args->mode_skip_mask, args->search_state,
5017
0
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
5018
0
  if (ret == 1) return 1;
5019
0
  *(args->skip_motion_mode) = (ret == 2);
5020
5021
  // We've reached the first compound prediction mode, get stats from the
5022
  // single reference predictors to help with pruning.
5023
  // Disable this pruning logic if interpolation filter search was skipped for
5024
  // single prediction modes as it can result in aggressive pruning of compound
5025
  // prediction modes due to the absence of modelled_rd populated by
5026
  // av1_interpolation_filter_search().
5027
  // TODO(Remya): Check the impact of the sf
5028
  // 'prune_comp_search_by_single_result' if compound prediction modes are
5029
  // enabled in future for REALTIME encode.
5030
0
  if (!sf->interp_sf.skip_interp_filter_search &&
5031
0
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
5032
0
      args->reach_first_comp_mode == 0) {
5033
0
    analyze_single_states(cpi, args->search_state);
5034
0
    args->reach_first_comp_mode = 1;
5035
0
  }
5036
5037
  // Prune aggressively when best mode is skippable.
5038
0
  int mul_fact = args->search_state->best_mode_skippable
5039
0
                     ? args->mode_thresh_mul_fact
5040
0
                     : (1 << MODE_THRESH_QBITS);
5041
0
  int64_t mode_threshold =
5042
0
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
5043
0
      MODE_THRESH_QBITS;
5044
5045
0
  if (args->search_state->best_rd < mode_threshold) return 1;
5046
5047
  // Skip this compound mode based on the RD results from the single prediction
5048
  // modes
5049
0
  if (!sf->interp_sf.skip_interp_filter_search &&
5050
0
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
5051
0
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
5052
0
                                       ref_frame, second_ref_frame, x))
5053
0
      return 1;
5054
0
  }
5055
5056
0
  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
5057
    // After we done with single reference modes, find the 2nd best RD
5058
    // for a reference frame. Only search compound modes that have a reference
5059
    // frame at least as good as the 2nd best.
5060
0
    if (!args->prune_cpd_using_sr_stats_ready &&
5061
0
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
5062
0
      find_top_ref(ref_frame_rd);
5063
0
      args->prune_cpd_using_sr_stats_ready = 1;
5064
0
    }
5065
0
    if (args->prune_cpd_using_sr_stats_ready &&
5066
0
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
5067
0
      return 1;
5068
0
  }
5069
5070
  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
5071
0
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
5072
0
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
5073
0
    return 1;
5074
0
  }
5075
5076
0
  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
5077
0
    if (compound_skip_using_neighbor_refs(
5078
0
            xd, this_mode, ref_frames,
5079
0
            sf->inter_sf.prune_ext_comp_using_neighbors))
5080
0
      return 1;
5081
0
  }
5082
5083
0
  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
5084
0
    if (skip_compound_using_best_single_mode_ref(
5085
0
            this_mode, ref_frames, args->search_state->best_single_mode,
5086
0
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
5087
0
      return 1;
5088
0
  }
5089
5090
0
  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
5091
0
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
5092
0
    if (skip_nearest_near_mv_using_refmv_weight(
5093
0
            x, this_mode, ref_frame_type,
5094
0
            args->search_state->best_mbmode.mode)) {
5095
      // Ensure the mode is pruned only when the current block has obtained a
5096
      // valid inter mode.
5097
0
      assert(is_inter_mode(args->search_state->best_mbmode.mode));
5098
0
      return 1;
5099
0
    }
5100
0
  }
5101
5102
0
  if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
5103
0
      ref_frame == GOLDEN_FRAME && !comp_pred) {
5104
0
    const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
5105
0
    if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
5106
0
        args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
5107
0
      if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
5108
0
        return 1;
5109
0
    }
5110
0
  }
5111
5112
0
  return 0;
5113
0
}
5114
5115
static void record_best_compound(REFERENCE_MODE reference_mode,
5116
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
5117
                                 InterModeSearchState *search_state,
5118
0
                                 int compmode_cost) {
5119
0
  int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
5120
5121
0
  if (reference_mode == REFERENCE_MODE_SELECT) {
5122
0
    single_rate = rd_stats->rate - compmode_cost;
5123
0
    hybrid_rate = rd_stats->rate;
5124
0
  } else {
5125
0
    single_rate = rd_stats->rate;
5126
0
    hybrid_rate = rd_stats->rate + compmode_cost;
5127
0
  }
5128
5129
0
  single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
5130
0
  hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);
5131
5132
0
  if (!comp_pred) {
5133
0
    if (single_rd < search_state->best_pred_rd[SINGLE_REFERENCE])
5134
0
      search_state->best_pred_rd[SINGLE_REFERENCE] = single_rd;
5135
0
  } else {
5136
0
    if (single_rd < search_state->best_pred_rd[COMPOUND_REFERENCE])
5137
0
      search_state->best_pred_rd[COMPOUND_REFERENCE] = single_rd;
5138
0
  }
5139
0
  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
5140
0
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
5141
0
}
5142
5143
// Does a transform search over a list of the best inter mode candidates.
5144
// This is called if the original mode search computed an RD estimate
5145
// for the transform search rather than doing a full search.
5146
static void tx_search_best_inter_candidates(
5147
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
5148
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
5149
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
5150
    InterModeSearchState *search_state, RD_STATS *rd_cost,
5151
0
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
5152
0
  AV1_COMMON *const cm = &cpi->common;
5153
0
  MACROBLOCKD *const xd = &x->e_mbd;
5154
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5155
0
  const ModeCosts *mode_costs = &x->mode_costs;
5156
0
  const int num_planes = av1_num_planes(cm);
5157
0
  const int skip_ctx = av1_get_skip_txfm_context(xd);
5158
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
5159
0
  InterModesInfo *inter_modes_info = x->inter_modes_info;
5160
0
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
5161
0
  search_state->best_rd = best_rd_so_far;
5162
0
  search_state->best_mode_index = THR_INVALID;
5163
  // Initialize best mode stats for winner mode processing
5164
0
  x->winner_mode_count = 0;
5165
0
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5166
0
                          NULL, bsize, best_rd_so_far,
5167
0
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
5168
0
  inter_modes_info->num =
5169
0
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
5170
0
          ? inter_modes_info->num
5171
0
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
5172
0
  const int64_t top_est_rd =
5173
0
      inter_modes_info->num > 0
5174
0
          ? inter_modes_info
5175
0
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
5176
0
          : INT64_MAX;
5177
0
  *yrd = INT64_MAX;
5178
0
  int64_t best_rd_in_this_partition = INT64_MAX;
5179
0
  int num_inter_mode_cands = inter_modes_info->num;
5180
0
  int newmv_mode_evaled = 0;
5181
0
  int max_allowed_cands = INT_MAX;
5182
0
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
5183
    // The bound on the no. of inter mode candidates, beyond which the
5184
    // candidates are limited if a newmv mode got evaluated, is set as
5185
    // max_allowed_cands + 1.
5186
0
    const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
5187
0
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
5188
0
    max_allowed_cands =
5189
0
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
5190
0
  }
5191
5192
0
  int num_mode_thresh = INT_MAX;
5193
0
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
5194
    // Bound the no. of transform searches per prediction mode beyond a
5195
    // threshold.
5196
0
    const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
5197
0
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
5198
0
    num_mode_thresh =
5199
0
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
5200
0
  }
5201
5202
0
  int num_tx_cands = 0;
5203
0
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
5204
  // Iterate over best inter mode candidates and perform tx search
5205
0
  for (int j = 0; j < num_inter_mode_cands; ++j) {
5206
0
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
5207
0
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
5208
0
    const PREDICTION_MODE prediction_mode = mbmi->mode;
5209
0
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
5210
0
    if (curr_est_rd * 0.80 > top_est_rd) break;
5211
5212
0
    if (num_tx_cands > num_mode_thresh) {
5213
0
      if ((prediction_mode != NEARESTMV &&
5214
0
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
5215
0
          (prediction_mode == NEARESTMV &&
5216
0
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
5217
0
        continue;
5218
0
    }
5219
5220
0
    txfm_info->skip_txfm = 0;
5221
0
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5222
5223
    // Select prediction reference frames.
5224
0
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
5225
0
    for (int i = 0; i < num_planes; i++) {
5226
0
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
5227
0
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
5228
0
    }
5229
5230
0
    bool is_predictor_built = false;
5231
5232
    // Initialize RD stats
5233
0
    RD_STATS rd_stats;
5234
0
    RD_STATS rd_stats_y;
5235
0
    RD_STATS rd_stats_uv;
5236
0
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
5237
0
    int64_t skip_rd = INT64_MAX;
5238
0
    const int txfm_rd_gate_level = get_txfm_rd_gate_level(
5239
0
        cm->seq_params->enable_masked_compound,
5240
0
        cpi->sf.inter_sf.txfm_rd_gate_level, bsize, TX_SEARCH_DEFAULT,
5241
0
        /*eval_motion_mode=*/0);
5242
0
    if (txfm_rd_gate_level) {
5243
      // Check if the mode is good enough based on skip RD
5244
0
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
5245
0
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
5246
0
      int eval_txfm = check_txfm_eval(x, bsize, search_state->best_skip_rd[0],
5247
0
                                      skip_rd, txfm_rd_gate_level, 0);
5248
0
      if (!eval_txfm) continue;
5249
0
    }
5250
5251
    // Build the prediction for this mode
5252
0
    if (!is_predictor_built) {
5253
0
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
5254
0
                                    av1_num_planes(cm) - 1);
5255
0
    }
5256
0
    if (mbmi->motion_mode == OBMC_CAUSAL) {
5257
0
      av1_build_obmc_inter_predictors_sb(cm, xd);
5258
0
    }
5259
5260
0
    num_tx_cands++;
5261
0
    if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
5262
0
    num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
5263
0
    int64_t this_yrd = INT64_MAX;
5264
    // Do the transform search
5265
0
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
5266
0
                         mode_rate, search_state->best_rd)) {
5267
0
      continue;
5268
0
    } else {
5269
0
      const int y_rate =
5270
0
          rd_stats.skip_txfm
5271
0
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
5272
0
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
5273
0
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);
5274
5275
0
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
5276
0
        inter_mode_data_push(
5277
0
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
5278
0
            rd_stats_y.rate + rd_stats_uv.rate +
5279
0
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
5280
0
      }
5281
0
    }
5282
0
    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
5283
0
    if (rd_stats.rdcost < best_rd_in_this_partition) {
5284
0
      best_rd_in_this_partition = rd_stats.rdcost;
5285
0
      *yrd = this_yrd;
5286
0
    }
5287
5288
0
    const THR_MODES mode_enum = get_prediction_mode_idx(
5289
0
        prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5290
5291
    // Collect mode stats for multiwinner mode processing
5292
0
    const int txfm_search_done = 1;
5293
0
    store_winner_mode_stats(
5294
0
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
5295
0
        NULL, bsize, rd_stats.rdcost,
5296
0
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
5297
5298
0
    if (rd_stats.rdcost < search_state->best_rd) {
5299
0
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
5300
0
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
5301
0
      search_state->best_skip_rd[0] = skip_rd;
5302
      // Limit the total number of modes to be evaluated if the first is valid
5303
      // and transform skip or compound
5304
0
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
5305
0
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
5306
          // Evaluate more candidates at high quantizers where occurrence of
5307
          // transform skip is high.
5308
0
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
5309
0
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
5310
0
          num_inter_mode_cands =
5311
0
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
5312
0
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
5313
0
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
5314
          // Evaluate more candidates at low quantizers where occurrence of
5315
          // single reference mode is high.
5316
0
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
5317
0
                                                { 10, 7, 5, 3 } };
5318
0
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
5319
0
          num_inter_mode_cands = AOMMIN(
5320
0
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
5321
0
        }
5322
0
      }
5323
0
    }
5324
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
5325
    // a newmv mode was evaluated already.
5326
0
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
5327
0
  }
5328
0
}
5329
5330
// Indicates number of winner simple translation modes to be used
5331
// NOTE(review): the use site is outside this section; presumably the table is
// indexed by a speed-feature level, with entry 0 meaning no winner candidates
// are kept -- confirm against the caller.
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
5332
5333
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
5334
// speed feature. This list consists of modes that have only searched
5335
// SIMPLE_TRANSLATION. The final list will be used to search other motion
5336
// modes after the initial RD search.
5337
static void handle_winner_cand(
5338
    MB_MODE_INFO *const mbmi,
5339
    motion_mode_best_st_candidate *best_motion_mode_cands,
5340
    int max_winner_motion_mode_cand, int64_t this_rd,
5341
0
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
5342
  // Number of current motion mode candidates in list
5343
0
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
5344
0
  int valid_motion_mode_cand_loc = num_motion_mode_cand;
5345
5346
  // find the best location to insert new motion mode candidate
5347
0
  for (int j = 0; j < num_motion_mode_cand; j++) {
5348
0
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
5349
0
      valid_motion_mode_cand_loc = j;
5350
0
      break;
5351
0
    }
5352
0
  }
5353
5354
  // Insert motion mode if location is found
5355
0
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
5356
0
    if (num_motion_mode_cand > 0 &&
5357
0
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
5358
0
      memmove(
5359
0
          &best_motion_mode_cands
5360
0
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
5361
0
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
5362
0
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
5363
0
           valid_motion_mode_cand_loc) *
5364
0
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
5365
0
    motion_mode_cand->mbmi = *mbmi;
5366
0
    motion_mode_cand->rd_cost = this_rd;
5367
0
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
5368
0
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
5369
0
        *motion_mode_cand;
5370
0
    best_motion_mode_cands->num_motion_mode_cand =
5371
0
        AOMMIN(max_winner_motion_mode_cand,
5372
0
               best_motion_mode_cands->num_motion_mode_cand + 1);
5373
0
  }
5374
0
}
5375
5376
/*!\brief Search intra modes in interframes
5377
 *
5378
 * \ingroup intra_mode_search
5379
 *
5380
 * This function searches for the best intra mode when the current frame is an
5381
 * interframe. This function however does *not* handle luma palette mode.
5382
 * Palette mode is currently handled by \ref av1_search_palette_mode.
5383
 *
5384
 * This function will first iterate through the luma mode candidates to find the
5385
 * best luma intra mode. Once the best luma mode is found, it will then search
5386
 * for the best chroma mode. Because palette mode is currently not handled by
5387
 * here, a cache of uv mode is stored in
5388
 * InterModeSearchState::intra_search_state so it can be reused later by \ref
5389
 * av1_search_palette_mode.
5390
 *
5391
 * \param[in,out] search_state      Struct keeping track of the prediction mode
5392
 *                                  search state in interframe.
5393
 *
5394
 * \param[in]     cpi               Top-level encoder structure.
5395
 * \param[in,out] x                 Pointer to struct holding all the data for
5396
 *                                  the current prediction block.
5397
 * \param[out]    rd_cost           Stores the best rd_cost among all the
5398
 *                                  prediction modes searched.
5399
 * \param[in]     bsize             Current block size.
5400
 * \param[in,out] ctx               Structure to hold the number of 4x4 blks to
5401
 *                                  copy the tx_type and txfm_skip arrays
5402
 *                                  for only the Y plane.
5403
 * \param[in]     sf_args           Stores the list of intra mode candidates
5404
 *                                  to be searched.
5405
 * \param[in]     intra_ref_frame_cost  The entropy cost for signaling that the
5406
 *                                      current ref frame is an intra frame.
5407
 * \param[in]     yrd_threshold     The rdcost threshold for luma intra mode to
5408
 *                                  terminate chroma intra mode search.
5409
 *
5410
 * \remark If a new best mode is found, search_state and rd_costs are updated
5411
 * correspondingly. While x is also modified, it is only used as a temporary
5412
 * buffer, and the final decisions are stored in search_state.
5413
 */
5414
static inline void search_intra_modes_in_interframe(
5415
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
5416
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5417
    const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
5418
0
    int64_t yrd_threshold) {
5419
0
  const AV1_COMMON *const cm = &cpi->common;
5420
0
  const SPEED_FEATURES *const sf = &cpi->sf;
5421
0
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
5422
0
  MACROBLOCKD *const xd = &x->e_mbd;
5423
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
5424
0
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;
5425
5426
0
  int is_best_y_mode_intra = 0;
5427
0
  RD_STATS best_intra_rd_stats_y;
5428
0
  int64_t best_rd_y = INT64_MAX;
5429
0
  int best_mode_cost_y = -1;
5430
0
  MB_MODE_INFO best_mbmi = *xd->mi[0];
5431
0
  THR_MODES best_mode_enum = THR_INVALID;
5432
0
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
5433
0
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
5434
0
  const int num_4x4 = bsize_to_num_blk(bsize);
5435
5436
  // Performs luma search
5437
0
  int64_t best_model_rd = INT64_MAX;
5438
0
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
5439
0
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
5440
0
    top_intra_model_rd[i] = INT64_MAX;
5441
0
  }
5442
0
  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
5443
0
    if (sf->intra_sf.skip_intra_in_interframe &&
5444
0
        search_state->intra_search_state.skip_intra_modes)
5445
0
      break;
5446
0
    set_y_mode_and_delta_angle(
5447
0
        mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
5448
0
    assert(mbmi->mode < INTRA_MODE_END);
5449
5450
    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
5451
0
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
5452
0
      continue;
5453
5454
0
    const THR_MODES mode_enum =
5455
0
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
5456
0
    if ((!intra_mode_cfg->enable_smooth_intra ||
5457
0
         cpi->sf.intra_sf.disable_smooth_intra) &&
5458
0
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
5459
0
         mbmi->mode == SMOOTH_V_PRED))
5460
0
      continue;
5461
0
    if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
5462
0
      continue;
5463
0
    if (av1_is_directional_mode(mbmi->mode) &&
5464
0
        !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
5465
0
        mbmi->angle_delta[PLANE_TYPE_Y] != 0)
5466
0
      continue;
5467
0
    const PREDICTION_MODE this_mode = mbmi->mode;
5468
5469
0
    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
5470
0
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
5471
0
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
5472
0
    x->txfm_search_info.skip_txfm = 0;
5473
5474
0
    if (this_mode != DC_PRED) {
5475
      // Only search the oblique modes if the best so far is
5476
      // one of the neighboring directional modes
5477
0
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
5478
0
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
5479
0
        if (search_state->best_mode_index != THR_INVALID &&
5480
0
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
5481
0
          continue;
5482
0
      }
5483
0
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
5484
0
        if (conditional_skipintra(
5485
0
                this_mode, search_state->intra_search_state.best_intra_mode))
5486
0
          continue;
5487
0
      }
5488
0
    }
5489
5490
0
    RD_STATS intra_rd_stats_y;
5491
0
    int mode_cost_y;
5492
0
    int64_t intra_rd_y = INT64_MAX;
5493
0
    const int is_luma_result_valid = av1_handle_intra_y_mode(
5494
0
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
5495
0
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
5496
0
        &best_model_rd, top_intra_model_rd);
5497
0
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
5498
0
      is_best_y_mode_intra = 1;
5499
0
      if (intra_rd_y < best_rd_y) {
5500
0
        best_intra_rd_stats_y = intra_rd_stats_y;
5501
0
        best_mode_cost_y = mode_cost_y;
5502
0
        best_rd_y = intra_rd_y;
5503
0
        best_mbmi = *mbmi;
5504
0
        best_mode_enum = mode_enum;
5505
0
        memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
5506
0
               sizeof(best_blk_skip[0]) * num_4x4);
5507
0
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
5508
0
      }
5509
0
    }
5510
0
  }
5511
5512
0
  if (!is_best_y_mode_intra) {
5513
0
    return;
5514
0
  }
5515
5516
0
  assert(best_rd_y < INT64_MAX);
5517
5518
  // Restores the best luma mode
5519
0
  *mbmi = best_mbmi;
5520
0
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
5521
0
         sizeof(best_blk_skip[0]) * num_4x4);
5522
0
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);
5523
5524
  // Performs chroma search
5525
0
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
5526
0
  av1_init_rd_stats(&intra_rd_stats);
5527
0
  av1_init_rd_stats(&intra_rd_stats_uv);
5528
0
  const int num_planes = av1_num_planes(cm);
5529
0
  if (num_planes > 1) {
5530
0
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
5531
0
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
5532
0
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);
5533
5534
0
    if (!intra_uv_mode_valid) {
5535
0
      return;
5536
0
    }
5537
0
  }
5538
5539
  // Merge the luma and chroma rd stats
5540
0
  assert(best_mode_cost_y >= 0);
5541
0
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
5542
0
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
5543
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
5544
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
5545
    // (prediction granularity), so we account for it in the full rate,
5546
    // not the tokenonly rate.
5547
0
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
5548
0
  }
5549
5550
0
  const ModeCosts *mode_costs = &x->mode_costs;
5551
0
  const PREDICTION_MODE mode = mbmi->mode;
5552
0
  if (num_planes > 1 && xd->is_chroma_ref) {
5553
0
    const int uv_mode_cost =
5554
0
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
5555
0
    intra_rd_stats.rate +=
5556
0
        intra_rd_stats_uv.rate +
5557
0
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
5558
0
  }
5559
5560
  // Intra block is always coded as non-skip
5561
0
  intra_rd_stats.skip_txfm = 0;
5562
0
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
5563
  // Add in the cost of the no skip flag.
5564
0
  const int skip_ctx = av1_get_skip_txfm_context(xd);
5565
0
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
5566
  // Calculate the final RD estimate for this mode.
5567
0
  const int64_t this_rd =
5568
0
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
5569
  // Keep record of best intra rd
5570
0
  if (this_rd < search_state->best_intra_rd) {
5571
0
    search_state->best_intra_rd = this_rd;
5572
0
    intra_search_state->best_intra_mode = mode;
5573
0
  }
5574
5575
0
  for (int i = 0; i < REFERENCE_MODES; ++i) {
5576
0
    search_state->best_pred_rd[i] =
5577
0
        AOMMIN(search_state->best_pred_rd[i], this_rd);
5578
0
  }
5579
5580
0
  intra_rd_stats.rdcost = this_rd;
5581
5582
  // Collect mode stats for multiwinner mode processing
5583
0
  const int txfm_search_done = 1;
5584
0
  store_winner_mode_stats(
5585
0
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
5586
0
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
5587
0
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
5588
0
  if (intra_rd_stats.rdcost < search_state->best_rd) {
5589
0
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
5590
0
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
5591
0
                        best_mode_enum, x, txfm_search_done);
5592
0
  }
5593
0
}
5594
5595
#if !CONFIG_REALTIME_ONLY
5596
// Prepare inter_cost and intra_cost from TPL stats, which are used as ML
5597
// features in intra mode pruning.
5598
static inline void calculate_cost_from_tpl_data(const AV1_COMP *cpi,
5599
                                                MACROBLOCK *x, BLOCK_SIZE bsize,
5600
                                                int mi_row, int mi_col,
5601
                                                int64_t *inter_cost,
5602
0
                                                int64_t *intra_cost) {
5603
0
  const AV1_COMMON *const cm = &cpi->common;
5604
  // Only consider full SB.
5605
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
5606
0
  const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
5607
0
  const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5608
0
                  (block_size_high[sb_size] / tpl_bsize_1d);
5609
0
  SuperBlockEnc *sb_enc = &x->sb_enc;
5610
0
  if (sb_enc->tpl_data_count == len) {
5611
0
    const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5612
0
    const int tpl_stride = sb_enc->tpl_stride;
5613
0
    const int tplw = mi_size_wide[tpl_bsize];
5614
0
    const int tplh = mi_size_high[tpl_bsize];
5615
0
    const int nw = mi_size_wide[bsize] / tplw;
5616
0
    const int nh = mi_size_high[bsize] / tplh;
5617
0
    if (nw >= 1 && nh >= 1) {
5618
0
      const int of_h = mi_row % mi_size_high[sb_size];
5619
0
      const int of_w = mi_col % mi_size_wide[sb_size];
5620
0
      const int start = of_h / tplh * tpl_stride + of_w / tplw;
5621
5622
0
      for (int k = 0; k < nh; k++) {
5623
0
        for (int l = 0; l < nw; l++) {
5624
0
          *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5625
0
          *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5626
0
        }
5627
0
      }
5628
0
      *inter_cost /= nw * nh;
5629
0
      *intra_cost /= nw * nh;
5630
0
    }
5631
0
  }
5632
0
}
5633
#endif  // !CONFIG_REALTIME_ONLY
5634
5635
// When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
5636
// intra mode search.
5637
static inline void skip_intra_modes_in_interframe(
5638
    AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
5639
    InterModeSearchState *search_state, const SPEED_FEATURES *const sf,
5640
0
    int64_t inter_cost, int64_t intra_cost) {
5641
0
  MACROBLOCKD *const xd = &x->e_mbd;
5642
0
  const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME;
5643
0
  if (sf->rt_sf.prune_intra_mode_based_on_mv_range &&
5644
0
      bsize > sf->part_sf.max_intra_bsize && !comp_pred) {
5645
0
    const MV best_mv = search_state->best_mbmode.mv[0].as_mv;
5646
0
    const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range;
5647
0
    if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh &&
5648
0
        x->source_variance > 128) {
5649
0
      search_state->intra_search_state.skip_intra_modes = 1;
5650
0
      return;
5651
0
    }
5652
0
  }
5653
5654
0
  const unsigned int src_var_thresh_intra_skip = 1;
5655
0
  const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
5656
0
  if (!(skip_intra_in_interframe &&
5657
0
        (x->source_variance > src_var_thresh_intra_skip)))
5658
0
    return;
5659
5660
  // Prune intra search based on best inter mode being transfrom skip.
5661
0
  if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
5662
0
    const int qindex_thresh[2] = { 200, MAXQ };
5663
0
    const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
5664
0
    if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
5665
0
        (x->qindex <= qindex_thresh[ind])) {
5666
0
      search_state->intra_search_state.skip_intra_modes = 1;
5667
0
      return;
5668
0
    } else if ((skip_intra_in_interframe >= 4) &&
5669
0
               (inter_cost < 0 || intra_cost < 0)) {
5670
0
      search_state->intra_search_state.skip_intra_modes = 1;
5671
0
      return;
5672
0
    }
5673
0
  }
5674
  // Use ML model to prune intra search.
5675
0
  if (inter_cost >= 0 && intra_cost >= 0) {
5676
0
    const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
5677
0
                                     ? &av1_intrap_nn_config
5678
0
                                     : &av1_intrap_hd_nn_config;
5679
0
    float nn_features[6];
5680
0
    float scores[2] = { 0.0f };
5681
5682
0
    nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
5683
0
    nn_features[1] = (float)mi_size_wide_log2[bsize];
5684
0
    nn_features[2] = (float)mi_size_high_log2[bsize];
5685
0
    nn_features[3] = (float)intra_cost;
5686
0
    nn_features[4] = (float)inter_cost;
5687
0
    const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
5688
0
    const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
5689
0
    nn_features[5] = (float)(ac_q_max / ac_q);
5690
5691
0
    av1_nn_predict(nn_features, nn_config, 1, scores);
5692
5693
    // For two parameters, the max prob returned from av1_nn_softmax equals
5694
    // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
5695
    // calling of av1_nn_softmax.
5696
0
    const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
5697
0
    assert(skip_intra_in_interframe <= 5);
5698
0
    if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
5699
0
      search_state->intra_search_state.skip_intra_modes = 1;
5700
0
    }
5701
0
  }
5702
0
}
5703
5704
static inline bool skip_interp_filter_search(const AV1_COMP *cpi,
5705
0
                                             int is_single_pred) {
5706
0
  const MODE encoding_mode = cpi->oxcf.mode;
5707
0
  if (encoding_mode == REALTIME) {
5708
0
    return (cpi->common.current_frame.reference_mode == SINGLE_REFERENCE &&
5709
0
            (cpi->sf.interp_sf.skip_interp_filter_search ||
5710
0
             cpi->sf.winner_mode_sf.winner_mode_ifs));
5711
0
  } else if (encoding_mode == GOOD) {
5712
    // Skip interpolation filter search for single prediction modes.
5713
0
    return (cpi->sf.interp_sf.skip_interp_filter_search && is_single_pred);
5714
0
  }
5715
0
  return false;
5716
0
}
5717
5718
static inline int get_block_temp_var(const AV1_COMP *cpi, const MACROBLOCK *x,
5719
0
                                     BLOCK_SIZE bsize) {
5720
0
  const AV1_COMMON *const cm = &cpi->common;
5721
0
  const SPEED_FEATURES *const sf = &cpi->sf;
5722
5723
0
  if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION ||
5724
0
      !sf->rt_sf.short_circuit_low_temp_var ||
5725
0
      !sf->rt_sf.prune_inter_modes_using_temp_var) {
5726
0
    return 0;
5727
0
  }
5728
5729
0
  const int mi_row = x->e_mbd.mi_row;
5730
0
  const int mi_col = x->e_mbd.mi_col;
5731
0
  int is_low_temp_var = 0;
5732
5733
0
  if (cm->seq_params->sb_size == BLOCK_64X64)
5734
0
    is_low_temp_var = av1_get_force_skip_low_temp_var_small_sb(
5735
0
        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
5736
0
  else
5737
0
    is_low_temp_var = av1_get_force_skip_low_temp_var(
5738
0
        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
5739
5740
0
  return is_low_temp_var;
5741
0
}
5742
5743
// TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
5744
void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
5745
                            struct macroblock *x, struct RD_STATS *rd_cost,
5746
                            BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5747
0
                            int64_t best_rd_so_far) {
5748
0
  AV1_COMMON *const cm = &cpi->common;
5749
0
  const FeatureFlags *const features = &cm->features;
5750
0
  const int num_planes = av1_num_planes(cm);
5751
0
  const SPEED_FEATURES *const sf = &cpi->sf;
5752
0
  MACROBLOCKD *const xd = &x->e_mbd;
5753
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
5754
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5755
0
  int i;
5756
0
  const ModeCosts *mode_costs = &x->mode_costs;
5757
0
  const int *comp_inter_cost =
5758
0
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
5759
5760
0
  InterModeSearchState search_state;
5761
0
  init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
5762
0
  INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
5763
0
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
5764
0
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
5765
0
  };
5766
0
  HandleInterModeArgs args = { { NULL },
5767
0
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
5768
0
                               { NULL },
5769
0
                               { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
5770
0
                                 MAX_SB_SIZE >> 1 },
5771
0
                               NULL,
5772
0
                               NULL,
5773
0
                               NULL,
5774
0
                               search_state.modelled_rd,
5775
0
                               INT_MAX,
5776
0
                               INT_MAX,
5777
0
                               search_state.simple_rd,
5778
0
                               0,
5779
0
                               false,
5780
0
                               interintra_modes,
5781
0
                               { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
5782
0
                               { { 0, 0 } },
5783
0
                               { 0 },
5784
0
                               0,
5785
0
                               0,
5786
0
                               -1,
5787
0
                               -1,
5788
0
                               -1,
5789
0
                               { 0 },
5790
0
                               { 0 },
5791
0
                               UINT_MAX };
5792
  // Currently, is_low_temp_var is used in real time encoding.
5793
0
  const int is_low_temp_var = get_block_temp_var(cpi, x, bsize);
5794
5795
0
  for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
5796
  // Indicates the appropriate number of simple translation winner modes for
5797
  // exhaustive motion mode evaluation
5798
0
  const int max_winner_motion_mode_cand =
5799
0
      num_winner_motion_modes[sf->winner_mode_sf.motion_mode_for_winner_cand];
5800
0
  assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
5801
0
  motion_mode_candidate motion_mode_cand;
5802
0
  motion_mode_best_st_candidate best_motion_mode_cands;
5803
  // Initializing the number of motion mode candidates to zero.
5804
0
  best_motion_mode_cands.num_motion_mode_cand = 0;
5805
0
  for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
5806
0
    best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
5807
5808
0
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
5809
5810
0
  av1_invalid_rd_stats(rd_cost);
5811
5812
0
  for (i = 0; i < REF_FRAMES; ++i) {
5813
0
    x->warp_sample_info[i].num = -1;
5814
0
  }
5815
5816
  // Ref frames that are selected by square partition blocks.
5817
0
  int picked_ref_frames_mask = 0;
5818
0
  if (sf->inter_sf.prune_ref_frame_for_rect_partitions &&
5819
0
      mbmi->partition != PARTITION_NONE) {
5820
    // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
5821
    // partition blocks. prune_ref_frame_for_rect_partitions >=2
5822
    // implies prune for vert, horiz and extended partition blocks.
5823
0
    if ((mbmi->partition != PARTITION_VERT &&
5824
0
         mbmi->partition != PARTITION_HORZ) ||
5825
0
        sf->inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
5826
0
      picked_ref_frames_mask =
5827
0
          fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
5828
0
    }
5829
0
  }
5830
5831
#if CONFIG_COLLECT_COMPONENT_TIMING
5832
  start_timing(cpi, set_params_rd_pick_inter_mode_time);
5833
#endif
5834
  // Skip ref frames that never selected by square blocks.
5835
0
  const int skip_ref_frame_mask =
5836
0
      picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
5837
0
  mode_skip_mask_t mode_skip_mask;
5838
0
  unsigned int ref_costs_single[REF_FRAMES];
5839
0
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
5840
0
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
5841
  // init params, set frame modes, speed features
5842
0
  set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
5843
0
                                skip_ref_frame_mask, ref_costs_single,
5844
0
                                ref_costs_comp, yv12_mb);
5845
#if CONFIG_COLLECT_COMPONENT_TIMING
5846
  end_timing(cpi, set_params_rd_pick_inter_mode_time);
5847
#endif
5848
5849
0
  int64_t best_est_rd = INT64_MAX;
5850
0
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
5851
  // If do_tx_search is 0, only estimated RD should be computed.
5852
  // If do_tx_search is 1, all modes have TX search performed.
5853
0
  const int do_tx_search =
5854
0
      !((sf->inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
5855
0
        (sf->inter_sf.inter_mode_rd_model_estimation == 2 &&
5856
0
         num_pels_log2_lookup[bsize] > 8));
5857
0
  InterModesInfo *inter_modes_info = x->inter_modes_info;
5858
0
  inter_modes_info->num = 0;
5859
5860
  // Temporary buffers used by handle_inter_mode().
5861
0
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
5862
5863
  // The best RD found for the reference frame, among single reference modes.
5864
  // Note that the 0-th element will contain a cut-off that is later used
5865
  // to determine if we should skip a compound mode.
5866
0
  int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
5867
0
                                       INT64_MAX, INT64_MAX, INT64_MAX,
5868
0
                                       INT64_MAX, INT64_MAX };
5869
5870
  // Prepared stats used later to check if we could skip intra mode eval.
5871
0
  int64_t inter_cost = -1;
5872
0
  int64_t intra_cost = -1;
5873
  // Need to tweak the threshold for hdres speed 0 & 1.
5874
0
  const int mi_row = xd->mi_row;
5875
0
  const int mi_col = xd->mi_col;
5876
5877
  // Obtain the relevant tpl stats for pruning inter modes
5878
0
  PruneInfoFromTpl inter_cost_info_from_tpl;
5879
0
#if !CONFIG_REALTIME_ONLY
5880
0
  if (sf->inter_sf.prune_inter_modes_based_on_tpl) {
5881
    // x->tpl_keep_ref_frame[id] = 1 => no pruning in
5882
    // prune_ref_by_selective_ref_frame()
5883
    // x->tpl_keep_ref_frame[id] = 0  => ref frame can be pruned in
5884
    // prune_ref_by_selective_ref_frame()
5885
    // Populating valid_refs[idx] = 1 ensures that
5886
    // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
5887
    // pruned ref frame.
5888
0
    int valid_refs[INTER_REFS_PER_FRAME];
5889
0
    for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
5890
0
      const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
5891
0
      valid_refs[frame - 1] =
5892
0
          x->tpl_keep_ref_frame[frame] ||
5893
0
          !prune_ref_by_selective_ref_frame(
5894
0
              cpi, x, refs, cm->cur_frame->ref_display_order_hint);
5895
0
    }
5896
0
    av1_zero(inter_cost_info_from_tpl);
5897
0
    get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
5898
0
                              &inter_cost_info_from_tpl);
5899
0
  }
5900
5901
0
  const int do_pruning =
5902
0
      (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
5903
0
  if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
5904
0
      cpi->oxcf.algo_cfg.enable_tpl_model)
5905
0
    calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
5906
0
                                 &intra_cost);
5907
0
#endif  // !CONFIG_REALTIME_ONLY
5908
5909
  // Initialize best mode stats for winner mode processing.
5910
0
  const int max_winner_mode_count =
5911
0
      winner_mode_count_allowed[sf->winner_mode_sf.multi_winner_mode_type];
5912
0
  zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
5913
0
  x->winner_mode_count = 0;
5914
0
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5915
0
                          NULL, bsize, best_rd_so_far,
5916
0
                          sf->winner_mode_sf.multi_winner_mode_type, 0);
5917
5918
0
  int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
5919
0
  if (sf->inter_sf.prune_inter_modes_if_skippable) {
5920
    // Higher multiplication factor values for lower quantizers.
5921
0
    mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
5922
0
  }
5923
5924
  // Initialize arguments for mode loop speed features
5925
0
  InterModeSFArgs sf_args = { &args.skip_motion_mode,
5926
0
                              &mode_skip_mask,
5927
0
                              &search_state,
5928
0
                              skip_ref_frame_mask,
5929
0
                              0,
5930
0
                              mode_thresh_mul_fact,
5931
0
                              0,
5932
0
                              0 };
5933
0
  int64_t best_inter_yrd = INT64_MAX;
5934
5935
  // This is the main loop of this function. It loops over all possible inter
5936
  // modes and calls handle_inter_mode() to compute the RD for each.
5937
  // Here midx is just an iterator index that should not be used by itself
5938
  // except to keep track of the number of modes searched. It should be used
5939
  // with av1_default_mode_order to get the enum that defines the mode, which
5940
  // can be used with av1_mode_defs to get the prediction mode and the ref
5941
  // frames.
5942
  // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
5943
  // good speedup for real time case. If we decide to use compound mode in real
5944
  // time, maybe we can modify av1_default_mode_order table.
5945
0
  THR_MODES mode_start = THR_INTER_MODE_START;
5946
0
  THR_MODES mode_end = THR_INTER_MODE_END;
5947
0
  const CurrentFrame *const current_frame = &cm->current_frame;
5948
0
  if (current_frame->reference_mode == SINGLE_REFERENCE) {
5949
0
    mode_start = SINGLE_REF_MODE_START;
5950
0
    mode_end = SINGLE_REF_MODE_END;
5951
0
  }
5952
5953
0
  for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
5954
    // Get the actual prediction mode we are trying in this iteration
5955
0
    const THR_MODES mode_enum = av1_default_mode_order[midx];
5956
0
    const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
5957
0
    const PREDICTION_MODE this_mode = mode_def->mode;
5958
0
    const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
5959
5960
0
    const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
5961
0
    const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
5962
0
    const int is_single_pred =
5963
0
        ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
5964
0
    const int comp_pred = second_ref_frame > INTRA_FRAME;
5965
5966
0
    init_mbmi(mbmi, this_mode, ref_frames, cm);
5967
5968
0
    txfm_info->skip_txfm = 0;
5969
0
    sf_args.num_single_modes_processed += is_single_pred;
5970
0
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
5971
#if CONFIG_COLLECT_COMPONENT_TIMING
5972
    start_timing(cpi, skip_inter_mode_time);
5973
#endif
5974
    // Apply speed features to decide if this inter mode can be skipped
5975
0
    const int is_skip_inter_mode = skip_inter_mode(
5976
0
        cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var);
5977
#if CONFIG_COLLECT_COMPONENT_TIMING
5978
    end_timing(cpi, skip_inter_mode_time);
5979
#endif
5980
0
    if (is_skip_inter_mode) continue;
5981
5982
    // Select prediction reference frames.
5983
0
    for (i = 0; i < num_planes; i++) {
5984
0
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
5985
0
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
5986
0
    }
5987
5988
0
    mbmi->angle_delta[PLANE_TYPE_Y] = 0;
5989
0
    mbmi->angle_delta[PLANE_TYPE_UV] = 0;
5990
0
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
5991
0
    mbmi->ref_mv_idx = 0;
5992
5993
0
    const int64_t ref_best_rd = search_state.best_rd;
5994
0
    RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
5995
0
    av1_init_rd_stats(&rd_stats);
5996
5997
0
    const int ref_frame_cost = comp_pred
5998
0
                                   ? ref_costs_comp[ref_frame][second_ref_frame]
5999
0
                                   : ref_costs_single[ref_frame];
6000
0
    const int compmode_cost =
6001
0
        is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
6002
0
    const int real_compmode_cost =
6003
0
        cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
6004
0
            ? compmode_cost
6005
0
            : 0;
6006
    // Point to variables that are maintained between loop iterations
6007
0
    args.single_newmv = search_state.single_newmv;
6008
0
    args.single_newmv_rate = search_state.single_newmv_rate;
6009
0
    args.single_newmv_valid = search_state.single_newmv_valid;
6010
0
    args.single_comp_cost = real_compmode_cost;
6011
0
    args.ref_frame_cost = ref_frame_cost;
6012
0
    args.best_pred_sse = search_state.best_pred_sse;
6013
0
    args.skip_ifs = skip_interp_filter_search(cpi, is_single_pred);
6014
6015
0
    int64_t skip_rd[2] = { search_state.best_skip_rd[0],
6016
0
                           search_state.best_skip_rd[1] };
6017
0
    int64_t this_yrd = INT64_MAX;
6018
#if CONFIG_COLLECT_COMPONENT_TIMING
6019
    start_timing(cpi, handle_inter_mode_time);
6020
#endif
6021
0
    int64_t this_rd = handle_inter_mode(
6022
0
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
6023
0
        ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
6024
0
        inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
6025
0
        &this_yrd);
6026
#if CONFIG_COLLECT_COMPONENT_TIMING
6027
    end_timing(cpi, handle_inter_mode_time);
6028
#endif
6029
0
    if (current_frame->reference_mode != SINGLE_REFERENCE) {
6030
0
      if (!args.skip_ifs &&
6031
0
          sf->inter_sf.prune_comp_search_by_single_result > 0 &&
6032
0
          is_inter_singleref_mode(this_mode)) {
6033
0
        collect_single_states(x, &search_state, mbmi);
6034
0
      }
6035
6036
0
      if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
6037
0
          is_inter_singleref_mode(this_mode))
6038
0
        update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
6039
0
    }
6040
6041
0
    if (this_rd == INT64_MAX) continue;
6042
6043
0
    if (mbmi->skip_txfm) {
6044
0
      rd_stats_y.rate = 0;
6045
0
      rd_stats_uv.rate = 0;
6046
0
    }
6047
6048
0
    if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
6049
0
        this_rd < ref_frame_rd[ref_frame]) {
6050
0
      ref_frame_rd[ref_frame] = this_rd;
6051
0
    }
6052
6053
    // Did this mode help, i.e., is it the new best mode
6054
0
    if (this_rd < search_state.best_rd) {
6055
0
      assert(IMPLIES(comp_pred,
6056
0
                     cm->current_frame.reference_mode != SINGLE_REFERENCE));
6057
0
      search_state.best_pred_sse = x->pred_sse[ref_frame];
6058
0
      best_inter_yrd = this_yrd;
6059
0
      update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
6060
0
                          &rd_stats_uv, mode_enum, x, do_tx_search);
6061
0
      if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
6062
      // skip_rd[0] is the best total rd for a skip mode so far.
6063
      // skip_rd[1] is the best total rd for a skip mode so far in luma.
6064
      // When do_tx_search = 1, both skip_rd[0] and skip_rd[1] are updated.
6065
      // When do_tx_search = 0, skip_rd[1] is updated.
6066
0
      search_state.best_skip_rd[1] = skip_rd[1];
6067
0
    }
6068
0
    if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6069
      // Add this mode to motion mode candidate list for motion mode search
6070
      // if using motion_mode_for_winner_cand speed feature
6071
0
      handle_winner_cand(mbmi, &best_motion_mode_cands,
6072
0
                         max_winner_motion_mode_cand, this_rd,
6073
0
                         &motion_mode_cand, args.skip_motion_mode);
6074
0
    }
6075
6076
    /* keep record of best compound/single-only prediction */
6077
0
    record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
6078
0
                         x->rdmult, &search_state, compmode_cost);
6079
0
  }
6080
6081
#if CONFIG_COLLECT_COMPONENT_TIMING
6082
  start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6083
#endif
6084
0
  if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6085
    // For the single ref winner candidates, evaluate other motion modes (non
6086
    // simple translation).
6087
0
    evaluate_motion_mode_for_winner_candidates(
6088
0
        cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
6089
0
        &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
6090
0
        &search_state, &best_inter_yrd);
6091
0
  }
6092
#if CONFIG_COLLECT_COMPONENT_TIMING
6093
  end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6094
#endif
6095
6096
#if CONFIG_COLLECT_COMPONENT_TIMING
6097
  start_timing(cpi, do_tx_search_time);
6098
#endif
6099
0
  if (do_tx_search != 1) {
6100
    // A full tx search has not yet been done, do tx search for
6101
    // top mode candidates
6102
0
    tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
6103
0
                                    yv12_mb, mi_row, mi_col, &search_state,
6104
0
                                    rd_cost, ctx, &best_inter_yrd);
6105
0
  }
6106
#if CONFIG_COLLECT_COMPONENT_TIMING
6107
  end_timing(cpi, do_tx_search_time);
6108
#endif
6109
6110
#if CONFIG_COLLECT_COMPONENT_TIMING
6111
  start_timing(cpi, handle_intra_mode_time);
6112
#endif
6113
  // Gate intra mode evaluation if best of inter is skip except when source
6114
  // variance is extremely low and also based on max intra bsize.
6115
0
  skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost,
6116
0
                                 intra_cost);
6117
6118
0
  const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
6119
0
  search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
6120
0
                                   &sf_args, intra_ref_frame_cost,
6121
0
                                   best_inter_yrd);
6122
#if CONFIG_COLLECT_COMPONENT_TIMING
6123
  end_timing(cpi, handle_intra_mode_time);
6124
#endif
6125
6126
#if CONFIG_COLLECT_COMPONENT_TIMING
6127
  start_timing(cpi, refine_winner_mode_tx_time);
6128
#endif
6129
0
  int winner_mode_count =
6130
0
      sf->winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
6131
  // In effect only when fast tx search speed features are enabled.
6132
0
  refine_winner_mode_tx(
6133
0
      cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
6134
0
      &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
6135
0
      search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
6136
#if CONFIG_COLLECT_COMPONENT_TIMING
6137
  end_timing(cpi, refine_winner_mode_tx_time);
6138
#endif
6139
6140
  // Initialize default mode evaluation params
6141
0
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);
6142
6143
  // Only try palette mode when the best mode so far is an intra mode.
6144
0
  const int try_palette =
6145
0
      cpi->oxcf.tool_cfg.enable_palette &&
6146
0
      av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
6147
0
      !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
6148
0
  RD_STATS this_rd_cost;
6149
0
  int this_skippable = 0;
6150
0
  if (try_palette) {
6151
#if CONFIG_COLLECT_COMPONENT_TIMING
6152
    start_timing(cpi, av1_search_palette_mode_time);
6153
#endif
6154
0
    this_skippable = av1_search_palette_mode(
6155
0
        &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
6156
0
        ctx, &this_rd_cost, search_state.best_rd);
6157
#if CONFIG_COLLECT_COMPONENT_TIMING
6158
    end_timing(cpi, av1_search_palette_mode_time);
6159
#endif
6160
0
    if (this_rd_cost.rdcost < search_state.best_rd) {
6161
0
      search_state.best_mode_index = THR_DC;
6162
0
      mbmi->mv[0].as_int = 0;
6163
0
      rd_cost->rate = this_rd_cost.rate;
6164
0
      rd_cost->dist = this_rd_cost.dist;
6165
0
      rd_cost->rdcost = this_rd_cost.rdcost;
6166
0
      search_state.best_rd = rd_cost->rdcost;
6167
0
      search_state.best_mbmode = *mbmi;
6168
0
      search_state.best_skip2 = 0;
6169
0
      search_state.best_mode_skippable = this_skippable;
6170
0
      memcpy(ctx->blk_skip, txfm_info->blk_skip,
6171
0
             sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
6172
0
      av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
6173
0
    }
6174
0
  }
6175
6176
0
  search_state.best_mbmode.skip_mode = 0;
6177
0
  if (cm->current_frame.skip_mode_info.skip_mode_flag &&
6178
0
      is_comp_ref_allowed(bsize)) {
6179
0
    const struct segmentation *const seg = &cm->seg;
6180
0
    unsigned char segment_id = mbmi->segment_id;
6181
0
    if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
6182
0
      rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
6183
0
    }
6184
0
  }
6185
6186
  // Make sure that the ref_mv_idx is only nonzero when we're
6187
  // using a mode which can support ref_mv_idx
6188
0
  if (search_state.best_mbmode.ref_mv_idx != 0 &&
6189
0
      !(search_state.best_mbmode.mode == NEWMV ||
6190
0
        search_state.best_mbmode.mode == NEW_NEWMV ||
6191
0
        have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
6192
0
    search_state.best_mbmode.ref_mv_idx = 0;
6193
0
  }
6194
6195
0
  if (search_state.best_mode_index == THR_INVALID ||
6196
0
      search_state.best_rd >= best_rd_so_far) {
6197
0
    rd_cost->rate = INT_MAX;
6198
0
    rd_cost->rdcost = INT64_MAX;
6199
0
    return;
6200
0
  }
6201
6202
0
  const InterpFilter interp_filter = features->interp_filter;
6203
0
  assert((interp_filter == SWITCHABLE) ||
6204
0
         (interp_filter ==
6205
0
          search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
6206
0
         !is_inter_block(&search_state.best_mbmode));
6207
0
  assert((interp_filter == SWITCHABLE) ||
6208
0
         (interp_filter ==
6209
0
          search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
6210
0
         !is_inter_block(&search_state.best_mbmode));
6211
6212
0
  if (!cpi->rc.is_src_frame_alt_ref && sf->inter_sf.adaptive_rd_thresh) {
6213
0
    av1_update_rd_thresh_fact(
6214
0
        cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
6215
0
        search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
6216
0
  }
6217
6218
  // macroblock modes
6219
0
  *mbmi = search_state.best_mbmode;
6220
0
  txfm_info->skip_txfm |= search_state.best_skip2;
6221
6222
  // Note: this section is needed since the mode may have been forced to
6223
  // GLOBALMV by the all-zero mode handling of ref-mv.
6224
0
  if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
6225
    // Correct the interp filters for GLOBALMV
6226
0
    if (is_nontrans_global_motion(xd, xd->mi[0])) {
6227
0
      int_interpfilters filters =
6228
0
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
6229
0
      assert(mbmi->interp_filters.as_int == filters.as_int);
6230
0
      (void)filters;
6231
0
    }
6232
0
  }
6233
6234
0
  txfm_info->skip_txfm |= search_state.best_mode_skippable;
6235
6236
0
  assert(search_state.best_mode_index != THR_INVALID);
6237
6238
#if CONFIG_INTERNAL_STATS
6239
  store_coding_context(x, ctx, search_state.best_mode_index,
6240
                       search_state.best_mode_skippable);
6241
#else
6242
0
  store_coding_context(x, ctx, search_state.best_mode_skippable);
6243
0
#endif  // CONFIG_INTERNAL_STATS
6244
6245
0
  if (mbmi->palette_mode_info.palette_size[1] > 0) {
6246
0
    assert(try_palette);
6247
0
    av1_restore_uv_color_map(cpi, x);
6248
0
  }
6249
0
}
6250
6251
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
6252
                                        TileDataEnc *tile_data, MACROBLOCK *x,
6253
                                        int mi_row, int mi_col,
6254
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
6255
                                        PICK_MODE_CONTEXT *ctx,
6256
0
                                        int64_t best_rd_so_far) {
6257
0
  const AV1_COMMON *const cm = &cpi->common;
6258
0
  const FeatureFlags *const features = &cm->features;
6259
0
  MACROBLOCKD *const xd = &x->e_mbd;
6260
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
6261
0
  unsigned char segment_id = mbmi->segment_id;
6262
0
  const int comp_pred = 0;
6263
0
  int i;
6264
0
  unsigned int ref_costs_single[REF_FRAMES];
6265
0
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
6266
0
  const ModeCosts *mode_costs = &x->mode_costs;
6267
0
  const int *comp_inter_cost =
6268
0
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
6269
0
  InterpFilter best_filter = SWITCHABLE;
6270
0
  int64_t this_rd = INT64_MAX;
6271
0
  int rate2 = 0;
6272
0
  const int64_t distortion2 = 0;
6273
0
  (void)mi_row;
6274
0
  (void)mi_col;
6275
0
  (void)tile_data;
6276
6277
0
  av1_collect_neighbors_ref_counts(xd);
6278
6279
0
  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
6280
0
                           ref_costs_comp);
6281
6282
0
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
6283
0
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;
6284
6285
0
  rd_cost->rate = INT_MAX;
6286
6287
0
  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
6288
6289
0
  mbmi->palette_mode_info.palette_size[0] = 0;
6290
0
  mbmi->palette_mode_info.palette_size[1] = 0;
6291
0
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
6292
0
  mbmi->mode = GLOBALMV;
6293
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
6294
0
  mbmi->uv_mode = UV_DC_PRED;
6295
0
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
6296
0
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
6297
0
  else
6298
0
    mbmi->ref_frame[0] = LAST_FRAME;
6299
0
  mbmi->ref_frame[1] = NONE_FRAME;
6300
0
  mbmi->mv[0].as_int =
6301
0
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
6302
0
                           features->allow_high_precision_mv, bsize, mi_col,
6303
0
                           mi_row, features->cur_frame_force_integer_mv)
6304
0
          .as_int;
6305
0
  mbmi->tx_size = max_txsize_lookup[bsize];
6306
0
  x->txfm_search_info.skip_txfm = 1;
6307
6308
0
  mbmi->ref_mv_idx = 0;
6309
6310
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
6311
0
  av1_count_overlappable_neighbors(cm, xd);
6312
0
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
6313
0
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
6314
0
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
6315
    // Select the samples according to motion vector difference
6316
0
    if (mbmi->num_proj_ref > 1) {
6317
0
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
6318
0
                                             mbmi->num_proj_ref, bsize);
6319
0
    }
6320
0
  }
6321
6322
0
  const InterpFilter interp_filter = features->interp_filter;
6323
0
  set_default_interp_filters(mbmi, interp_filter);
6324
6325
0
  if (interp_filter != SWITCHABLE) {
6326
0
    best_filter = interp_filter;
6327
0
  } else {
6328
0
    best_filter = EIGHTTAP_REGULAR;
6329
0
    if (av1_is_interp_needed(xd)) {
6330
0
      int rs;
6331
0
      int best_rs = INT_MAX;
6332
0
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
6333
0
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
6334
0
        rs = av1_get_switchable_rate(x, xd, interp_filter,
6335
0
                                     cm->seq_params->enable_dual_filter);
6336
0
        if (rs < best_rs) {
6337
0
          best_rs = rs;
6338
0
          best_filter = mbmi->interp_filters.as_filters.y_filter;
6339
0
        }
6340
0
      }
6341
0
    }
6342
0
  }
6343
  // Set the appropriate filter
6344
0
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
6345
0
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
6346
0
                                   cm->seq_params->enable_dual_filter);
6347
6348
0
  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
6349
0
    rate2 += comp_inter_cost[comp_pred];
6350
6351
  // Estimate the reference frame signaling cost and add it
6352
  // to the rolling cost variable.
6353
0
  rate2 += ref_costs_single[LAST_FRAME];
6354
0
  this_rd = RDCOST(x->rdmult, rate2, distortion2);
6355
6356
0
  rd_cost->rate = rate2;
6357
0
  rd_cost->dist = distortion2;
6358
0
  rd_cost->rdcost = this_rd;
6359
6360
0
  if (this_rd >= best_rd_so_far) {
6361
0
    rd_cost->rate = INT_MAX;
6362
0
    rd_cost->rdcost = INT64_MAX;
6363
0
    return;
6364
0
  }
6365
6366
0
  assert((interp_filter == SWITCHABLE) ||
6367
0
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));
6368
6369
0
  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
6370
0
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
6371
0
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
6372
0
                              THR_GLOBALMV, THR_INTER_MODE_START,
6373
0
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
6374
0
  }
6375
6376
#if CONFIG_INTERNAL_STATS
6377
  store_coding_context(x, ctx, THR_GLOBALMV, 0);
6378
#else
6379
0
  store_coding_context(x, ctx, 0);
6380
0
#endif  // CONFIG_INTERNAL_STATS
6381
0
}
6382
6383
/*!\cond */
6384
// Context handed, through the void *fun_ctxt argument, to
// calc_target_weighted_pred_above() and calc_target_weighted_pred_left().
// Field order matters: calc_target_weighted_pred() fills this struct with a
// positional initialiser.
struct calc_target_weighted_pred_ctxt {
6385
  // Supplies the wsrc and mask buffers that the callbacks update.
  const OBMCBuffer *obmc_buffer;
6386
  // Prediction samples of the neighbouring (above or left) blocks. For high
  // bit depth buffers the callbacks run it through CONVERT_TO_SHORTPTR.
  const uint8_t *tmp;
6387
  // Stride, in samples, between consecutive rows of tmp.
  int tmp_stride;
6388
  // Number of rows (above) or columns (left) that are blended; also selects
  // the OBMC mask via av1_get_obmc_mask().
  int overlap;
6389
};
6390
/*!\endcond */
6391
6392
static inline void calc_target_weighted_pred_above(
6393
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6394
0
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6395
0
  (void)nb_mi;
6396
0
  (void)num_planes;
6397
0
  (void)rel_mi_row;
6398
0
  (void)dir;
6399
6400
0
  struct calc_target_weighted_pred_ctxt *ctxt =
6401
0
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6402
6403
0
  const int bw = xd->width << MI_SIZE_LOG2;
6404
0
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6405
6406
0
  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
6407
0
  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
6408
0
  const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
6409
0
  const int is_hbd = is_cur_buf_hbd(xd);
6410
6411
0
  if (!is_hbd) {
6412
0
    for (int row = 0; row < ctxt->overlap; ++row) {
6413
0
      const uint8_t m0 = mask1d[row];
6414
0
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6415
0
      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6416
0
        wsrc[col] = m1 * tmp[col];
6417
0
        mask[col] = m0;
6418
0
      }
6419
0
      wsrc += bw;
6420
0
      mask += bw;
6421
0
      tmp += ctxt->tmp_stride;
6422
0
    }
6423
0
  } else {
6424
0
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6425
6426
0
    for (int row = 0; row < ctxt->overlap; ++row) {
6427
0
      const uint8_t m0 = mask1d[row];
6428
0
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6429
0
      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6430
0
        wsrc[col] = m1 * tmp16[col];
6431
0
        mask[col] = m0;
6432
0
      }
6433
0
      wsrc += bw;
6434
0
      mask += bw;
6435
0
      tmp16 += ctxt->tmp_stride;
6436
0
    }
6437
0
  }
6438
0
}
6439
6440
static inline void calc_target_weighted_pred_left(
6441
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6442
0
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6443
0
  (void)nb_mi;
6444
0
  (void)num_planes;
6445
0
  (void)rel_mi_col;
6446
0
  (void)dir;
6447
6448
0
  struct calc_target_weighted_pred_ctxt *ctxt =
6449
0
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6450
6451
0
  const int bw = xd->width << MI_SIZE_LOG2;
6452
0
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6453
6454
0
  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
6455
0
  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
6456
0
  const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
6457
0
  const int is_hbd = is_cur_buf_hbd(xd);
6458
6459
0
  if (!is_hbd) {
6460
0
    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6461
0
      for (int col = 0; col < ctxt->overlap; ++col) {
6462
0
        const uint8_t m0 = mask1d[col];
6463
0
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6464
0
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6465
0
                    (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6466
0
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6467
0
      }
6468
0
      wsrc += bw;
6469
0
      mask += bw;
6470
0
      tmp += ctxt->tmp_stride;
6471
0
    }
6472
0
  } else {
6473
0
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6474
6475
0
    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6476
0
      for (int col = 0; col < ctxt->overlap; ++col) {
6477
0
        const uint8_t m0 = mask1d[col];
6478
0
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6479
0
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6480
0
                    (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6481
0
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6482
0
      }
6483
0
      wsrc += bw;
6484
0
      mask += bw;
6485
0
      tmp16 += ctxt->tmp_stride;
6486
0
    }
6487
0
  }
6488
0
}
6489
6490
// This function has a structure similar to av1_build_obmc_inter_prediction
6491
//
6492
// The OBMC predictor is computed as:
6493
//
6494
//  PObmc(x,y) =
6495
//    AOM_BLEND_A64(Mh(x),
6496
//                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
6497
//                  PLeft(x, y))
6498
//
6499
// Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
6500
// rounding, this can be written as:
6501
//
6502
//  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
6503
//    Mh(x) * Mv(y) * P(x,y) +
6504
//      Mh(x) * Cv(y) * Pabove(x,y) +
6505
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6506
//
6507
// Where :
6508
//
6509
//  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
6510
//  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
6511
//
6512
// This function computes 'wsrc' and 'mask' as:
6513
//
6514
//  wsrc(x, y) =
6515
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
6516
//      (Mh(x) * Cv(y) * Pabove(x,y) +
6517
//       AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y))
6518
//
6519
//  mask(x, y) = Mh(x) * Mv(y)
6520
//
6521
// These can then be used to efficiently approximate the error for any
6522
// predictor P in the context of the provided neighbouring predictors by
6523
// computing:
6524
//
6525
//  error(x, y) =
6526
//    (wsrc(x, y) - mask(x, y) * P(x, y)) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
6527
//
6528
static inline void calc_target_weighted_pred(
6529
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
6530
    const uint8_t *above, int above_stride, const uint8_t *left,
6531
0
    int left_stride) {
6532
0
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
6533
0
  const int bw = xd->width << MI_SIZE_LOG2;
6534
0
  const int bh = xd->height << MI_SIZE_LOG2;
6535
0
  const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
6536
0
  int32_t *mask_buf = obmc_buffer->mask;
6537
0
  int32_t *wsrc_buf = obmc_buffer->wsrc;
6538
6539
0
  const int is_hbd = is_cur_buf_hbd(xd);
6540
0
  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
6541
6542
  // plane 0 should not be sub-sampled
6543
0
  assert(xd->plane[0].subsampling_x == 0);
6544
0
  assert(xd->plane[0].subsampling_y == 0);
6545
6546
0
  av1_zero_array(wsrc_buf, bw * bh);
6547
0
  for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
6548
6549
  // handle above row
6550
0
  if (xd->up_available) {
6551
0
    const int overlap =
6552
0
        AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
6553
0
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
6554
0
                                                   above_stride, overlap };
6555
0
    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
6556
0
                                  max_neighbor_obmc[mi_size_wide_log2[bsize]],
6557
0
                                  calc_target_weighted_pred_above, &ctxt);
6558
0
  }
6559
6560
0
  for (int i = 0; i < bw * bh; ++i) {
6561
0
    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6562
0
    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6563
0
  }
6564
6565
  // handle left column
6566
0
  if (xd->left_available) {
6567
0
    const int overlap =
6568
0
        AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
6569
0
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
6570
0
                                                   left_stride, overlap };
6571
0
    foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
6572
0
                                 max_neighbor_obmc[mi_size_high_log2[bsize]],
6573
0
                                 calc_target_weighted_pred_left, &ctxt);
6574
0
  }
6575
6576
0
  if (!is_hbd) {
6577
0
    const uint8_t *src = x->plane[0].src.buf;
6578
6579
0
    for (int row = 0; row < bh; ++row) {
6580
0
      for (int col = 0; col < bw; ++col) {
6581
0
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6582
0
      }
6583
0
      wsrc_buf += bw;
6584
0
      src += x->plane[0].src.stride;
6585
0
    }
6586
0
  } else {
6587
0
    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
6588
6589
0
    for (int row = 0; row < bh; ++row) {
6590
0
      for (int col = 0; col < bw; ++col) {
6591
0
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6592
0
      }
6593
0
      wsrc_buf += bw;
6594
0
      src += x->plane[0].src.stride;
6595
0
    }
6596
0
  }
6597
0
}