Coverage Report

Created: 2026-05-24 07:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/encoder/rdopt.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <math.h>
14
#include <stdbool.h>
15
#include <stdint.h>
16
#include <string.h>
17
18
#include "config/aom_config.h"
19
#include "config/aom_dsp_rtcd.h"
20
#include "config/av1_rtcd.h"
21
22
#include "aom_dsp/aom_dsp_common.h"
23
#include "aom_dsp/blend.h"
24
#include "aom_mem/aom_mem.h"
25
#include "aom_ports/aom_timer.h"
26
#include "aom_ports/mem.h"
27
28
#include "av1/common/av1_common_int.h"
29
#include "av1/common/cfl.h"
30
#include "av1/common/blockd.h"
31
#include "av1/common/common.h"
32
#include "av1/common/common_data.h"
33
#include "av1/common/entropy.h"
34
#include "av1/common/entropymode.h"
35
#include "av1/common/enums.h"
36
#include "av1/common/idct.h"
37
#include "av1/common/mvref_common.h"
38
#include "av1/common/obmc.h"
39
#include "av1/common/pred_common.h"
40
#include "av1/common/quant_common.h"
41
#include "av1/common/reconinter.h"
42
#include "av1/common/reconintra.h"
43
#include "av1/common/scan.h"
44
#include "av1/common/seg_common.h"
45
#include "av1/common/txb_common.h"
46
#include "av1/common/warped_motion.h"
47
48
#include "av1/encoder/aq_variance.h"
49
#include "av1/encoder/av1_quantize.h"
50
#include "av1/encoder/block.h"
51
#include "av1/encoder/cost.h"
52
#include "av1/encoder/compound_type.h"
53
#include "av1/encoder/encodemb.h"
54
#include "av1/encoder/encodemv.h"
55
#include "av1/encoder/encoder.h"
56
#include "av1/encoder/encodetxb.h"
57
#include "av1/encoder/hybrid_fwd_txfm.h"
58
#include "av1/encoder/interp_search.h"
59
#include "av1/encoder/intra_mode_search.h"
60
#include "av1/encoder/intra_mode_search_utils.h"
61
#include "av1/encoder/mcomp.h"
62
#include "av1/encoder/ml.h"
63
#include "av1/encoder/mode_prune_model_weights.h"
64
#include "av1/encoder/model_rd.h"
65
#include "av1/encoder/motion_search_facade.h"
66
#include "av1/encoder/palette.h"
67
#include "av1/encoder/pustats.h"
68
#include "av1/encoder/random.h"
69
#include "av1/encoder/ratectrl.h"
70
#include "av1/encoder/rd.h"
71
#include "av1/encoder/rdopt.h"
72
#include "av1/encoder/reconinter_enc.h"
73
#include "av1/encoder/tokenize.h"
74
#include "av1/encoder/tpl_model.h"
75
#include "av1/encoder/tx_search.h"
76
#include "av1/encoder/var_based_part.h"
77
78
0
#define LAST_NEW_MV_INDEX 6
79
80
// Mode_threshold multiplication factor table for prune_inter_modes_if_skippable
81
// The values are kept in Q12 format and equation used to derive is
82
// (2.5 - ((float)x->qindex / MAXQ) * 1.5)
83
0
#define MODE_THRESH_QBITS 12
84
// One Q12 (MODE_THRESH_QBITS) multiplier per table index; the table has
// QINDEX_RANGE entries. The listed values fall monotonically from 10240
// (2.5 in Q12) in the first entry to 4096 (1.0 in Q12) in the last entry.
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
85
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
86
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
87
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
88
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
89
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
90
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
91
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
92
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
93
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
94
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
95
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
96
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
97
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
98
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
99
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
100
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
101
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
102
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
103
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
104
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
105
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
106
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
107
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
108
  4144,  4120,  4096
109
};
110
111
// THR_MODES entries in the order they are listed here: the single-reference
// NEAREST, NEW, NEAR and GLOBAL modes (seven entries each), the sixteen
// compound NEAREST_NEAREST modes, one seven-entry compound group per reference
// pair (NEAR_NEAR, NEW_NEW, NEW_NEAREST, NEAREST_NEW, NEW_NEAR, NEAR_NEW,
// GLOBAL_GLOBAL), and finally the thirteen intra modes.
// NOTE(review): how this order is consumed is not visible in this chunk.
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
112
  THR_NEARESTMV,
113
  THR_NEARESTL2,
114
  THR_NEARESTL3,
115
  THR_NEARESTB,
116
  THR_NEARESTA2,
117
  THR_NEARESTA,
118
  THR_NEARESTG,
119
120
  THR_NEWMV,
121
  THR_NEWL2,
122
  THR_NEWL3,
123
  THR_NEWB,
124
  THR_NEWA2,
125
  THR_NEWA,
126
  THR_NEWG,
127
128
  THR_NEARMV,
129
  THR_NEARL2,
130
  THR_NEARL3,
131
  THR_NEARB,
132
  THR_NEARA2,
133
  THR_NEARA,
134
  THR_NEARG,
135
136
  THR_GLOBALMV,
137
  THR_GLOBALL2,
138
  THR_GLOBALL3,
139
  THR_GLOBALB,
140
  THR_GLOBALA2,
141
  THR_GLOBALA,
142
  THR_GLOBALG,
143
144
  THR_COMP_NEAREST_NEARESTLA,
145
  THR_COMP_NEAREST_NEARESTL2A,
146
  THR_COMP_NEAREST_NEARESTL3A,
147
  THR_COMP_NEAREST_NEARESTGA,
148
  THR_COMP_NEAREST_NEARESTLB,
149
  THR_COMP_NEAREST_NEARESTL2B,
150
  THR_COMP_NEAREST_NEARESTL3B,
151
  THR_COMP_NEAREST_NEARESTGB,
152
  THR_COMP_NEAREST_NEARESTLA2,
153
  THR_COMP_NEAREST_NEARESTL2A2,
154
  THR_COMP_NEAREST_NEARESTL3A2,
155
  THR_COMP_NEAREST_NEARESTGA2,
156
  THR_COMP_NEAREST_NEARESTLL2,
157
  THR_COMP_NEAREST_NEARESTLL3,
158
  THR_COMP_NEAREST_NEARESTLG,
159
  THR_COMP_NEAREST_NEARESTBA,
160
161
  THR_COMP_NEAR_NEARLB,
162
  THR_COMP_NEW_NEWLB,
163
  THR_COMP_NEW_NEARESTLB,
164
  THR_COMP_NEAREST_NEWLB,
165
  THR_COMP_NEW_NEARLB,
166
  THR_COMP_NEAR_NEWLB,
167
  THR_COMP_GLOBAL_GLOBALLB,
168
169
  THR_COMP_NEAR_NEARLA,
170
  THR_COMP_NEW_NEWLA,
171
  THR_COMP_NEW_NEARESTLA,
172
  THR_COMP_NEAREST_NEWLA,
173
  THR_COMP_NEW_NEARLA,
174
  THR_COMP_NEAR_NEWLA,
175
  THR_COMP_GLOBAL_GLOBALLA,
176
177
  THR_COMP_NEAR_NEARL2A,
178
  THR_COMP_NEW_NEWL2A,
179
  THR_COMP_NEW_NEARESTL2A,
180
  THR_COMP_NEAREST_NEWL2A,
181
  THR_COMP_NEW_NEARL2A,
182
  THR_COMP_NEAR_NEWL2A,
183
  THR_COMP_GLOBAL_GLOBALL2A,
184
185
  THR_COMP_NEAR_NEARL3A,
186
  THR_COMP_NEW_NEWL3A,
187
  THR_COMP_NEW_NEARESTL3A,
188
  THR_COMP_NEAREST_NEWL3A,
189
  THR_COMP_NEW_NEARL3A,
190
  THR_COMP_NEAR_NEWL3A,
191
  THR_COMP_GLOBAL_GLOBALL3A,
192
193
  THR_COMP_NEAR_NEARGA,
194
  THR_COMP_NEW_NEWGA,
195
  THR_COMP_NEW_NEARESTGA,
196
  THR_COMP_NEAREST_NEWGA,
197
  THR_COMP_NEW_NEARGA,
198
  THR_COMP_NEAR_NEWGA,
199
  THR_COMP_GLOBAL_GLOBALGA,
200
201
  THR_COMP_NEAR_NEARL2B,
202
  THR_COMP_NEW_NEWL2B,
203
  THR_COMP_NEW_NEARESTL2B,
204
  THR_COMP_NEAREST_NEWL2B,
205
  THR_COMP_NEW_NEARL2B,
206
  THR_COMP_NEAR_NEWL2B,
207
  THR_COMP_GLOBAL_GLOBALL2B,
208
209
  THR_COMP_NEAR_NEARL3B,
210
  THR_COMP_NEW_NEWL3B,
211
  THR_COMP_NEW_NEARESTL3B,
212
  THR_COMP_NEAREST_NEWL3B,
213
  THR_COMP_NEW_NEARL3B,
214
  THR_COMP_NEAR_NEWL3B,
215
  THR_COMP_GLOBAL_GLOBALL3B,
216
217
  THR_COMP_NEAR_NEARGB,
218
  THR_COMP_NEW_NEWGB,
219
  THR_COMP_NEW_NEARESTGB,
220
  THR_COMP_NEAREST_NEWGB,
221
  THR_COMP_NEW_NEARGB,
222
  THR_COMP_NEAR_NEWGB,
223
  THR_COMP_GLOBAL_GLOBALGB,
224
225
  THR_COMP_NEAR_NEARLA2,
226
  THR_COMP_NEW_NEWLA2,
227
  THR_COMP_NEW_NEARESTLA2,
228
  THR_COMP_NEAREST_NEWLA2,
229
  THR_COMP_NEW_NEARLA2,
230
  THR_COMP_NEAR_NEWLA2,
231
  THR_COMP_GLOBAL_GLOBALLA2,
232
233
  THR_COMP_NEAR_NEARL2A2,
234
  THR_COMP_NEW_NEWL2A2,
235
  THR_COMP_NEW_NEARESTL2A2,
236
  THR_COMP_NEAREST_NEWL2A2,
237
  THR_COMP_NEW_NEARL2A2,
238
  THR_COMP_NEAR_NEWL2A2,
239
  THR_COMP_GLOBAL_GLOBALL2A2,
240
241
  THR_COMP_NEAR_NEARL3A2,
242
  THR_COMP_NEW_NEWL3A2,
243
  THR_COMP_NEW_NEARESTL3A2,
244
  THR_COMP_NEAREST_NEWL3A2,
245
  THR_COMP_NEW_NEARL3A2,
246
  THR_COMP_NEAR_NEWL3A2,
247
  THR_COMP_GLOBAL_GLOBALL3A2,
248
249
  THR_COMP_NEAR_NEARGA2,
250
  THR_COMP_NEW_NEWGA2,
251
  THR_COMP_NEW_NEARESTGA2,
252
  THR_COMP_NEAREST_NEWGA2,
253
  THR_COMP_NEW_NEARGA2,
254
  THR_COMP_NEAR_NEWGA2,
255
  THR_COMP_GLOBAL_GLOBALGA2,
256
257
  THR_COMP_NEAR_NEARLL2,
258
  THR_COMP_NEW_NEWLL2,
259
  THR_COMP_NEW_NEARESTLL2,
260
  THR_COMP_NEAREST_NEWLL2,
261
  THR_COMP_NEW_NEARLL2,
262
  THR_COMP_NEAR_NEWLL2,
263
  THR_COMP_GLOBAL_GLOBALLL2,
264
265
  THR_COMP_NEAR_NEARLL3,
266
  THR_COMP_NEW_NEWLL3,
267
  THR_COMP_NEW_NEARESTLL3,
268
  THR_COMP_NEAREST_NEWLL3,
269
  THR_COMP_NEW_NEARLL3,
270
  THR_COMP_NEAR_NEWLL3,
271
  THR_COMP_GLOBAL_GLOBALLL3,
272
273
  THR_COMP_NEAR_NEARLG,
274
  THR_COMP_NEW_NEWLG,
275
  THR_COMP_NEW_NEARESTLG,
276
  THR_COMP_NEAREST_NEWLG,
277
  THR_COMP_NEW_NEARLG,
278
  THR_COMP_NEAR_NEWLG,
279
  THR_COMP_GLOBAL_GLOBALLG,
280
281
  THR_COMP_NEAR_NEARBA,
282
  THR_COMP_NEW_NEWBA,
283
  THR_COMP_NEW_NEARESTBA,
284
  THR_COMP_NEAREST_NEWBA,
285
  THR_COMP_NEW_NEARBA,
286
  THR_COMP_NEAR_NEWBA,
287
  THR_COMP_GLOBAL_GLOBALBA,
288
289
  THR_DC,
290
  THR_PAETH,
291
  THR_SMOOTH,
292
  THR_SMOOTH_V,
293
  THR_SMOOTH_H,
294
  THR_H_PRED,
295
  THR_V_PRED,
296
  THR_D135_PRED,
297
  THR_D203_PRED,
298
  THR_D157_PRED,
299
  THR_D67_PRED,
300
  THR_D113_PRED,
301
  THR_D45_PRED,
302
};
303
304
/*!\cond */
305
// One entry of the single-reference search result tables kept in
// InterModeSearchState (single_state / single_state_modelled).
// NOTE(review): field meanings below are inferred from names — confirm.
typedef struct SingleInterModeState {
306
  // presumably the RD cost recorded for this entry
  int64_t rd;
307
  // reference frame the entry belongs to
  MV_REFERENCE_FRAME ref_frame;
308
  // presumably non-zero when the entry holds a usable result
  int valid;
309
} SingleInterModeState;
310
311
// Aggregate of the "best so far" results and the per-mode / per-reference
// caches used while searching inter modes for one block.
// NOTE(review): how each field is filled is not visible in this chunk; the
// notes below only restate what the declarations themselves show.
typedef struct InterModeSearchState {
312
  int64_t best_rd;
313
  int64_t best_skip_rd[2];
314
  MB_MODE_INFO best_mbmode;
315
  int best_rate_y;
316
  int best_rate_uv;
317
  int best_mode_skippable;
318
  int best_skip2;
319
  THR_MODES best_mode_index;
320
  int num_available_refs;
321
  // Per-reference arrays, REF_FRAMES entries each.
  int64_t dist_refs[REF_FRAMES];
322
  int dist_order_refs[REF_FRAMES];
323
  // One threshold per THR_MODES entry (MAX_MODES entries).
  int64_t mode_threshold[MAX_MODES];
324
  int64_t best_intra_rd;
325
  unsigned int best_pred_sse;
326
327
  // NOTE(review): the \brief below talks about the best intra rd, yet it is
  // attached to best_pred_rd (indexed by REFERENCE_MODES); it may have been
  // meant for best_intra_rd above — confirm.
  /*!
328
   * \brief Keep track of best intra rd for use in compound mode.
329
   */
330
  int64_t best_pred_rd[REFERENCE_MODES];
331
  // Save a set of single_newmv for each checked ref_mv.
332
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
333
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
334
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
335
  // Indexed [prediction mode][ref_mv index][reference frame].
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
336
  // The rd of simple translation in single inter modes
337
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
338
  int64_t best_single_rd[REF_FRAMES];
339
  PREDICTION_MODE best_single_mode[REF_FRAMES];
340
341
  // Single search results by [directions][modes][reference frames]
342
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
343
  // Entry counts for single_state, by [directions][modes].
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
344
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
345
                                            [FWD_REFS];
346
  // Entry counts for single_state_modelled, by [directions][modes].
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
347
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
348
  IntraModeSearchState intra_search_state;
349
  RD_STATS best_y_rdcost;
350
} InterModeSearchState;
351
/*!\endcond */
352
353
0
// Puts every per-block-size inter mode RD model of the tile back into its
// empty state: not ready, zero samples, all running sums cleared.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  InterModeRdModel *model = tile_data->inter_mode_rd_models;
  InterModeRdModel *const models_end = model + BLOCK_SIZES_ALL;

  for (; model != models_end; ++model) {
    model->ready = 0;
    model->num = 0;
    model->dist_sum = 0;
    model->ld_sum = 0;
    model->sse_sum = 0;
    model->sse_sse_sum = 0;
    model->sse_ld_sum = 0;
  }
}
365
366
// Estimates the residue rate and the distortion of a block from its SSE using
// the linear model stored for `bsize` in tile_data->inter_mode_rd_models.
//
// Returns 0 and leaves the outputs untouched when the model is not ready.
// Otherwise returns 1 after writing *est_residue_cost and *est_dist:
//  - sse below the model's mean distortion: cost 0, distortion = sse;
//  - otherwise distortion = round(dist_mean) and
//    cost = (sse - dist_mean) / (a * sse + b), clamped to [0, INT_MAX / 2];
//    if that cost ends up <= 0 the result falls back to cost 0 and
//    distortion = sse.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
  if (!md->ready) return 0;

  if (sse < md->dist_mean) {
    *est_residue_cost = 0;
    *est_dist = sse;
    return 1;
  }

  // Clamp estimated rate cost by INT_MAX / 2.
  // TODO(angiebird@google.com): find better solution than clamping.
  const int max_cost = INT_MAX / 2;
  const double est_ld = md->a * sse + md->b;
  *est_dist = (int64_t)round(md->dist_mean);

  if (fabs(est_ld) < 1e-2) {
    *est_residue_cost = max_cost;
  } else {
    // The clamp is applied while the value is still a double: converting a
    // NaN or an out-of-range double to an integer type is undefined, and
    // md->a / md->b come from a division in av1_inter_mode_data_fit() whose
    // denominator can be zero.
    const double cost_dbl = round((sse - md->dist_mean) / est_ld);
    if (!(cost_dbl > 0)) {
      // Negative, zero or NaN.
      *est_residue_cost = 0;
    } else if (cost_dbl >= max_cost) {
      *est_residue_cost = max_cost;
    } else {
      *est_residue_cost = (int)cost_dbl;
    }
  }

  if (*est_residue_cost <= 0) {
    *est_residue_cost = 0;
    *est_dist = sse;
  }
  return 1;
}
399
400
0
// Refreshes the linear model (a, b) of every block size that has gathered
// enough samples since the last fit, then clears its accumulators.
//
// A model that is not ready yet needs at least 200 samples and takes the
// batch averages as its means; a ready model needs at least 64 samples and
// blends the batch averages into its means with weight 1 against 3 for the
// previous value. `rdmult` is unused.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    (void)rdmult;
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
    if (inter_mode_data_block_idx(bsize) == -1) continue;

    const int too_few_samples =
        (md->ready == 0) ? (md->num < 200) : (md->ready == 1 && md->num < 64);
    if (too_few_samples) continue;

    // Averages of the samples pushed since the previous fit.
    const double batch_dist = md->dist_sum / md->num;
    const double batch_ld = md->ld_sum / md->num;
    const double batch_sse = md->sse_sum / md->num;
    const double batch_sse_sse = md->sse_sse_sum / md->num;
    const double batch_sse_ld = md->sse_ld_sum / md->num;

    if (md->ready == 0) {
      md->dist_mean = batch_dist;
      md->ld_mean = batch_ld;
      md->sse_mean = batch_sse;
      md->sse_sse_mean = batch_sse_sse;
      md->sse_ld_mean = batch_sse_ld;
    } else {
      const double factor = 3;
      md->dist_mean = (md->dist_mean * factor + batch_dist) / (factor + 1);
      md->ld_mean = (md->ld_mean * factor + batch_ld) / (factor + 1);
      md->sse_mean = (md->sse_mean * factor + batch_sse) / (factor + 1);
      md->sse_sse_mean =
          (md->sse_sse_mean * factor + batch_sse_sse) / (factor + 1);
      md->sse_ld_mean =
          (md->sse_ld_mean * factor + batch_sse_ld) / (factor + 1);
    }

    // Line fit of ld against sse from the running means.
    // NOTE(review): the denominator is zero when sse_sse_mean equals
    // sse_mean squared, which leaves a and b non-finite — confirm callers
    // tolerate that.
    const double mean_ld = md->ld_mean;
    const double mean_sse = md->sse_mean;
    const double rms_sse = sqrt(md->sse_sse_mean);
    const double mean_sse_ld = md->sse_ld_mean;

    md->a = (mean_sse_ld - mean_sse * mean_ld) /
            (rms_sse * rms_sse - mean_sse * mean_sse);
    md->b = mean_ld - md->a * mean_sse;
    md->ready = 1;

    // Start a fresh batch.
    md->num = 0;
    md->dist_sum = 0;
    md->ld_sum = 0;
    md->sse_sum = 0;
    md->sse_sse_sum = 0;
    md->sse_ld_sum = 0;
  }
}
449
450
static inline void inter_mode_data_push(TileDataEnc *tile_data,
451
                                        BLOCK_SIZE bsize, int64_t sse,
452
0
                                        int64_t dist, int residue_cost) {
453
0
  if (residue_cost == 0 || sse == dist) return;
454
0
  const int block_idx = inter_mode_data_block_idx(bsize);
455
0
  if (block_idx == -1) return;
456
0
  InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
457
0
  if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
458
0
    const double ld = (sse - dist) * 1. / residue_cost;
459
0
    ++rd_model->num;
460
0
    rd_model->dist_sum += dist;
461
0
    rd_model->ld_sum += ld;
462
0
    rd_model->sse_sum += sse;
463
0
    rd_model->sse_sse_sum += (double)sse * (double)sse;
464
0
    rd_model->sse_ld_sum += sse * ld;
465
0
  }
466
0
}
467
468
static inline void inter_modes_info_push(InterModesInfo *inter_modes_info,
469
                                         int mode_rate, int64_t sse, int64_t rd,
470
                                         RD_STATS *rd_cost, RD_STATS *rd_cost_y,
471
                                         RD_STATS *rd_cost_uv,
472
0
                                         const MB_MODE_INFO *mbmi) {
473
0
  const int num = inter_modes_info->num;
474
0
  assert(num < MAX_INTER_MODES);
475
0
  inter_modes_info->mbmi_arr[num] = *mbmi;
476
0
  inter_modes_info->mode_rate_arr[num] = mode_rate;
477
0
  inter_modes_info->sse_arr[num] = sse;
478
0
  inter_modes_info->est_rd_arr[num] = rd;
479
0
  inter_modes_info->rd_cost_arr[num] = *rd_cost;
480
0
  inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
481
0
  inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
482
0
  ++inter_modes_info->num;
483
0
}
484
485
0
static int compare_rd_idx_pair(const void *a, const void *b) {
486
0
  if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
487
    // To avoid inconsistency in qsort() ordering when two elements are equal,
488
    // using idx as tie breaker. Refer aomedia:2928
489
0
    if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
490
0
      return 0;
491
0
    else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
492
0
      return 1;
493
0
    else
494
0
      return -1;
495
0
  } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
496
0
    return 1;
497
0
  } else {
498
0
    return -1;
499
0
  }
500
0
}
501
502
static inline void inter_modes_info_sort(const InterModesInfo *inter_modes_info,
503
0
                                         RdIdxPair *rd_idx_pair_arr) {
504
0
  if (inter_modes_info->num == 0) {
505
0
    return;
506
0
  }
507
0
  for (int i = 0; i < inter_modes_info->num; ++i) {
508
0
    rd_idx_pair_arr[i].idx = i;
509
0
    rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
510
0
  }
511
0
  qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
512
0
        compare_rd_idx_pair);
513
0
}
514
515
// Initialize estimated RD Cost records of compound average.
516
static inline void init_comp_avg_est_rd(
517
0
    struct macroblock *x, int skip_cmp_using_top_cmp_avg_est_rd_lvl) {
518
0
  if (!skip_cmp_using_top_cmp_avg_est_rd_lvl) return;
519
520
0
  for (int j = 0; j < TOP_COMP_AVG_EST_RD_COUNT; j++) {
521
0
    x->top_comp_avg_est_rd[j] = INT64_MAX;
522
0
  }
523
0
}
524
525
// Similar to get_horver_correlation, but also takes into account first
526
// row/column, when computing horizontal/vertical correlation.
527
void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
528
                                       int width, int height, float *hcorr,
529
0
                                       float *vcorr) {
530
  // The following notation is used:
531
  // x - current pixel
532
  // y - left neighbor pixel
533
  // z - top neighbor pixel
534
0
  int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
535
0
  int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
536
0
  int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
537
538
  // First, process horizontal correlation on just the first row
539
0
  x_sum += diff[0];
540
0
  x2_sum += diff[0] * diff[0];
541
0
  x_firstrow += diff[0];
542
0
  x2_firstrow += diff[0] * diff[0];
543
0
  for (int j = 1; j < width; ++j) {
544
0
    const int16_t x = diff[j];
545
0
    const int16_t y = diff[j - 1];
546
0
    x_sum += x;
547
0
    x_firstrow += x;
548
0
    x2_sum += x * x;
549
0
    x2_firstrow += x * x;
550
0
    xy_sum += x * y;
551
0
  }
552
553
  // Process vertical correlation in the first column
554
0
  x_firstcol += diff[0];
555
0
  x2_firstcol += diff[0] * diff[0];
556
0
  for (int i = 1; i < height; ++i) {
557
0
    const int16_t x = diff[i * stride];
558
0
    const int16_t z = diff[(i - 1) * stride];
559
0
    x_sum += x;
560
0
    x_firstcol += x;
561
0
    x2_sum += x * x;
562
0
    x2_firstcol += x * x;
563
0
    xz_sum += x * z;
564
0
  }
565
566
  // Now process horiz and vert correlation through the rest unit
567
0
  for (int i = 1; i < height; ++i) {
568
0
    for (int j = 1; j < width; ++j) {
569
0
      const int16_t x = diff[i * stride + j];
570
0
      const int16_t y = diff[i * stride + j - 1];
571
0
      const int16_t z = diff[(i - 1) * stride + j];
572
0
      x_sum += x;
573
0
      x2_sum += x * x;
574
0
      xy_sum += x * y;
575
0
      xz_sum += x * z;
576
0
    }
577
0
  }
578
579
0
  for (int j = 0; j < width; ++j) {
580
0
    x_finalrow += diff[(height - 1) * stride + j];
581
0
    x2_finalrow +=
582
0
        diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
583
0
  }
584
0
  for (int i = 0; i < height; ++i) {
585
0
    x_finalcol += diff[i * stride + width - 1];
586
0
    x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
587
0
  }
588
589
0
  int64_t xhor_sum = x_sum - x_finalcol;
590
0
  int64_t xver_sum = x_sum - x_finalrow;
591
0
  int64_t y_sum = x_sum - x_firstcol;
592
0
  int64_t z_sum = x_sum - x_firstrow;
593
0
  int64_t x2hor_sum = x2_sum - x2_finalcol;
594
0
  int64_t x2ver_sum = x2_sum - x2_finalrow;
595
0
  int64_t y2_sum = x2_sum - x2_firstcol;
596
0
  int64_t z2_sum = x2_sum - x2_firstrow;
597
598
0
  const float num_hor = (float)(height * (width - 1));
599
0
  const float num_ver = (float)((height - 1) * width);
600
601
0
  const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
602
0
  const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
603
604
0
  const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
605
0
  const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
606
607
0
  const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
608
0
  const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
609
610
0
  if (xhor_var_n > 0 && y_var_n > 0) {
611
0
    *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
612
0
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
613
0
  } else {
614
0
    *hcorr = 1.0;
615
0
  }
616
0
  if (xver_var_n > 0 && z_var_n > 0) {
617
0
    *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
618
0
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
619
0
  } else {
620
0
    *vcorr = 1.0;
621
0
  }
622
0
}
623
624
static void get_variance_stats_hbd(const MACROBLOCK *x, int64_t *src_var,
625
0
                                   int64_t *rec_var) {
626
0
  const MACROBLOCKD *xd = &x->e_mbd;
627
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
628
0
  const struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
629
0
  const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
630
631
0
  BLOCK_SIZE bsize = mbmi->bsize;
632
0
  int bw = block_size_wide[bsize];
633
0
  int bh = block_size_high[bsize];
634
635
0
  static const int gau_filter[3][3] = {
636
0
    { 1, 2, 1 },
637
0
    { 2, 4, 2 },
638
0
    { 1, 2, 1 },
639
0
  };
640
641
0
  DECLARE_ALIGNED(16, uint16_t, dclevel[(MAX_SB_SIZE + 2) * (MAX_SB_SIZE + 2)]);
642
643
0
  uint16_t *pred_ptr = &dclevel[bw + 1];
644
0
  int pred_stride = xd->plane[0].dst.stride;
645
646
0
  for (int idy = -1; idy < bh + 1; ++idy) {
647
0
    for (int idx = -1; idx < bw + 1; ++idx) {
648
0
      int offset_idy = idy;
649
0
      int offset_idx = idx;
650
0
      if (idy == -1) offset_idy = 0;
651
0
      if (idy == bh) offset_idy = bh - 1;
652
0
      if (idx == -1) offset_idx = 0;
653
0
      if (idx == bw) offset_idx = bw - 1;
654
655
0
      int offset = offset_idy * pred_stride + offset_idx;
656
0
      pred_ptr[idy * bw + idx] = CONVERT_TO_SHORTPTR(pd->dst.buf)[offset];
657
0
    }
658
0
  }
659
660
0
  *rec_var = 0;
661
0
  for (int idy = 0; idy < bh; ++idy) {
662
0
    for (int idx = 0; idx < bw; ++idx) {
663
0
      int sum = 0;
664
0
      for (int iy = 0; iy < 3; ++iy)
665
0
        for (int ix = 0; ix < 3; ++ix)
666
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
667
0
                 gau_filter[iy][ix];
668
669
0
      sum = sum >> 4;
670
671
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
672
0
      *rec_var += diff * diff;
673
0
    }
674
0
  }
675
0
  *rec_var <<= 4;
676
677
0
  int src_stride = p->src.stride;
678
0
  for (int idy = -1; idy < bh + 1; ++idy) {
679
0
    for (int idx = -1; idx < bw + 1; ++idx) {
680
0
      int offset_idy = idy;
681
0
      int offset_idx = idx;
682
0
      if (idy == -1) offset_idy = 0;
683
0
      if (idy == bh) offset_idy = bh - 1;
684
0
      if (idx == -1) offset_idx = 0;
685
0
      if (idx == bw) offset_idx = bw - 1;
686
687
0
      int offset = offset_idy * src_stride + offset_idx;
688
0
      pred_ptr[idy * bw + idx] = CONVERT_TO_SHORTPTR(p->src.buf)[offset];
689
0
    }
690
0
  }
691
692
0
  *src_var = 0;
693
0
  for (int idy = 0; idy < bh; ++idy) {
694
0
    for (int idx = 0; idx < bw; ++idx) {
695
0
      int sum = 0;
696
0
      for (int iy = 0; iy < 3; ++iy)
697
0
        for (int ix = 0; ix < 3; ++ix)
698
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
699
0
                 gau_filter[iy][ix];
700
701
0
      sum = sum >> 4;
702
703
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
704
0
      *src_var += diff * diff;
705
0
    }
706
0
  }
707
0
  *src_var <<= 4;
708
0
}
709
710
static void get_variance_stats(const MACROBLOCK *x, int64_t *src_var,
711
0
                               int64_t *rec_var) {
712
0
  const MACROBLOCKD *xd = &x->e_mbd;
713
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
714
0
  const struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
715
0
  const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
716
717
0
  BLOCK_SIZE bsize = mbmi->bsize;
718
0
  int bw = block_size_wide[bsize];
719
0
  int bh = block_size_high[bsize];
720
721
0
  static const int gau_filter[3][3] = {
722
0
    { 1, 2, 1 },
723
0
    { 2, 4, 2 },
724
0
    { 1, 2, 1 },
725
0
  };
726
727
0
  DECLARE_ALIGNED(16, uint8_t, dclevel[(MAX_SB_SIZE + 2) * (MAX_SB_SIZE + 2)]);
728
729
0
  uint8_t *pred_ptr = &dclevel[bw + 1];
730
0
  int pred_stride = xd->plane[0].dst.stride;
731
732
0
  for (int idy = -1; idy < bh + 1; ++idy) {
733
0
    for (int idx = -1; idx < bw + 1; ++idx) {
734
0
      int offset_idy = idy;
735
0
      int offset_idx = idx;
736
0
      if (idy == -1) offset_idy = 0;
737
0
      if (idy == bh) offset_idy = bh - 1;
738
0
      if (idx == -1) offset_idx = 0;
739
0
      if (idx == bw) offset_idx = bw - 1;
740
741
0
      int offset = offset_idy * pred_stride + offset_idx;
742
0
      pred_ptr[idy * bw + idx] = pd->dst.buf[offset];
743
0
    }
744
0
  }
745
746
0
  *rec_var = 0;
747
0
  for (int idy = 0; idy < bh; ++idy) {
748
0
    for (int idx = 0; idx < bw; ++idx) {
749
0
      int sum = 0;
750
0
      for (int iy = 0; iy < 3; ++iy)
751
0
        for (int ix = 0; ix < 3; ++ix)
752
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
753
0
                 gau_filter[iy][ix];
754
755
0
      sum = sum >> 4;
756
757
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
758
0
      *rec_var += diff * diff;
759
0
    }
760
0
  }
761
0
  *rec_var <<= 4;
762
763
0
  int src_stride = p->src.stride;
764
0
  for (int idy = -1; idy < bh + 1; ++idy) {
765
0
    for (int idx = -1; idx < bw + 1; ++idx) {
766
0
      int offset_idy = idy;
767
0
      int offset_idx = idx;
768
0
      if (idy == -1) offset_idy = 0;
769
0
      if (idy == bh) offset_idy = bh - 1;
770
0
      if (idx == -1) offset_idx = 0;
771
0
      if (idx == bw) offset_idx = bw - 1;
772
773
0
      int offset = offset_idy * src_stride + offset_idx;
774
0
      pred_ptr[idy * bw + idx] = p->src.buf[offset];
775
0
    }
776
0
  }
777
778
0
  *src_var = 0;
779
0
  for (int idy = 0; idy < bh; ++idy) {
780
0
    for (int idx = 0; idx < bw; ++idx) {
781
0
      int sum = 0;
782
0
      for (int iy = 0; iy < 3; ++iy)
783
0
        for (int ix = 0; ix < 3; ++ix)
784
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
785
0
                 gau_filter[iy][ix];
786
787
0
      sum = sum >> 4;
788
789
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
790
0
      *src_var += diff * diff;
791
0
    }
792
0
  }
793
0
  *src_var <<= 4;
794
0
}
795
796
static void adjust_rdcost(const AV1_COMP *cpi, const MACROBLOCK *x,
797
0
                          RD_STATS *rd_cost, bool is_inter_pred) {
798
0
  if ((cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
799
0
       cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) &&
800
0
      is_inter_pred) {
801
    // Tune IQ and SSIMULACRA2 can be used to encode layered images, where
802
    // keyframes could be encoded at a lower or similar quality (i.e. higher
803
    // QP) than inter-coded frames.
804
    // In this case, libaom tends to underestimate the true RD cost of inter
805
    // prediction candidates, causing encoded file size to increase without a
806
    // corresponding increase in quality.
807
    // When both intra and inter encoded block candidates are available (with
808
    // rdcosts close to each other), the intra-coded candidate was subjectively
809
    // observed to be a bit less blurry, with a corresponding increase in
810
    // SSIMULACRA 2 scores.
811
    // Apply a 1.125x inter block bias to increase overall perceptual
812
    // compression efficiency, while still allowing the encoder to pick inter
813
    // prediction when it's beneficial.
814
0
    rd_cost->dist += rd_cost->dist >> 3;
815
0
    rd_cost->rdcost += rd_cost->rdcost >> 3;
816
0
    return;
817
0
  }
818
819
0
  if (cpi->oxcf.algo_cfg.sharpness != 3) return;
820
821
0
  if (frame_is_kf_gf_arf(cpi)) return;
822
823
0
  int64_t src_var, rec_var;
824
825
0
  const bool is_hbd = is_cur_buf_hbd(&x->e_mbd);
826
0
  if (is_hbd)
827
0
    get_variance_stats_hbd(x, &src_var, &rec_var);
828
0
  else
829
0
    get_variance_stats(x, &src_var, &rec_var);
830
831
0
  if (src_var <= rec_var) return;
832
833
0
  int64_t var_offset = src_var - rec_var;
834
835
0
  rd_cost->dist += var_offset;
836
837
0
  rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
838
0
}
839
840
static void adjust_cost(const AV1_COMP *cpi, const MACROBLOCK *x,
841
0
                        int64_t *rd_cost, bool is_inter_pred) {
842
0
  if ((cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
843
0
       cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) &&
844
0
      is_inter_pred) {
845
0
    *rd_cost += *rd_cost >> 3;
846
0
    return;
847
0
  }
848
849
0
  if (cpi->oxcf.algo_cfg.sharpness != 3) return;
850
851
0
  if (frame_is_kf_gf_arf(cpi)) return;
852
853
0
  int64_t src_var, rec_var;
854
0
  const bool is_hbd = is_cur_buf_hbd(&x->e_mbd);
855
856
0
  if (is_hbd)
857
0
    get_variance_stats_hbd(x, &src_var, &rec_var);
858
0
  else
859
0
    get_variance_stats(x, &src_var, &rec_var);
860
861
0
  if (src_var <= rec_var) return;
862
863
0
  int64_t var_offset = src_var - rec_var;
864
865
0
  *rd_cost += RDCOST(x->rdmult, 0, var_offset);
866
0
}
867
868
static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
869
0
                       int64_t *sse_y) {
870
0
  const AV1_COMMON *cm = &cpi->common;
871
0
  const int num_planes = av1_num_planes(cm);
872
0
  const MACROBLOCKD *xd = &x->e_mbd;
873
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
874
0
  int64_t total_sse = 0;
875
0
  for (int plane = 0; plane < num_planes; ++plane) {
876
0
    if (plane && !xd->is_chroma_ref) break;
877
0
    const struct macroblock_plane *const p = &x->plane[plane];
878
0
    const struct macroblockd_plane *const pd = &xd->plane[plane];
879
0
    const BLOCK_SIZE bs =
880
0
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
881
0
    unsigned int sse;
882
883
0
    cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
884
0
                            pd->dst.stride, &sse);
885
0
    total_sse += sse;
886
0
    if (!plane && sse_y) *sse_y = sse;
887
0
  }
888
0
  total_sse <<= 4;
889
0
  return total_sse;
890
0
}
891
892
// Computes the sum of squared differences between coeff and dqcoeff over
// block_size entries.
//
// \param[in]  coeff       Original transform coefficients.
// \param[in]  dqcoeff     Dequantized transform coefficients.
// \param[in]  block_size  Number of coefficients to process.
// \param[out] ssz         Receives the sum of squares of coeff.
// \return Sum of squared (coeff - dqcoeff).
//
// Operands are widened to 64 bits before subtracting/multiplying so that the
// per-element products cannot overflow int for large coefficient magnitudes.
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t error = 0, sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = sqcoeff;
  return error;
}
906
907
// Low-precision (16-bit coefficient) block error: returns the sum of squared
// differences between coeff and dqcoeff over block_size entries.
//
// The difference of two int16_t values can reach +/-65535, whose square does
// not fit in a 32-bit int, so the difference is widened to 64 bits before it
// is squared.
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t error = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
918
919
#if CONFIG_AV1_HIGHBITDEPTH
920
// High bit-depth block error. Accumulates the squared coeff/dqcoeff
// differences and the squared coefficients over block_size entries, then
// rounds and right-shifts both sums by 2 * (bd - 8).
//
// \param[out] ssz  Receives the normalized sum of squared coefficients.
// \return Normalized sum of squared differences.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = (1 << shift) >> 1;
  int64_t sum_sq_diff = 0;
  int64_t sum_sq_coeff = 0;

  for (int i = 0; i < block_size; ++i) {
    const int64_t c = coeff[i];
    const int64_t diff = coeff[i] - dqcoeff[i];
    sum_sq_diff += diff * diff;
    sum_sq_coeff += c * c;
  }

  *ssz = (sum_sq_coeff + rounding) >> shift;
  return (sum_sq_diff + rounding) >> shift;
}
939
#endif
940
941
// Returns 1 when the given diagonal intra mode should be skipped because the
// best intra mode found so far is not one of the two modes paired with it
// below; returns 0 otherwise (including for every non-listed mode).
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
957
958
// Returns the cost of signalling the inter prediction mode `mode` under the
// given mode context.
//
// Compound modes are looked up directly in inter_compound_mode_cost. Single
// reference modes accumulate the cost of each decision taken along the
// NEWMV -> GLOBALMV -> NEARESTMV/NEARMV chain, each with its own context
// extracted from mode_context.
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  const int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[newmv_ctx][0];

  // Not NEWMV: pay for that decision, then decide GLOBALMV or not.
  const int not_newmv_cost = mode_costs->newmv_mode_cost[newmv_ctx][1];
  const int16_t globalmv_ctx =
      (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV) {
    return not_newmv_cost + mode_costs->zeromv_mode_cost[globalmv_ctx][0];
  }

  // Neither NEWMV nor GLOBALMV: the last symbol picks NEARESTMV (0) or not (1).
  const int16_t refmv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  return not_newmv_cost + mode_costs->zeromv_mode_cost[globalmv_ctx][1] +
         mode_costs->refmv_mode_cost[refmv_ctx][mode != NEARESTMV];
}
988
989
// Maps a compound prediction mode to the single-reference mode used for the
// reference selected by ref_idx (0: first reference, non-zero: second).
static inline PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
                                              int ref_idx) {
  if (ref_idx) return compound_ref1_mode(this_mode);
  return compound_ref0_mode(this_mode);
}
994
995
static inline void estimate_ref_frame_costs(
996
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
997
    int segment_id, unsigned int *ref_costs_single,
998
0
    unsigned int (*ref_costs_comp)[REF_FRAMES]) {
999
0
  int seg_ref_active =
1000
0
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
1001
0
  if (seg_ref_active) {
1002
0
    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
1003
0
    int ref_frame;
1004
0
    for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
1005
0
      memset(ref_costs_comp[ref_frame], 0,
1006
0
             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
1007
0
  } else {
1008
0
    int intra_inter_ctx = av1_get_intra_inter_context(xd);
1009
0
    ref_costs_single[INTRA_FRAME] =
1010
0
        mode_costs->intra_inter_cost[intra_inter_ctx][0];
1011
0
    unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];
1012
1013
0
    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
1014
0
      ref_costs_single[i] = base_cost;
1015
1016
0
    const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
1017
0
    const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
1018
0
    const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
1019
0
    const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
1020
0
    const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
1021
0
    const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);
1022
1023
    // Determine cost of a single ref frame, where frame types are represented
1024
    // by a tree:
1025
    // Level 0: add cost whether this ref is a forward or backward ref
1026
0
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
1027
0
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
1028
0
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
1029
0
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
1030
0
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
1031
0
    ref_costs_single[ALTREF2_FRAME] +=
1032
0
        mode_costs->single_ref_cost[ctx_p1][0][1];
1033
0
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
1034
1035
    // Level 1: if this ref is forward ref,
1036
    // add cost whether it is last/last2 or last3/golden
1037
0
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
1038
0
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
1039
0
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
1040
0
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
1041
1042
    // Level 1: if this ref is backward ref
1043
    // then add cost whether this ref is altref or backward ref
1044
0
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
1045
0
    ref_costs_single[ALTREF2_FRAME] +=
1046
0
        mode_costs->single_ref_cost[ctx_p2][1][0];
1047
0
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];
1048
1049
    // Level 2: further add cost whether this ref is last or last2
1050
0
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
1051
0
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];
1052
1053
    // Level 2: last3 or golden
1054
0
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
1055
0
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];
1056
1057
    // Level 2: bwdref or altref2
1058
0
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
1059
0
    ref_costs_single[ALTREF2_FRAME] +=
1060
0
        mode_costs->single_ref_cost[ctx_p6][5][1];
1061
1062
0
    if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
1063
      // Similar to single ref, determine cost of compound ref frames.
1064
      // cost_compound_refs = cost_first_ref + cost_second_ref
1065
0
      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
1066
0
      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
1067
0
      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
1068
0
      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
1069
0
      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);
1070
1071
0
      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
1072
0
      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };
1073
1074
0
      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
1075
0
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
1076
0
              base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
1077
0
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
1078
0
      ref_bicomp_costs[ALTREF_FRAME] = 0;
1079
1080
      // cost of first ref frame
1081
0
      ref_bicomp_costs[LAST_FRAME] +=
1082
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
1083
0
      ref_bicomp_costs[LAST2_FRAME] +=
1084
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
1085
0
      ref_bicomp_costs[LAST3_FRAME] +=
1086
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
1087
0
      ref_bicomp_costs[GOLDEN_FRAME] +=
1088
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
1089
1090
0
      ref_bicomp_costs[LAST_FRAME] +=
1091
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
1092
0
      ref_bicomp_costs[LAST2_FRAME] +=
1093
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];
1094
1095
0
      ref_bicomp_costs[LAST3_FRAME] +=
1096
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
1097
0
      ref_bicomp_costs[GOLDEN_FRAME] +=
1098
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];
1099
1100
      // cost of second ref frame
1101
0
      ref_bicomp_costs[BWDREF_FRAME] +=
1102
0
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
1103
0
      ref_bicomp_costs[ALTREF2_FRAME] +=
1104
0
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
1105
0
      ref_bicomp_costs[ALTREF_FRAME] +=
1106
0
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
1107
1108
0
      ref_bicomp_costs[BWDREF_FRAME] +=
1109
0
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
1110
0
      ref_bicomp_costs[ALTREF2_FRAME] +=
1111
0
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
1112
1113
      // cost: if one ref frame is forward ref, the other ref is backward ref
1114
0
      int ref0, ref1;
1115
0
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
1116
0
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
1117
0
          ref_costs_comp[ref0][ref1] =
1118
0
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
1119
0
        }
1120
0
      }
1121
1122
      // cost: if both ref frames are the same side.
1123
0
      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
1124
0
      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
1125
0
      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
1126
0
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
1127
0
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
1128
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
1129
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
1130
0
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
1131
0
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
1132
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
1133
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
1134
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
1135
0
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
1136
0
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
1137
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
1138
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
1139
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
1140
0
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
1141
0
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
1142
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
1143
0
    } else {
1144
0
      int ref0, ref1;
1145
0
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
1146
0
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
1147
0
          ref_costs_comp[ref0][ref1] = 512;
1148
0
      }
1149
0
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
1150
0
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
1151
0
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
1152
0
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
1153
0
    }
1154
0
  }
1155
0
}
1156
1157
static inline void store_coding_context(
1158
#if CONFIG_INTERNAL_STATS
1159
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
1160
#else
1161
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
1162
#endif  // CONFIG_INTERNAL_STATS
1163
0
    int skippable) {
1164
0
  MACROBLOCKD *const xd = &x->e_mbd;
1165
1166
  // Take a snapshot of the coding context so it can be
1167
  // restored if we decide to encode this way
1168
0
  ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
1169
0
  ctx->skippable = skippable;
1170
#if CONFIG_INTERNAL_STATS
1171
  ctx->best_mode_index = mode_index;
1172
#endif  // CONFIG_INTERNAL_STATS
1173
0
  ctx->mic = *xd->mi[0];
1174
0
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
1175
0
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
1176
0
}
1177
1178
static inline void setup_buffer_ref_mvs_inter(
1179
    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
1180
0
    BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
1181
0
  const AV1_COMMON *cm = &cpi->common;
1182
0
  const int num_planes = av1_num_planes(cm);
1183
0
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
1184
0
      av1_get_scaled_ref_frame(cpi, ref_frame);
1185
0
  MACROBLOCKD *const xd = &x->e_mbd;
1186
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
1187
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1188
0
  const struct scale_factors *const sf =
1189
0
      get_ref_scale_factors_const(cm, ref_frame);
1190
0
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
1191
0
  assert(yv12 != NULL);
1192
1193
0
  if (scaled_ref_frame) {
1194
    // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
1195
    // support scaling.
1196
0
    av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
1197
0
                         num_planes);
1198
0
  } else {
1199
0
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
1200
0
  }
1201
1202
  // Gets an initial list of candidate vectors from neighbours and orders them
1203
0
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
1204
0
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
1205
0
                   mbmi_ext->mode_context);
1206
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
1207
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
1208
0
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
1209
  // Further refinement that is encode side only to test the top few candidates
1210
  // in full and choose the best as the center point for subsequent searches.
1211
  // The current implementation doesn't support scaling.
1212
0
  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
1213
0
              ref_frame, block_size);
1214
1215
  // Go back to unscaled reference.
1216
0
  if (scaled_ref_frame) {
1217
    // We had temporarily setup pred block based on scaled reference above. Go
1218
    // back to unscaled reference now, for subsequent use.
1219
0
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
1220
0
  }
1221
0
}
1222
1223
0
#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
1224
0
#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
1225
1226
// TODO(jingning): this mv clamping function should be block size dependent.
1227
0
static inline void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
1228
0
  const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
1229
0
                                     xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
1230
0
                                     xd->mb_to_top_edge - LEFT_TOP_MARGIN,
1231
0
                                     xd->mb_to_bottom_edge +
1232
0
                                         RIGHT_BOTTOM_MARGIN };
1233
0
  clamp_mv(mv, &mv_limits);
1234
0
}
1235
1236
/* If the current mode shares the same mv with other modes with higher cost,
1237
 * skip this mode. */
1238
static AOM_FORCE_INLINE int skip_repeated_mv(
1239
    const AV1_COMMON *const cm, const MACROBLOCK *const x,
1240
    PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frames[2],
1241
0
    InterModeSearchState *search_state) {
1242
0
  const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
1243
0
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
1244
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1245
0
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1246
0
  PREDICTION_MODE compare_mode = MB_MODE_COUNT;
1247
0
  if (!is_comp_pred) {
1248
0
    if (this_mode == NEARMV) {
1249
0
      if (ref_mv_count == 0) {
1250
        // NEARMV has the same motion vector as NEARESTMV
1251
0
        compare_mode = NEARESTMV;
1252
0
      }
1253
0
      if (ref_mv_count == 1 &&
1254
0
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1255
        // NEARMV has the same motion vector as GLOBALMV
1256
0
        compare_mode = GLOBALMV;
1257
0
      }
1258
0
    }
1259
0
    if (this_mode == GLOBALMV) {
1260
0
      if (ref_mv_count == 0 &&
1261
0
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1262
        // GLOBALMV has the same motion vector as NEARESTMV
1263
0
        compare_mode = NEARESTMV;
1264
0
      }
1265
0
      if (ref_mv_count == 1) {
1266
        // GLOBALMV has the same motion vector as NEARMV
1267
0
        compare_mode = NEARMV;
1268
0
      }
1269
0
    }
1270
1271
0
    if (compare_mode != MB_MODE_COUNT) {
1272
      // Use modelled_rd to check whether compare mode was searched
1273
0
      if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
1274
0
          INT64_MAX) {
1275
0
        const int16_t mode_ctx =
1276
0
            av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
1277
0
        const int compare_cost =
1278
0
            cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
1279
0
        const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);
1280
1281
        // Only skip if the mode cost is larger than compare mode cost
1282
0
        if (this_cost > compare_cost) {
1283
0
          search_state->modelled_rd[this_mode][0][ref_frames[0]] =
1284
0
              search_state->modelled_rd[compare_mode][0][ref_frames[0]];
1285
0
          return 1;
1286
0
        }
1287
0
      }
1288
0
    }
1289
0
  }
1290
0
  return 0;
1291
0
}
1292
1293
static inline int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1294
                                     const AV1_COMMON *cm,
1295
0
                                     const MACROBLOCK *x) {
1296
0
  const MACROBLOCKD *const xd = &x->e_mbd;
1297
0
  *out_mv = in_mv;
1298
0
  lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1299
0
                     cm->features.cur_frame_force_integer_mv);
1300
0
  clamp_mv2(&out_mv->as_mv, xd);
1301
0
  return av1_is_fullmv_in_range(&x->mv_limits,
1302
0
                                get_fullmv_from_mv(&out_mv->as_mv));
1303
0
}
1304
1305
// To use single newmv directly for compound modes, need to clamp the mv to the
1306
// valid mv range. Without this, encoder would generate out of range mv, and
1307
// this is seen in 8k encoding.
1308
static inline void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1309
0
                                     int ref_idx) {
1310
0
  const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1311
0
  SubpelMvLimits mv_limits;
1312
1313
0
  av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1314
0
  clamp_mv(&mv->as_mv, &mv_limits);
1315
0
}
1316
1317
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
1318
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
1319
                            int *const rate_mv, HandleInterModeArgs *const args,
1320
0
                            inter_mode_info *mode_info) {
1321
0
  MACROBLOCKD *const xd = &x->e_mbd;
1322
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
1323
0
  const int is_comp_pred = has_second_ref(mbmi);
1324
0
  const PREDICTION_MODE this_mode = mbmi->mode;
1325
0
  const int refs[2] = { mbmi->ref_frame[0],
1326
0
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
1327
0
  const int ref_mv_idx = mbmi->ref_mv_idx;
1328
1329
0
  if (is_comp_pred) {
1330
0
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
1331
0
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
1332
0
    if (this_mode == NEW_NEWMV) {
1333
0
      if (valid_mv0) {
1334
0
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
1335
0
        clamp_mv_in_range(x, &cur_mv[0], 0);
1336
0
      }
1337
0
      if (valid_mv1) {
1338
0
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
1339
0
        clamp_mv_in_range(x, &cur_mv[1], 1);
1340
0
      }
1341
0
      *rate_mv = 0;
1342
0
      for (int i = 0; i < 2; ++i) {
1343
0
        const int_mv ref_mv = av1_get_ref_mv(x, i);
1344
0
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
1345
0
                                    x->mv_costs->nmv_joint_cost,
1346
0
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1347
0
      }
1348
0
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
1349
0
      if (valid_mv1) {
1350
0
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
1351
0
        clamp_mv_in_range(x, &cur_mv[1], 1);
1352
0
      }
1353
0
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
1354
0
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
1355
0
                                 x->mv_costs->nmv_joint_cost,
1356
0
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1357
0
    } else {
1358
0
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
1359
0
      if (valid_mv0) {
1360
0
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
1361
0
        clamp_mv_in_range(x, &cur_mv[0], 0);
1362
0
      }
1363
0
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
1364
0
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
1365
0
                                 x->mv_costs->nmv_joint_cost,
1366
0
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1367
0
    }
1368
0
  } else {
1369
    // Single ref case.
1370
0
    const int ref_idx = 0;
1371
0
    int search_range = INT_MAX;
1372
1373
0
    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
1374
0
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
1375
0
      int min_mv_diff = INT_MAX;
1376
0
      int best_match = -1;
1377
0
      MV prev_ref_mv[2] = { { 0 } };
1378
0
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
1379
0
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
1380
0
                                                     idx, &x->mbmi_ext)
1381
0
                               .as_mv;
1382
0
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
1383
0
                                       abs(ref_mv.col - prev_ref_mv[idx].col));
1384
1385
0
        if (min_mv_diff > ref_mv_diff) {
1386
0
          min_mv_diff = ref_mv_diff;
1387
0
          best_match = idx;
1388
0
        }
1389
0
      }
1390
1391
0
      if (min_mv_diff < (16 << 3)) {
1392
0
        if (args->single_newmv_valid[best_match][refs[0]]) {
1393
0
          search_range = min_mv_diff;
1394
0
          search_range +=
1395
0
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
1396
0
                         prev_ref_mv[best_match].row),
1397
0
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
1398
0
                         prev_ref_mv[best_match].col));
1399
          // Get full pixel search range.
1400
0
          search_range = (search_range + 4) >> 3;
1401
0
        }
1402
0
      }
1403
0
    }
1404
1405
0
    int_mv best_mv;
1406
0
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
1407
0
                             mode_info, &best_mv, args);
1408
0
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;
1409
1410
0
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
1411
0
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
1412
0
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
1413
0
    cur_mv[0].as_int = best_mv.as_int;
1414
1415
    // Return after single_newmv is set.
1416
0
    if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
1417
0
  }
1418
1419
0
  return 0;
1420
0
}
1421
1422
static inline void update_mode_start_end_index(
1423
    const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
1424
    int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
1425
0
    int interintra_allowed, int eval_motion_mode) {
1426
0
  *mode_index_start = (int)SIMPLE_TRANSLATION;
1427
0
  *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1428
0
  if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1429
0
    if (!eval_motion_mode) {
1430
0
      *mode_index_end = (int)SIMPLE_TRANSLATION;
1431
0
    } else {
1432
      // Set the start index appropriately to process motion modes other than
1433
      // simple translation
1434
0
      *mode_index_start = 1;
1435
0
    }
1436
0
  }
1437
0
  if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
1438
0
    *mode_index_end = SIMPLE_TRANSLATION;
1439
0
}
1440
1441
// Increase rd cost of warp and obmc motion modes for low complexity decoding.
1442
static inline void increase_motion_mode_rd(const MB_MODE_INFO *const best_mbmi,
1443
                                           const MB_MODE_INFO *const this_mbmi,
1444
                                           int64_t *const best_scaled_rd,
1445
                                           int64_t *const this_scaled_rd,
1446
                                           int rd_warp_bias_scale_pct,
1447
0
                                           float rd_obmc_bias_scale_pct) {
1448
0
  if (*best_scaled_rd == INT64_MAX || *this_scaled_rd == INT64_MAX) return;
1449
1450
  // Experiments have been performed with increasing the RD cost of warp and
1451
  // obmc motion modes at the below locations of inter mode evaluation.
1452
  // (1). Inter mode evaluation loop in av1_rd_pick_inter_mode().
1453
  // (2). Motion mode evaluation during handle_inter_mode() call.
1454
  // (3). Motion mode evaluation for winner motion modes.
1455
  // (4). Tx search for best inter candidates.
1456
  // Based on the speed quality trade-off results of this speed feature, the rd
1457
  // bias logic is enabled only at (2), (3) and (4).
1458
0
  const double rd_warp_bias_scale = rd_warp_bias_scale_pct / 100.0;
1459
0
  const double rd_obmc_bias_scale = rd_obmc_bias_scale_pct / 100.0;
1460
0
  if (best_mbmi->motion_mode == WARPED_CAUSAL)
1461
0
    *best_scaled_rd += (int64_t)(rd_warp_bias_scale * *best_scaled_rd);
1462
0
  else if (best_mbmi->motion_mode == OBMC_CAUSAL)
1463
0
    *best_scaled_rd += (int64_t)(rd_obmc_bias_scale * *best_scaled_rd);
1464
1465
0
  if (this_mbmi->motion_mode == WARPED_CAUSAL)
1466
0
    *this_scaled_rd += (int64_t)(rd_warp_bias_scale * *this_scaled_rd);
1467
0
  else if (this_mbmi->motion_mode == OBMC_CAUSAL)
1468
0
    *this_scaled_rd += (int64_t)(rd_obmc_bias_scale * *this_scaled_rd);
1469
0
}
1470
1471
/*!\brief AV1 motion mode search
1472
 *
1473
 * \ingroup inter_mode_search
1474
 * Function to search over and determine the motion mode. It will update
1475
 * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1476
 * WARPED_CAUSAL and determine any necessary side information for the selected
1477
 * motion mode. It will also perform the full transform search, unless the
1478
 * input parameter do_tx_search indicates to do an estimation of the RD rather
1479
 * than an RD corresponding to a full transform search. It will return the
1480
 * RD for the final motion_mode.
1481
 * Do the RD search for a given inter mode and compute all information relevant
1482
 * to the input mode. It will compute the best MV,
1483
 * compound parameters (if the mode is a compound mode) and interpolation filter
1484
 * parameters.
1485
 *
1486
 * \param[in]     cpi               Top-level encoder structure.
1487
 * \param[in]     tile_data         Pointer to struct holding adaptive
1488
 *                                  data/contexts/models for the tile during
1489
 *                                  encoding.
1490
 * \param[in]     x                 Pointer to struct holding all the data for
1491
 *                                  the current macroblock.
1492
 * \param[in]     bsize             Current block size.
1493
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
1494
 *                                  information.
1495
 * \param[in,out] rd_stats_y        Struct to keep track of the RD information
1496
 *                                  for only the Y plane.
1497
 * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
1498
 *                                  for only the UV planes.
1499
 * \param[in]     args              HandleInterModeArgs struct holding
1500
 *                                  miscellaneous arguments for inter mode
1501
 *                                  search. See the documentation for this
1502
 *                                  struct for a description of each member.
1503
 * \param[in]     ref_best_rd       Best RD found so far for this block.
1504
 *                                  It is used for early termination of this
1505
 *                                  search if the RD exceeds this value.
1506
 * \param[in,out] ref_skip_rd       A length 2 array, where skip_rd[0] is the
1507
 *                                  best total RD for a skip mode so far, and
1508
 *                                  skip_rd[1] is the best RD for a skip mode so
1509
 *                                  far in luma. This is used as a speed feature
1510
 *                                  to skip the transform search if the computed
1511
 *                                  skip RD for the current mode is not better
1512
 *                                  than the best skip_rd so far.
1513
 * \param[in,out] rate_mv           The rate associated with the motion vectors.
1514
 *                                  This will be modified if a motion search is
1515
 *                                  done in the motion mode search.
1516
 * \param[in,out] orig_dst          A prediction buffer to hold a computed
1517
 *                                  prediction. This will eventually hold the
1518
 *                                  final prediction, and the tmp_dst info will
1519
 *                                  be copied here.
1520
 * \param[in,out] best_est_rd       Estimated RD for motion mode search if
1521
 *                                  do_tx_search (see below) is 0.
1522
 * \param[in]     do_tx_search      Parameter to indicate whether or not to do
1523
 *                                  a full transform search. This will compute
1524
 *                                  an estimated RD for the modes without the
1525
 *                                  transform search and later perform the full
1526
 *                                  transform search on the best candidates.
1527
 * \param[in]     inter_modes_info  InterModesInfo struct to hold inter mode
1528
 *                                  information to perform a full transform
1529
 *                                  search only on winning candidates searched
1530
 *                                  with an estimate for transform coding RD.
1531
 * \param[in]     eval_motion_mode  Boolean whether or not to evaluate motion
1532
 *                                  modes other than SIMPLE_TRANSLATION.
1533
 * \param[out]    yrd               Stores the rdcost corresponding to encoding
1534
 *                                  the luma plane.
1535
 * \return Returns INT64_MAX if the determined motion mode is invalid and the
1536
 * current motion mode being tested should be skipped. It returns 0 if the
1537
 * motion mode search is a success.
1538
 */
1539
static int64_t motion_mode_rd(
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
    int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
    int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
    int eval_motion_mode, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // Rate on entry; every candidate's rate is rebuilt starting from this value.
  const int rate2_nocoeff = rd_stats->rate;
  int best_xskip_txfm = 0;
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  // MV rate on entry; used to swap in a refined MV rate for a candidate.
  const int rate_mv0 = *rate_mv;
  const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
                                 is_interintra_allowed(mbmi) &&
                                 mbmi->compound_idx;
  WARP_SAMPLE_INFO *const warp_sample_info =
      &x->warp_sample_info[mbmi->ref_frame[0]];
  int *pts0 = warp_sample_info->pts;
  int *pts_inref0 = warp_sample_info->pts_inref;

  assert(mbmi->ref_frame[1] != INTRA_FRAME);
  // Remember the second reference so it can be written back to mbmi before a
  // candidate is abandoned in the estimate path and again after the loop.
  const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
  av1_invalid_rd_stats(&best_rd_stats);
  mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
  *yrd = INT64_MAX;
  if (features->switchable_motion_mode) {
    // Determine which motion modes to search if more than SIMPLE_TRANSLATION
    // is allowed.
    last_motion_mode_allowed = motion_mode_allowed(
        xd->global_motion, xd, mbmi, features->allow_warped_motion);
  }

  if (last_motion_mode_allowed == WARPED_CAUSAL) {
    // Collect projection samples used in least squares approximation of
    // the warped motion parameters if WARPED_CAUSAL is going to be searched.
    // A negative cached count means the samples have not been gathered yet
    // for this reference frame.
    if (warp_sample_info->num < 0) {
      warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
    }
    mbmi->num_proj_ref = warp_sample_info->num;
  }
  const int total_samples = mbmi->num_proj_ref;
  if (total_samples == 0) {
    // Do not search WARPED_CAUSAL if there are no samples to use to determine
    // warped parameters.
    last_motion_mode_allowed = OBMC_CAUSAL;
  }

  // Snapshot of the mode info; each loop iteration restarts from this copy.
  const MB_MODE_INFO base_mbmi = *mbmi;
  MB_MODE_INFO best_mbmi;
  const int interp_filter = features->interp_filter;
  const int switchable_rate =
      av1_is_interp_needed(xd)
          ? av1_get_switchable_rate(x, xd, interp_filter,
                                    cm->seq_params->enable_dual_filter)
          : 0;
  int64_t best_rd = INT64_MAX;
  int best_rate_mv = rate_mv0;
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int mode_index_start, mode_index_end;
  const int txfm_rd_gate_level =
      get_txfm_rd_gate_level(cm->seq_params->enable_masked_compound,
                             cpi->sf.inter_sf.txfm_rd_gate_level, bsize,
                             TX_SEARCH_MOTION_MODE, eval_motion_mode);

  // Modify the start and end index according to speed features. For example,
  // if SIMPLE_TRANSLATION has already been searched according to
  // the motion_mode_for_winner_cand speed feature, update the mode_index_start
  // to avoid searching it again.
  update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
                              last_motion_mode_allowed, interintra_allowed,
                              eval_motion_mode);
  // Main function loop. This loops over all of the possible motion modes and
  // computes RD to determine the best one. This process includes computing
  // any necessary side information for the motion mode and performing the
  // transform search.
  for (int mode_index = mode_index_start; mode_index <= mode_index_end;
       mode_index++) {
    // When skip_motion_mode is set only mode_index 0 is evaluated.
    if (args->skip_motion_mode && mode_index) continue;
    int tmp_rate2 = rate2_nocoeff;
    // Indices beyond the last allowed motion mode denote the interintra
    // candidate.
    const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
    int tmp_rate_mv = rate_mv0;

    *mbmi = base_mbmi;
    if (is_interintra_mode) {
      // Only use SIMPLE_TRANSLATION for interintra
      mbmi->motion_mode = SIMPLE_TRANSLATION;
    } else {
      mbmi->motion_mode = (MOTION_MODE)mode_index;
      assert(mbmi->ref_frame[1] != INTRA_FRAME);
    }

    // With the sharpness setting at 3, OBMC and warped candidates are not
    // evaluated.
    if (cpi->oxcf.algo_cfg.sharpness == 3 &&
        (mbmi->motion_mode == OBMC_CAUSAL ||
         mbmi->motion_mode == WARPED_CAUSAL))
      continue;

    // Do not search OBMC if the probability of selecting it is below a
    // predetermined threshold for this update_type and block size.
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    int use_actual_frame_probs = 1;
    int prune_obmc;
#if CONFIG_FPMT_TEST
    use_actual_frame_probs =
        (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
    if (!use_actual_frame_probs) {
      prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
#endif
    if (use_actual_frame_probs) {
      prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
    if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
        mbmi->motion_mode == OBMC_CAUSAL)
      continue;

    if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
      // SIMPLE_TRANSLATION mode: no need to recalculate.
      // The prediction is calculated before motion_mode_rd() is called in
      // handle_inter_mode()
    } else if (mbmi->motion_mode == OBMC_CAUSAL) {
      const uint32_t cur_mv = mbmi->mv[0].as_int;
      // OBMC_CAUSAL not allowed for compound prediction
      assert(!is_comp_pred);
      if (have_newmv_in_inter_mode(this_mode)) {
        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
                                 &mbmi->mv[0], NULL);
        // Replace the entry MV rate with the rate of the newly searched MV.
        tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
      }
      if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
        // Build the predictor according to the current motion vector if it has
        // not already been built
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                      0, av1_num_planes(cm) - 1);
      }
      // Build the inter predictor by blending the predictor corresponding to
      // this MV, and the neighboring blocks using the OBMC model
      av1_build_obmc_inter_prediction(
          cm, xd, args->above_pred_buf, args->above_pred_stride,
          args->left_pred_buf, args->left_pred_stride);
      // The WARPED_CAUSAL branch below is compiled out when
      // CONFIG_REALTIME_ONLY is set.
#if !CONFIG_REALTIME_ONLY
    } else if (mbmi->motion_mode == WARPED_CAUSAL) {
      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
      mbmi->motion_mode = WARPED_CAUSAL;
      mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
      mbmi->interp_filters =
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));

      // Work on local copies so the cached sample arrays (pts0 / pts_inref0)
      // stay intact for the MV refinement below.
      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
      // Select the samples according to motion vector difference
      if (mbmi->num_proj_ref > 1) {
        mbmi->num_proj_ref = av1_selectSamples(
            &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
      }

      // Compute the warped motion parameters with a least squares fit
      //  using the collected samples
      // A zero return from av1_find_projection() is the path that proceeds;
      // a non-zero return skips this candidate.
      if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
                               mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
                               &mbmi->wm_params, mi_row, mi_col)) {
        assert(!is_comp_pred);
        if (have_newmv_in_inter_mode(this_mode)) {
          // Refine MV for NEWMV mode
          const int_mv mv0 = mbmi->mv[0];
          const WarpedMotionParams wm_params0 = mbmi->wm_params;
          const int num_proj_ref0 = mbmi->num_proj_ref;

          const int_mv ref_mv = av1_get_ref_mv(x, 0);
          SUBPEL_MOTION_SEARCH_PARAMS ms_params;
          av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
                                            &ref_mv.as_mv, NULL);

          // Refine MV in a small range.
          av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
                               total_samples, cpi->sf.mv_sf.warp_search_method,
                               cpi->sf.mv_sf.warp_search_iters);

          if (mv0.as_int != mbmi->mv[0].as_int) {
            // Keep the refined MV and WM parameters.
            tmp_rate_mv = av1_mv_bit_cost(
                &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
                x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
          } else {
            // Restore the old MV and WM parameters.
            mbmi->mv[0] = mv0;
            mbmi->wm_params = wm_params0;
            mbmi->num_proj_ref = num_proj_ref0;
          }
        }

        // Build the warped predictor
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                      av1_num_planes(cm) - 1);
      } else {
        continue;
      }
#endif  // !CONFIG_REALTIME_ONLY
    } else if (is_interintra_mode) {
      const int ret =
          av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
                                      &tmp_rate_mv, &tmp_rate2, orig_dst);
      // A negative return skips the interintra candidate.
      if (ret < 0) continue;
    }

    // If we are searching newmv and the mv is the same as refmv, skip the
    // current mode
    if (!av1_check_newmv_joint_nonzero(cm, x)) continue;

    // Update rd_stats for the current motion mode
    txfm_info->skip_txfm = 0;
    rd_stats->dist = 0;
    rd_stats->sse = 0;
    rd_stats->skip_txfm = 1;
    rd_stats->rate = tmp_rate2;
    const ModeCosts *mode_costs = &x->mode_costs;
    // The switchable interpolation filter rate is not charged for
    // WARPED_CAUSAL candidates.
    if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
    if (interintra_allowed) {
      // Add the interintra_cost entry selected by whether the second
      // reference is INTRA_FRAME.
      rd_stats->rate +=
          mode_costs->interintra_cost[size_group_lookup[bsize]]
                                     [mbmi->ref_frame[1] == INTRA_FRAME];
    }
    if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
        (mbmi->ref_frame[1] != INTRA_FRAME)) {
      // Two motion-mode cost tables exist; which one applies depends on
      // whether WARPED_CAUSAL is among the allowed modes.
      if (last_motion_mode_allowed == WARPED_CAUSAL) {
        rd_stats->rate +=
            mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
      } else {
        rd_stats->rate +=
            mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
      }
    }

    // Stays INT64_MAX unless the full transform search below computes it.
    int64_t this_yrd = INT64_MAX;

    if (!do_tx_search) {
      // Avoid doing a transform search here to speed up the overall mode
      // search. It will be done later in the mode search if the current
      // motion mode seems promising.
      int64_t curr_sse = -1;
      int64_t sse_y = -1;
      int est_residue_cost = 0;
      int64_t est_dist = 0;
      int64_t est_rd = 0;
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        curr_sse = get_sse(cpi, x, &sse_y);
        const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
                                                 &est_residue_cost, &est_dist);
        (void)has_est_rd;
        assert(has_est_rd);
      } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
                 cpi->sf.rt_sf.use_nonrd_pick_mode) {
        model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
            cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
            NULL, &curr_sse, NULL, NULL, NULL);
        sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
      }
      est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
      // Drop the candidate when 80% of its estimated RD still exceeds the best
      // estimate seen so far.
      if (est_rd * 0.80 > *best_est_rd) {
        mbmi->ref_frame[1] = ref_frame_1;
        continue;
      }
      const int mode_rate = rd_stats->rate;
      rd_stats->rate += est_residue_cost;
      rd_stats->dist = est_dist;
      rd_stats->rdcost = est_rd;
      if (rd_stats->rdcost < *best_est_rd) {
        *best_est_rd = rd_stats->rdcost;
        assert(sse_y >= 0);
        ref_skip_rd[1] = txfm_rd_gate_level
                             ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
                             : INT64_MAX;
      }
      // Record the candidate for the later full transform search. Under
      // SINGLE_REFERENCE only non-compound candidates are recorded.
      if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
        if (!is_comp_pred) {
          assert(curr_sse >= 0);
          inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                                rd_stats->rdcost, rd_stats, rd_stats_y,
                                rd_stats_uv, mbmi);
        }
      } else {
        assert(curr_sse >= 0);
        inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                              rd_stats->rdcost, rd_stats, rd_stats_y,
                              rd_stats_uv, mbmi);
      }
      mbmi->skip_txfm = 0;
    } else {
      // Perform full transform search
      int64_t skip_rd = INT64_MAX;
      int64_t skip_rdy = INT64_MAX;
      if (txfm_rd_gate_level) {
        // Check if the mode is good enough based on skip RD
        int64_t sse_y = INT64_MAX;
        int64_t curr_sse = get_sse(cpi, x, &sse_y);
        skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
        skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
        int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
                                        txfm_rd_gate_level, 0);
        if (!eval_txfm) continue;
      }

      // Do transform search
      const int mode_rate = rd_stats->rate;
      if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
                           rd_stats->rate, ref_best_rd)) {
        // A failed search on mode_index 0 that left the luma rate at INT_MAX
        // aborts the whole motion mode search; otherwise only this candidate
        // is dropped.
        if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
          return INT64_MAX;
        }
        continue;
      }
      const int skip_ctx = av1_get_skip_txfm_context(xd);
      const int y_rate =
          rd_stats->skip_txfm
              ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);

      const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      // Tighten the reference RD and skip-RD values used to gate the
      // remaining candidates.
      if (curr_rd < ref_best_rd) {
        ref_best_rd = curr_rd;
        ref_skip_rd[0] = skip_rd;
        ref_skip_rd[1] = skip_rdy;
      }
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
            rd_stats_y->rate + rd_stats_uv->rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }

    if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
      if (is_nontrans_global_motion(xd, xd->mi[0])) {
        mbmi->interp_filters =
            av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
      }
    }

    if (this_yrd < INT64_MAX) {
      adjust_cost(cpi, x, &this_yrd, /*is_inter_pred=*/true);
    }
    adjust_rdcost(cpi, x, rd_stats, /*is_inter_pred=*/true);
    // Bug 494653438: If do_tx_search is 0, rd_stats_y is uninitialized, so
    // valgrind will warn if we use rd_stats_y->rdcost in a conditional.
    if (!do_tx_search || rd_stats_y->rdcost < INT64_MAX) {
      adjust_rdcost(cpi, x, rd_stats_y, /*is_inter_pred=*/true);
    }

    const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
    if (mode_index == 0) {
      // Record the RD of the mode_index 0 candidate for this mode, ref_mv_idx
      // and first reference frame.
      args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
    }
    int64_t best_scaled_rd = best_rd;
    int64_t this_scaled_rd = tmp_rd;
    // The comparison below uses the values as adjusted by
    // increase_motion_mode_rd(); best_rd itself stores the unscaled tmp_rd.
    if (mode_index != 0)
      increase_motion_mode_rd(&best_mbmi, mbmi, &best_scaled_rd,
                              &this_scaled_rd,
                              cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct,
                              cpi->sf.inter_sf.bias_obmc_mode_rd_scale_pct);

    if (mode_index == 0 || this_scaled_rd < best_scaled_rd) {
      // Update best_rd data if this is the best motion mode so far
      best_mbmi = *mbmi;
      best_rd = tmp_rd;
      best_rd_stats = *rd_stats;
      best_rd_stats_y = *rd_stats_y;
      best_rate_mv = tmp_rate_mv;
      *yrd = this_yrd;
      if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
      best_xskip_txfm = mbmi->skip_txfm;
    }
  }
  // Update RD and mbmi stats for selected motion mode
  mbmi->ref_frame[1] = ref_frame_1;
  *rate_mv = best_rate_mv;
  // Fail if no candidate produced a finite RD or the NEWMV check rejects the
  // current state; rd_stats is invalidated and the destination buffers are
  // restored before returning.
  if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
    av1_invalid_rd_stats(rd_stats);
    restore_dst_buf(xd, *orig_dst, num_planes);
    return INT64_MAX;
  }
  *mbmi = best_mbmi;
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
  txfm_info->skip_txfm = best_xskip_txfm;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1944
1945
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
1946
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
1947
0
                            const BUFFER_SET *const orig_dst, int64_t best_rd) {
1948
0
  assert(bsize < BLOCK_SIZES_ALL);
1949
0
  const AV1_COMMON *cm = &cpi->common;
1950
0
  const int num_planes = av1_num_planes(cm);
1951
0
  MACROBLOCKD *const xd = &x->e_mbd;
1952
0
  const int mi_row = xd->mi_row;
1953
0
  const int mi_col = xd->mi_col;
1954
0
  int64_t total_sse = 0;
1955
0
  int64_t this_rd = INT64_MAX;
1956
0
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
1957
0
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];
1958
1959
0
  for (int plane = 0; plane < num_planes; ++plane) {
1960
    // Call av1_enc_build_inter_predictor() for one plane at a time.
1961
0
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1962
0
                                  plane, plane);
1963
0
    const struct macroblockd_plane *const pd = &xd->plane[plane];
1964
0
    const BLOCK_SIZE plane_bsize =
1965
0
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
1966
1967
0
    av1_subtract_plane(x, plane_bsize, plane);
1968
1969
0
    int64_t sse =
1970
0
        av1_pixel_diff_dist(x, plane, 0, 0, plane_bsize, plane_bsize, NULL);
1971
0
    if (is_cur_buf_hbd(xd)) sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
1972
0
    sse <<= 4;
1973
0
    total_sse += sse;
1974
    // When current rd cost is more than the best rd, skip evaluation of
1975
    // remaining planes.
1976
0
    this_rd = RDCOST(x->rdmult, rd_stats->rate, total_sse);
1977
0
    if (this_rd > best_rd) break;
1978
0
  }
1979
1980
0
  rd_stats->dist = rd_stats->sse = total_sse;
1981
0
  rd_stats->rdcost = this_rd;
1982
1983
0
  restore_dst_buf(xd, *orig_dst, num_planes);
1984
0
  return 0;
1985
0
}
1986
1987
// Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the relevant
1988
// mode
1989
// Note(rachelbarker): This speed feature currently does not interact correctly
1990
// with global motion. The issue is that, when global motion is used, GLOBALMV
1991
// produces a different prediction to NEARESTMV/NEARMV even if the motion
1992
// vectors are the same. Thus GLOBALMV should not be pruned in this case.
1993
static inline int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
1994
                                      int ref_idx,
1995
                                      const MV_REFERENCE_FRAME *ref_frame,
1996
0
                                      PREDICTION_MODE single_mode) {
1997
0
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1998
0
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1999
0
  assert(single_mode != NEWMV);
2000
0
  if (single_mode == NEARESTMV) {
2001
0
    return 0;
2002
0
  } else if (single_mode == NEARMV) {
2003
    // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
2004
    // when ref_mv_count = 1, NEARMV is same as GLOBALMV
2005
0
    if (ref_mv_count < 2) return 1;
2006
0
  } else if (single_mode == GLOBALMV) {
2007
    // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
2008
0
    if (ref_mv_count == 0) return 1;
2009
    // when ref_mv_count == 1, NEARMV is same as GLOBALMV
2010
0
    else if (ref_mv_count == 1)
2011
0
      return 0;
2012
2013
0
    int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
2014
    // Check GLOBALMV is matching with any mv in ref_mv_stack
2015
0
    for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
2016
0
      int_mv this_mv;
2017
2018
0
      if (ref_idx == 0)
2019
0
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
2020
0
      else
2021
0
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
2022
2023
0
      if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
2024
0
        return 1;
2025
0
    }
2026
0
  }
2027
0
  return 0;
2028
0
}
2029
2030
static inline int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
2031
                              int ref_idx, int ref_mv_idx,
2032
                              int skip_repeated_ref_mv,
2033
                              const MV_REFERENCE_FRAME *ref_frame,
2034
0
                              const MB_MODE_INFO_EXT *mbmi_ext) {
2035
0
  const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
2036
0
  assert(is_inter_singleref_mode(single_mode));
2037
0
  if (single_mode == NEWMV) {
2038
0
    this_mv->as_int = INVALID_MV;
2039
0
  } else if (single_mode == GLOBALMV) {
2040
0
    if (skip_repeated_ref_mv &&
2041
0
        check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
2042
0
      return 0;
2043
0
    *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
2044
0
  } else {
2045
0
    assert(single_mode == NEARMV || single_mode == NEARESTMV);
2046
0
    const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
2047
0
    const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
2048
0
    if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
2049
0
      assert(ref_mv_offset >= 0);
2050
0
      if (ref_idx == 0) {
2051
0
        *this_mv =
2052
0
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
2053
0
      } else {
2054
0
        *this_mv =
2055
0
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
2056
0
      }
2057
0
    } else {
2058
0
      if (skip_repeated_ref_mv &&
2059
0
          check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
2060
0
        return 0;
2061
0
      *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
2062
0
    }
2063
0
  }
2064
0
  return 1;
2065
0
}
2066
2067
// Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
2068
// population
2069
static inline int skip_nearest_near_mv_using_refmv_weight(
2070
    const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
2071
0
    const int8_t ref_frame_type, PREDICTION_MODE best_mode) {
2072
0
  if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
2073
  // Do not skip the mode if the current block has not yet obtained a valid
2074
  // inter mode.
2075
0
  if (!is_inter_mode(best_mode)) return 0;
2076
2077
0
  const MACROBLOCKD *xd = &x->e_mbd;
2078
  // Do not skip the mode if both the top and left neighboring blocks are not
2079
  // available.
2080
0
  if (!xd->left_available || !xd->up_available) return 0;
2081
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2082
0
  const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
2083
0
  const int ref_mv_count =
2084
0
      AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
2085
2086
0
  if (ref_mv_count == 0) return 0;
2087
  // If ref mv list has at least one nearest candidate do not prune NEARESTMV
2088
0
  if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;
2089
2090
  // Count number of ref mvs populated from nearest candidates
2091
0
  int nearest_refmv_count = 0;
2092
0
  for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
2093
0
    if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
2094
0
  }
2095
2096
  // nearest_refmv_count indicates the closeness of block motion characteristics
2097
  // with respect to its spatial neighbor. Smaller value of nearest_refmv_count
2098
  // w.r.t to ref_mv_count means less correlation with its spatial neighbors.
2099
  // Hence less possibility for NEARESTMV and NEARMV modes becoming the best
2100
  // mode since these modes work well for blocks that shares similar motion
2101
  // characteristics with its neighbor. Thus, NEARMV mode is pruned when
2102
  // nearest_refmv_count is relatively smaller than ref_mv_count and NEARESTMV
2103
  // mode is pruned if none of the ref mvs are populated from nearest candidate.
2104
0
  const int prune_thresh = 1 + (ref_mv_count >= 2);
2105
0
  if (nearest_refmv_count < prune_thresh) return 1;
2106
0
  return 0;
2107
0
}
2108
2109
// This function update the non-new mv for the current prediction mode
2110
static inline int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
2111
                               const AV1_COMMON *cm, const MACROBLOCK *x,
2112
0
                               int skip_repeated_ref_mv) {
2113
0
  const MACROBLOCKD *xd = &x->e_mbd;
2114
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
2115
0
  const int is_comp_pred = has_second_ref(mbmi);
2116
2117
0
  int ret = 1;
2118
0
  for (int i = 0; i < is_comp_pred + 1; ++i) {
2119
0
    int_mv this_mv;
2120
0
    this_mv.as_int = INVALID_MV;
2121
0
    ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
2122
0
                      skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
2123
0
    if (!ret) return 0;
2124
0
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
2125
0
    if (single_mode == NEWMV) {
2126
0
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2127
0
      cur_mv[i] =
2128
0
          (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
2129
0
                         .this_mv
2130
0
                   : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
2131
0
                         .comp_mv;
2132
0
    } else {
2133
0
      ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
2134
0
    }
2135
0
  }
2136
0
  return ret;
2137
0
}
2138
2139
static inline int get_drl_cost(const MB_MODE_INFO *mbmi,
2140
                               const MB_MODE_INFO_EXT *mbmi_ext,
2141
                               const int (*const drl_mode_cost0)[2],
2142
0
                               int8_t ref_frame_type) {
2143
0
  int cost = 0;
2144
0
  if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
2145
0
    for (int idx = 0; idx < 2; ++idx) {
2146
0
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
2147
0
        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
2148
0
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
2149
0
        if (mbmi->ref_mv_idx == idx) return cost;
2150
0
      }
2151
0
    }
2152
0
    return cost;
2153
0
  }
2154
2155
0
  if (have_nearmv_in_inter_mode(mbmi->mode)) {
2156
0
    for (int idx = 1; idx < 3; ++idx) {
2157
0
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
2158
0
        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
2159
0
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
2160
0
        if (mbmi->ref_mv_idx == (idx - 1)) return cost;
2161
0
      }
2162
0
    }
2163
0
    return cost;
2164
0
  }
2165
0
  return cost;
2166
0
}
2167
2168
static inline int is_single_newmv_valid(const HandleInterModeArgs *const args,
2169
                                        const MB_MODE_INFO *const mbmi,
2170
0
                                        PREDICTION_MODE this_mode) {
2171
0
  for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
2172
0
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
2173
0
    const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
2174
0
    if (single_mode == NEWMV &&
2175
0
        args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
2176
0
      return 0;
2177
0
    }
2178
0
  }
2179
0
  return 1;
2180
0
}
2181
2182
static int get_drl_refmv_count(const MACROBLOCK *const x,
2183
                               const MV_REFERENCE_FRAME *ref_frame,
2184
0
                               PREDICTION_MODE mode) {
2185
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2186
0
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
2187
0
  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
2188
0
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
2189
0
  const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);
2190
0
  const int has_drl =
2191
0
      (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1);
2192
0
  const int ref_set =
2193
0
      has_drl ? AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv) : 1;
2194
2195
0
  return ref_set;
2196
0
}
2197
2198
// Checks if particular ref_mv_idx should be pruned.
2199
static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
2200
                                         const int qindex,
2201
0
                                         const int ref_mv_idx) {
2202
0
  if (reduce_inter_modes >= 3) return 1;
2203
  // Q-index logic based pruning is enabled only for
2204
  // reduce_inter_modes = 2.
2205
0
  assert(reduce_inter_modes == 2);
2206
  // When reduce_inter_modes=2, pruning happens as below based on q index.
2207
  // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
2208
  // For q index range between 86 and 170: prune if ref_mv_idx == 2.
2209
  // For q index range between 171 and 255: no pruning.
2210
0
  const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
2211
0
  return (ref_mv_idx >= min_prune_ref_mv_idx);
2212
0
}
2213
2214
// Whether this reference motion vector can be skipped, based on initial
2215
// heuristics.
2216
static bool ref_mv_idx_early_breakout(
2217
    const SPEED_FEATURES *const sf,
2218
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
2219
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
2220
0
    int ref_mv_idx) {
2221
0
  MACROBLOCKD *xd = &x->e_mbd;
2222
0
  MB_MODE_INFO *mbmi = xd->mi[0];
2223
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2224
0
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2225
0
  const int is_comp_pred = has_second_ref(mbmi);
2226
0
  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
2227
0
    if (mbmi->ref_frame[0] == LAST2_FRAME ||
2228
0
        mbmi->ref_frame[0] == LAST3_FRAME ||
2229
0
        mbmi->ref_frame[1] == LAST2_FRAME ||
2230
0
        mbmi->ref_frame[1] == LAST3_FRAME) {
2231
0
      const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
2232
0
      if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
2233
0
          REF_CAT_LEVEL) {
2234
0
        return true;
2235
0
      }
2236
0
    }
2237
    // TODO(any): Experiment with reduce_inter_modes for compound prediction
2238
0
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
2239
0
        have_newmv_in_inter_mode(mbmi->mode)) {
2240
0
      if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
2241
0
          mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
2242
0
        const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
2243
0
        const int do_prune = prune_ref_mv_idx_using_qindex(
2244
0
            sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
2245
0
        if (do_prune &&
2246
0
            (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
2247
0
             REF_CAT_LEVEL)) {
2248
0
          return true;
2249
0
        }
2250
0
      }
2251
0
    }
2252
0
  }
2253
2254
0
  mbmi->ref_mv_idx = ref_mv_idx;
2255
0
  if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
2256
0
    return true;
2257
0
  }
2258
0
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
2259
0
  const int drl_cost = get_drl_cost(
2260
0
      mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
2261
0
  est_rd_rate += drl_cost;
2262
0
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
2263
0
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
2264
0
    return true;
2265
0
  }
2266
0
  return false;
2267
0
}
2268
2269
// Compute the estimated RD cost for the motion vector with simple translation.
2270
static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
2271
                                          HandleInterModeArgs *args,
2272
                                          int ref_mv_idx, int64_t ref_best_rd,
2273
0
                                          BLOCK_SIZE bsize) {
2274
0
  MACROBLOCKD *xd = &x->e_mbd;
2275
0
  MB_MODE_INFO *mbmi = xd->mi[0];
2276
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2277
0
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2278
0
  const AV1_COMMON *cm = &cpi->common;
2279
0
  const int is_comp_pred = has_second_ref(mbmi);
2280
0
  const ModeCosts *mode_costs = &x->mode_costs;
2281
2282
0
  struct macroblockd_plane *p = xd->plane;
2283
0
  const BUFFER_SET orig_dst = {
2284
0
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
2285
0
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
2286
0
  };
2287
0
  RD_STATS rd_stats;
2288
0
  av1_init_rd_stats(&rd_stats);
2289
2290
0
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2291
0
  mbmi->comp_group_idx = 0;
2292
0
  mbmi->compound_idx = 1;
2293
0
  if (mbmi->ref_frame[1] == INTRA_FRAME) {
2294
0
    mbmi->ref_frame[1] = NONE_FRAME;
2295
0
  }
2296
0
  int16_t mode_ctx =
2297
0
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
2298
2299
0
  mbmi->num_proj_ref = 0;
2300
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
2301
0
  mbmi->ref_mv_idx = ref_mv_idx;
2302
2303
0
  rd_stats.rate += args->ref_frame_cost + args->single_comp_cost;
2304
0
  const int drl_cost =
2305
0
      get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
2306
0
  rd_stats.rate += drl_cost;
2307
2308
0
  int_mv cur_mv[2];
2309
0
  if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
2310
0
    return INT64_MAX;
2311
0
  }
2312
0
  assert(have_nearmv_in_inter_mode(mbmi->mode));
2313
0
  for (int i = 0; i < is_comp_pred + 1; ++i) {
2314
0
    mbmi->mv[i].as_int = cur_mv[i].as_int;
2315
0
  }
2316
0
  const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
2317
0
  rd_stats.rate += ref_mv_cost;
2318
2319
0
  if (RDCOST(x->rdmult, rd_stats.rate, 0) > ref_best_rd) {
2320
0
    return INT64_MAX;
2321
0
  }
2322
2323
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
2324
0
  mbmi->num_proj_ref = 0;
2325
0
  if (is_comp_pred) {
2326
    // Only compound_average
2327
0
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2328
0
    mbmi->comp_group_idx = 0;
2329
0
    mbmi->compound_idx = 1;
2330
0
  }
2331
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
2332
2333
0
  const int mi_row = xd->mi_row;
2334
0
  const int mi_col = xd->mi_col;
2335
0
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
2336
0
                                AOM_PLANE_Y, AOM_PLANE_Y);
2337
0
  int est_rate;
2338
0
  int64_t est_dist;
2339
0
  model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
2340
0
                                  NULL, NULL, NULL, NULL, NULL);
2341
0
  return RDCOST(x->rdmult, rd_stats.rate + est_rate, est_dist);
2342
0
}
2343
2344
// Represents a set of integers in the range [0, sizeof(int) * 8) as bits in
// an int. A 0 in the i-th bit means that integer is excluded, a 1 means it is
// included.
//
// Both helpers do their shifting in unsigned arithmetic. `1 << index` on a
// signed int is undefined when it reaches the sign bit, and right-shifting a
// negative int is implementation-defined. `index` must be in the range above.

// Adds `index` to the set stored in *mask.
static inline void mask_set_bit(int *mask, int index) {
  *mask = (int)((unsigned int)*mask | (1u << index));
}

// Returns true if `index` is a member of the set stored in `mask`.
static inline bool mask_check_bit(int mask, int index) {
  return (((unsigned int)mask >> index) & 0x1u) != 0;
}
2352
2353
// Before performing the full MV search in handle_inter_mode, do a simple
2354
// translation search and see if we can eliminate any motion vectors.
2355
// Returns an integer where, if the i-th bit is set, it means that the i-th
2356
// motion vector should be searched. This is only set for NEAR_MV.
2357
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
2358
                                HandleInterModeArgs *const args,
2359
                                int64_t ref_best_rd, BLOCK_SIZE bsize,
2360
0
                                const int ref_set) {
2361
  // If the number of ref mv count is equal to 1, do not prune the same. It
2362
  // is better to evaluate the same than to prune it.
2363
0
  if (ref_set == 1) return 1;
2364
0
  AV1_COMMON *const cm = &cpi->common;
2365
0
  const MACROBLOCKD *const xd = &x->e_mbd;
2366
0
  const MB_MODE_INFO *const mbmi = xd->mi[0];
2367
0
  const PREDICTION_MODE this_mode = mbmi->mode;
2368
2369
  // Only search indices if they have some chance of being good.
2370
0
  int good_indices = 0;
2371
0
  for (int i = 0; i < ref_set; ++i) {
2372
0
    if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
2373
0
                                  ref_best_rd, i)) {
2374
0
      continue;
2375
0
    }
2376
0
    mask_set_bit(&good_indices, i);
2377
0
  }
2378
2379
  // Only prune in NEARMV mode, if the speed feature is set, and the block size
2380
  // is large enough. If these conditions are not met, return all good indices
2381
  // found so far.
2382
0
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
2383
0
    return good_indices;
2384
0
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
2385
0
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
2386
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
2387
  // so b/2384 can be resolved.
2388
0
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
2389
0
      (mbmi->ref_frame[1] > 0 &&
2390
0
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
2391
0
    return good_indices;
2392
0
  }
2393
2394
  // Calculate the RD cost for the motion vectors using simple translation.
2395
0
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
2396
0
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
2397
    // If this index is bad, ignore it.
2398
0
    if (!mask_check_bit(good_indices, ref_mv_idx)) {
2399
0
      continue;
2400
0
    }
2401
0
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
2402
0
        cpi, x, args, ref_mv_idx, ref_best_rd, bsize);
2403
0
  }
2404
  // Find the index with the best RD cost.
2405
0
  int best_idx = 0;
2406
0
  for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
2407
0
    if (idx_rdcost[i] < idx_rdcost[best_idx]) {
2408
0
      best_idx = i;
2409
0
    }
2410
0
  }
2411
  // Only include indices that are good and within a % of the best.
2412
0
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
2413
  // If the simple translation cost is not within this multiple of the
2414
  // best RD, skip it. Note that the cutoff is derived experimentally.
2415
0
  const double ref_dth = 5;
2416
0
  int result = 0;
2417
0
  for (int i = 0; i < ref_set; ++i) {
2418
0
    if (mask_check_bit(good_indices, i) &&
2419
0
        (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
2420
0
        (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
2421
0
      mask_set_bit(&result, i);
2422
0
    }
2423
0
  }
2424
0
  return result;
2425
0
}
2426
2427
/*!\brief Motion mode information for inter mode search speedup.
2428
 *
2429
 * Used in a speed feature to search motion modes other than
2430
 * SIMPLE_TRANSLATION only on winning candidates.
2431
 */
2432
typedef struct motion_mode_candidate {
2433
  /*!
2434
   * Mode info for the motion mode candidate.
2435
   */
2436
  MB_MODE_INFO mbmi;
2437
  /*!
2438
   * Rate describing the cost of the motion vectors for this candidate.
2439
   */
2440
  int rate_mv;
2441
  /*!
2442
   * Rate before motion mode search and transform coding is applied.
2443
   */
2444
  int rate2_nocoeff;
2445
  /*!
2446
   * Flag taking the value 0 or 1. When set, the motion mode search is skipped
2447
   * for this candidate and SIMPLE_TRANSLATION is used by default (speed
2448
   * feature).
2449
   */
2450
  int skip_motion_mode;
2451
  /*!
2452
   * Total RD cost for this candidate.
2453
   */
2454
  int64_t rd_cost;
2455
} motion_mode_candidate;
2456
2457
/*!\cond */
2458
// Fixed-capacity collection of motion_mode_candidate entries.
typedef struct motion_mode_best_st_candidate {
2459
  // Candidate storage; holds at most MAX_WINNER_MOTION_MODES entries.
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
2460
  // NOTE(review): presumably the number of valid entries in motion_mode_cand;
  // confirm against the code that fills this struct.
  int num_motion_mode_cand;
2461
} motion_mode_best_st_candidate;
2462
2463
// Checks if the current reference frame matches with neighbouring block's
2464
// (top/left) reference frames
2465
static inline int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
2466
0
                                               MB_MODE_INFO *nb_mbmi) {
2467
0
  MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
2468
0
                                          nb_mbmi->ref_frame[1] };
2469
0
  MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
2470
0
                                           cur_mbmi->ref_frame[1] };
2471
0
  const int is_cur_comp_pred = has_second_ref(cur_mbmi);
2472
0
  int match_found = 0;
2473
2474
0
  for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
2475
0
    if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
2476
0
        (cur_ref_frames[i] == nb_ref_frames[1]))
2477
0
      match_found = 1;
2478
0
  }
2479
0
  return match_found;
2480
0
}
2481
2482
static inline int find_ref_match_in_above_nbs(const int total_mi_cols,
2483
0
                                              MACROBLOCKD *xd) {
2484
0
  if (!xd->up_available) return 1;
2485
0
  const int mi_col = xd->mi_col;
2486
0
  MB_MODE_INFO **cur_mbmi = xd->mi;
2487
  // prev_row_mi points into the mi array, starting at the beginning of the
2488
  // previous row.
2489
0
  MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
2490
0
  const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
2491
0
  uint8_t mi_step;
2492
0
  for (int above_mi_col = mi_col; above_mi_col < end_col;
2493
0
       above_mi_col += mi_step) {
2494
0
    MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
2495
0
    mi_step = mi_size_wide[above_mi[0]->bsize];
2496
0
    int match_found = 0;
2497
0
    if (is_inter_block(*above_mi))
2498
0
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
2499
0
    if (match_found) return 1;
2500
0
  }
2501
0
  return 0;
2502
0
}
2503
2504
static inline int find_ref_match_in_left_nbs(const int total_mi_rows,
2505
0
                                             MACROBLOCKD *xd) {
2506
0
  if (!xd->left_available) return 1;
2507
0
  const int mi_row = xd->mi_row;
2508
0
  MB_MODE_INFO **cur_mbmi = xd->mi;
2509
  // prev_col_mi points into the mi array, starting at the top of the
2510
  // previous column
2511
0
  MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
2512
0
  const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
2513
0
  uint8_t mi_step;
2514
0
  for (int left_mi_row = mi_row; left_mi_row < end_row;
2515
0
       left_mi_row += mi_step) {
2516
0
    MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
2517
0
    mi_step = mi_size_high[left_mi[0]->bsize];
2518
0
    int match_found = 0;
2519
0
    if (is_inter_block(*left_mi))
2520
0
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
2521
0
    if (match_found) return 1;
2522
0
  }
2523
0
  return 0;
2524
0
}
2525
/*!\endcond */
2526
2527
/*! \brief Struct used to hold TPL data to
2528
 * narrow down parts of the inter mode search.
2529
 */
2530
typedef struct {
2531
  /*!
2532
   * The best inter cost out of all of the reference frames.
   * get_block_level_tpl_stats() stores the smallest nonzero ref_inter_cost
   * among the valid references here, or INT64_MAX when none qualifies;
   * prune_modes_based_on_tpl_stats() does not prune when it is INT64_MAX.
2533
   */
2534
  int64_t best_inter_cost;
2535
  /*!
2536
   * The inter cost for each reference frame.
   * prune_modes_based_on_tpl_stats() indexes this with the reference frame
   * value minus 1.
2537
   */
2538
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
2539
} PruneInfoFromTpl;
2540
2541
#if !CONFIG_REALTIME_ONLY
2542
// TODO(Remya): Check if get_tpl_stats_b() can be reused
2543
static inline void get_block_level_tpl_stats(
2544
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
2545
0
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
2546
0
  AV1_COMMON *const cm = &cpi->common;
2547
2548
0
  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
2549
0
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
2550
0
  const int tpl_idx = cpi->gf_frame_index;
2551
0
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
2552
0
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
2553
0
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
2554
0
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
2555
0
  const int mi_wide = mi_size_wide[bsize];
2556
0
  const int mi_high = mi_size_high[bsize];
2557
0
  const int tpl_stride = tpl_frame->stride;
2558
0
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
2559
0
  const int mi_col_sr =
2560
0
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
2561
0
  const int mi_col_end_sr =
2562
0
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
2563
0
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
2564
2565
0
  const int row_step = step;
2566
0
  const int col_step_sr =
2567
0
      coded_to_superres_mi(step, cm->superres_scale_denominator);
2568
0
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
2569
0
       row += row_step) {
2570
0
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
2571
0
         col += col_step_sr) {
2572
0
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
2573
0
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];
2574
2575
      // Sums up the inter cost of corresponding ref frames
2576
0
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2577
0
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
2578
0
            this_stats->pred_error[ref_idx];
2579
0
      }
2580
0
    }
2581
0
  }
2582
2583
  // Computes the best inter cost (minimum inter_cost)
2584
0
  int64_t best_inter_cost = INT64_MAX;
2585
0
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2586
0
    const int64_t cur_inter_cost =
2587
0
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
2588
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
2589
    // calculating the minimum inter_cost
2590
0
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
2591
0
        valid_refs[ref_idx])
2592
0
      best_inter_cost = cur_inter_cost;
2593
0
  }
2594
0
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
2595
0
}
2596
#endif
2597
2598
static inline int prune_modes_based_on_tpl_stats(
2599
    PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
2600
0
    const PREDICTION_MODE this_mode, int prune_mode_level) {
2601
0
  const int is_ref_last2 = refs[0] == LAST2_FRAME || refs[1] == LAST2_FRAME;
2602
0
  if (prune_mode_level == 1 && !is_ref_last2) return 0;
2603
2604
0
  const int have_newmv = have_newmv_in_inter_mode(this_mode);
2605
0
  if ((prune_mode_level == 2) && have_newmv) return 0;
2606
2607
0
  const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
2608
0
  if (best_inter_cost == INT64_MAX) return 0;
2609
2610
0
  int64_t cur_inter_cost;
2611
2612
0
  const int is_comp_pred = (refs[1] > INTRA_FRAME);
2613
0
  if (!is_comp_pred) {
2614
0
    cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2615
0
  } else {
2616
0
    const int64_t inter_cost_ref0 =
2617
0
        inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2618
0
    const int64_t inter_cost_ref1 =
2619
0
        inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
2620
    // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
2621
    // more aggressive pruning
2622
0
    cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
2623
0
  }
2624
2625
0
  if (is_ref_last2) return (cur_inter_cost > best_inter_cost);
2626
2627
0
  const int is_globalmv =
2628
0
      (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
2629
0
  const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
2630
0
  const int prune_level = prune_mode_level - 2;
2631
2632
  // Thresholds used for pruning:
2633
  // Lower value indicates aggressive pruning and higher value indicates
2634
  // conservative pruning which is set based on ref_mv_idx and speed feature.
2635
  // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2.
2636
  // prune_index 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
2637
0
  static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
2638
0
    { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
2639
0
  };
2640
2641
  // Prune the mode if cur_inter_cost is greater than threshold times
2642
  // best_inter_cost
2643
0
  if (cur_inter_cost >
2644
0
      ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
2645
0
        best_inter_cost) >>
2646
0
       2))
2647
0
    return 1;
2648
0
  return 0;
2649
0
}
2650
2651
/*!\brief High level function to select parameters for compound mode.
2652
 *
2653
 * \ingroup inter_mode_search
2654
 * The main search functionality is done in the call to av1_compound_type_rd().
2655
 *
2656
 * \param[in]     cpi               Top-level encoder structure.
2657
 * \param[in]     x                 Pointer to struct holding all the data for
2658
 *                                  the current macroblock.
2659
 * \param[in]     args              HandleInterModeArgs struct holding
2660
 *                                  miscellaneous arguments for inter mode
2661
 *                                  search. See the documentation for this
2662
 *                                  struct for a description of each member.
2663
 * \param[in]     ref_best_rd       Best RD found so far for this block.
2664
 *                                  It is used for early termination of this
2665
 *                                  search if the RD exceeds this value.
2666
 * \param[in,out] cur_mv            Current motion vector.
2667
 * \param[in]     bsize             Current block size.
2668
 * \param[in,out] compmode_interinter_cost  RD of the selected interinter
2669
                                    compound mode.
2670
 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2671
 *                                  allocated buffers for the compound
2672
 *                                  predictors and masks in the compound type
2673
 *                                  search.
2674
 * \param[in,out] orig_dst          A prediction buffer to hold a computed
2675
 *                                  prediction. This will eventually hold the
2676
 *                                  final prediction, and the tmp_dst info will
2677
 *                                  be copied here.
2678
 * \param[in]     tmp_dst           A temporary prediction buffer to hold a
2679
 *                                  computed prediction.
2680
 * \param[in,out] rate_mv           The rate associated with the motion vectors.
2681
 *                                  This will be modified if a motion search is
2682
 *                                  done in the motion mode search.
2683
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
2684
 *                                  information.
2685
 * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is the
2686
 *                                  best total RD for a skip mode so far, and
2687
 *                                  skip_rd[1] is the best RD for a skip mode so
2688
 *                                  far in luma. This is used as a speed feature
2689
 *                                  to skip the transform search if the computed
2690
 *                                  skip RD for the current mode is not better
2691
 *                                  than the best skip_rd so far.
2692
 * \param[out] skip_build_pred      Indicates whether or not to build the inter
2693
 *                                  predictor during/after interpolation
2694
 *                                  filter search.
2695
 * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2696
 * a viable candidate.
2697
 */
2698
static int process_compound_inter_mode(
2699
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
2700
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
2701
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
2702
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
2703
0
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
2704
0
  MACROBLOCKD *xd = &x->e_mbd;
2705
0
  MB_MODE_INFO *mbmi = xd->mi[0];
2706
0
  const AV1_COMMON *cm = &cpi->common;
2707
0
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
2708
0
                                   cm->seq_params->enable_masked_compound;
2709
0
  int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
2710
0
                         (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);
2711
2712
0
  const int num_planes = av1_num_planes(cm);
2713
0
  const int mi_row = xd->mi_row;
2714
0
  const int mi_col = xd->mi_col;
2715
0
  int is_luma_interp_done = 0;
2716
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
2717
2718
0
  int64_t best_rd_compound;
2719
0
  int64_t rd_thresh;
2720
0
  const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
2721
0
  const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
2722
0
  rd_thresh = get_rd_thresh_from_best_rd(ref_best_rd, (1 << comp_type_rd_shift),
2723
0
                                         comp_type_rd_scale);
2724
  // Select compound type and any parameters related to that type
2725
  // (for example, the mask parameters if it is a masked mode) and compute
2726
  // the RD
2727
0
  *compmode_interinter_cost = av1_compound_type_rd(
2728
0
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
2729
0
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
2730
0
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
2731
0
  if (ref_best_rd < INT64_MAX &&
2732
0
      (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
2733
0
          ref_best_rd) {
2734
0
    restore_dst_buf(xd, *orig_dst, num_planes);
2735
0
    return 1;
2736
0
  }
2737
2738
  // Build only uv predictor for COMPOUND_AVERAGE.
2739
  // Note there is no need to call av1_enc_build_inter_predictor
2740
  // for luma if COMPOUND_AVERAGE is selected because it is the first
2741
  // candidate in av1_compound_type_rd, which means it used the dst_buf
2742
  // rather than the tmp_buf.
2743
0
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
2744
0
    if (num_planes > 1) {
2745
0
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
2746
0
                                    AOM_PLANE_U, num_planes - 1);
2747
0
    }
2748
0
    *skip_build_pred = INTERP_SKIP_LUMA_SKIP_CHROMA;
2749
0
  }
2750
0
  return 0;
2751
0
}
2752
2753
// Speed feature to prune out MVs that are similar to previous MVs if they
2754
// don't achieve the best RD advantage.
2755
static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2756
                                   int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2757
0
                                   MB_MODE_INFO *mbmi, int pruning_factor) {
2758
0
  int i;
2759
0
  const int is_comp_pred = has_second_ref(mbmi);
2760
0
  const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
2761
2762
  // Skip the evaluation if an MV match is found.
2763
0
  if (ref_mv_idx > 0) {
2764
0
    for (int idx = 0; idx < ref_mv_idx; ++idx) {
2765
0
      if (save_mv[idx][0].as_int == INVALID_MV) continue;
2766
2767
0
      int mv_diff = 0;
2768
0
      for (i = 0; i < 1 + is_comp_pred; ++i) {
2769
0
        mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2770
0
                   abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2771
0
      }
2772
2773
      // If this mode is not the best one, and current MV is similar to
2774
      // previous stored MV, terminate this ref_mv_idx evaluation.
2775
0
      if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2776
0
    }
2777
0
  }
2778
2779
0
  if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2780
0
    for (i = 0; i < is_comp_pred + 1; ++i)
2781
0
      save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2782
0
  }
2783
2784
0
  return 0;
2785
0
}
2786
2787
/*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2788
 *
2789
 * \ingroup inter_mode_search
2790
 *
2791
 * Compares the sse of zero mv and the best sse found in single new_mv. If the
2792
 * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
2793
 * Else returns 0.
2794
 *
2795
 * Note that the sse of here comes from single_motion_search. So it is
2796
 * interpolated with the filter in motion search, not the actual interpolation
2797
 * filter used in encoding.
2798
 *
2799
 * \param[in]     fn_ptr            A table of function pointers to compute SSE.
2800
 * \param[in]     x                 Pointer to struct holding all the data for
2801
 *                                  the current macroblock.
2802
 * \param[in]     bsize             The current block_size.
2803
 * \param[in]     args              The args to handle_inter_mode, used to track
2804
 *                                  the best SSE.
2805
 * \param[in]    prune_zero_mv_with_sse  The argument holds speed feature
2806
 *                                       prune_zero_mv_with_sse value
2807
 * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2808
 */
2809
static inline int prune_zero_mv_with_sse(const aom_variance_fn_ptr_t *fn_ptr,
2810
                                         const MACROBLOCK *x, BLOCK_SIZE bsize,
2811
                                         const HandleInterModeArgs *args,
2812
0
                                         int prune_zero_mv_with_sse) {
2813
0
  const MACROBLOCKD *xd = &x->e_mbd;
2814
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
2815
2816
0
  const int is_comp_pred = has_second_ref(mbmi);
2817
0
  const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
2818
2819
0
  for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2820
0
    if (xd->global_motion[refs[idx]].wmtype != IDENTITY) {
2821
      // Pruning logic only works for IDENTITY type models
2822
      // Note: In theory we could apply similar logic for TRANSLATION
2823
      // type models, but we do not code these due to a spec bug
2824
      // (see comments in gm_get_motion_vector() in av1/common/mv.h)
2825
0
      assert(xd->global_motion[refs[idx]].wmtype != TRANSLATION);
2826
0
      return 0;
2827
0
    }
2828
2829
    // Don't prune if we have invalid data
2830
0
    assert(mbmi->mv[idx].as_int == 0);
2831
0
    if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
2832
0
      return 0;
2833
0
    }
2834
0
  }
2835
2836
  // Sum up the sse of ZEROMV and best NEWMV
2837
0
  unsigned int this_sse_sum = 0;
2838
0
  unsigned int best_sse_sum = 0;
2839
0
  for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2840
0
    const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
2841
0
    const struct macroblockd_plane *pd = xd->plane;
2842
0
    const struct buf_2d *src_buf = &p->src;
2843
0
    const struct buf_2d *ref_buf = &pd->pre[idx];
2844
0
    const uint8_t *src = src_buf->buf;
2845
0
    const uint8_t *ref = ref_buf->buf;
2846
0
    const int src_stride = src_buf->stride;
2847
0
    const int ref_stride = ref_buf->stride;
2848
2849
0
    unsigned int this_sse;
2850
0
    fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
2851
0
    this_sse_sum += this_sse;
2852
2853
0
    const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
2854
0
    best_sse_sum += best_sse;
2855
0
  }
2856
2857
0
  const double mul = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25;
2858
0
  if ((double)this_sse_sum > (mul * (double)best_sse_sum)) {
2859
0
    return 1;
2860
0
  }
2861
2862
0
  return 0;
2863
0
}
2864
2865
/*!\brief Searches for interpolation filter in realtime mode during winner eval
2866
 *
2867
 * \ingroup inter_mode_search
2868
 *
2869
 * Does a simple interpolation filter search during winner mode evaluation. This
2870
 * is currently only used by realtime mode as \ref
2871
 * av1_interpolation_filter_search is not called during realtime encoding.
2872
 *
2873
 * This function only searches over two possible filters. EIGHTTAP_REGULAR is
2874
 * always searched. For lowres clips (<= 240p), MULTITAP_SHARP is also
2875
 * searched. For higher res clips (>240p), EIGHTTAP_SMOOTH is also searched.
2876
 *
2877
 * \param[in]     cpi               Pointer to the compressor. Used for feature
2878
 *                                  flags.
2879
 * \param[in,out] x                 Pointer to macroblock. This is primarily
2880
 *                                  used to access the buffers.
2881
 * \param[in]     mi_row            The current row in mi unit (4X4 pixels).
2882
 * \param[in]     mi_col            The current col in mi unit (4X4 pixels).
2883
 * \param[in]     bsize             The current block_size.
2884
 * \return Returns true if a predictor is built in xd->dst, false otherwise.
2885
 */
2886
static inline bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
2887
                                      int mi_row, int mi_col,
2888
0
                                      BLOCK_SIZE bsize) {
2889
0
  static const InterpFilters filters_ref_set[3] = {
2890
0
    { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
2891
0
    { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
2892
0
    { MULTITAP_SHARP, MULTITAP_SHARP }
2893
0
  };
2894
2895
0
  const AV1_COMMON *const cm = &cpi->common;
2896
0
  MACROBLOCKD *const xd = &x->e_mbd;
2897
0
  MB_MODE_INFO *const mi = xd->mi[0];
2898
0
  int64_t best_cost = INT64_MAX;
2899
0
  int best_filter_index = -1;
2900
  // dst_bufs[0] sores the new predictor, and dist_bifs[1] stores the best
2901
0
  const int num_planes = av1_num_planes(cm);
2902
0
  const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
2903
0
  assert(is_inter_mode(mi->mode));
2904
0
  assert(mi->motion_mode == SIMPLE_TRANSLATION);
2905
0
  assert(!is_inter_compound_mode(mi->mode));
2906
2907
0
  if (!av1_is_interp_needed(xd)) {
2908
0
    return false;
2909
0
  }
2910
2911
0
  struct macroblockd_plane *pd = xd->plane;
2912
0
  const BUFFER_SET orig_dst = {
2913
0
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
2914
0
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
2915
0
  };
2916
0
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
2917
0
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
2918
0
                                 tmp_buf + 2 * MAX_SB_SQUARE },
2919
0
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
2920
0
  const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };
2921
2922
0
  for (int i = 0; i < 3; ++i) {
2923
0
    if (is_240p_or_lesser) {
2924
0
      if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
2925
0
        continue;
2926
0
      }
2927
0
    } else {
2928
0
      if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
2929
0
        continue;
2930
0
      }
2931
0
    }
2932
0
    int64_t cost;
2933
0
    RD_STATS tmp_rd = { 0 };
2934
2935
0
    mi->interp_filters.as_filters = filters_ref_set[i];
2936
0
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
2937
2938
0
    model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
2939
0
                       ? MODELRD_LEGACY
2940
0
                       : MODELRD_TYPE_INTERP_FILTER](
2941
0
        cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
2942
0
        &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);
2943
2944
0
    tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
2945
0
                                           cm->seq_params->enable_dual_filter);
2946
0
    cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
2947
0
    if (cost < best_cost) {
2948
0
      best_filter_index = i;
2949
0
      best_cost = cost;
2950
0
      swap_dst_buf(xd, dst_bufs, num_planes);
2951
0
    }
2952
0
  }
2953
0
  assert(best_filter_index >= 0);
2954
2955
0
  mi->interp_filters.as_filters = filters_ref_set[best_filter_index];
2956
2957
0
  const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];
2958
2959
0
  if (is_best_pred_in_orig) {
2960
0
    swap_dst_buf(xd, dst_bufs, num_planes);
2961
0
  } else {
2962
    // Note that xd->pd's bufers are kept in sync with dst_bufs[0]. So if
2963
    // is_best_pred_in_orig is false, that means the current buffer is the
2964
    // original one.
2965
0
    assert(&orig_dst == dst_bufs[0]);
2966
0
    assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
2967
0
    const int width = block_size_wide[bsize];
2968
0
    const int height = block_size_high[bsize];
2969
0
#if CONFIG_AV1_HIGHBITDEPTH
2970
0
    const bool is_hbd = is_cur_buf_hbd(xd);
2971
0
    if (is_hbd) {
2972
0
      aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
2973
0
                               tmp_dst.stride[AOM_PLANE_Y],
2974
0
                               CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
2975
0
                               orig_dst.stride[AOM_PLANE_Y], width, height);
2976
0
    } else {
2977
0
      aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
2978
0
                        orig_dst.plane[AOM_PLANE_Y],
2979
0
                        orig_dst.stride[AOM_PLANE_Y], width, height);
2980
0
    }
2981
#else
2982
    aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
2983
                      orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
2984
                      width, height);
2985
#endif
2986
0
  }
2987
2988
  // Build the YUV predictor.
2989
0
  if (num_planes > 1) {
2990
0
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
2991
0
                                  AOM_PLANE_U, AOM_PLANE_V);
2992
0
  }
2993
2994
0
  return true;
2995
0
}
2996
2997
/*!\brief AV1 inter mode RD computation
2998
 *
2999
 * \ingroup inter_mode_search
3000
 * Do the RD search for a given inter mode and compute all information relevant
3001
 * to the input mode. It will compute the best MV,
3002
 * compound parameters (if the mode is a compound mode) and interpolation filter
3003
 * parameters.
3004
 *
3005
 * \param[in]     cpi               Top-level encoder structure.
3006
 * \param[in]     tile_data         Pointer to struct holding adaptive
3007
 *                                  data/contexts/models for the tile during
3008
 *                                  encoding.
3009
 * \param[in]     x                 Pointer to structure holding all the data
3010
 *                                  for the current macroblock.
3011
 * \param[in]     bsize             Current block size.
3012
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
3013
 *                                  information.
3014
 * \param[out]    rd_stats_y        Struct to keep track of the RD information
3015
 *                                  for only the Y plane.
3016
 * \param[out]    rd_stats_uv       Struct to keep track of the RD information
3017
 *                                  for only the UV planes.
3018
 * \param[in]     args              HandleInterModeArgs struct holding
3019
 *                                  miscellaneous arguments for inter mode
3020
 *                                  search. See the documentation for this
3021
 *                                  struct for a description of each member.
3022
 * \param[in]     ref_best_rd       Best RD found so far for this block.
3023
 *                                  It is used for early termination of this
3024
 *                                  search if the RD exceeds this value.
3025
 * \param[in]     tmp_buf           Temporary buffer used to hold predictors
3026
 *                                  built in this search.
3027
 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
3028
 *                                  allocated buffers for the compound
3029
 *                                  predictors and masks in the compound type
3030
 *                                  search.
3031
 * \param[in,out] best_est_rd       Estimated RD for motion mode search if
3032
 *                                  do_tx_search (see below) is 0.
3033
 * \param[in]     do_tx_search      Parameter to indicate whether or not to do
3034
 *                                  a full transform search. This will compute
3035
 *                                  an estimated RD for the modes without the
3036
 *                                  transform search and later perform the full
3037
 *                                  transform search on the best candidates.
3038
 * \param[in,out] inter_modes_info  InterModesInfo struct to hold inter mode
3039
 *                                  information to perform a full transform
3040
 *                                  search only on winning candidates searched
3041
 *                                  with an estimate for transform coding RD.
3042
 * \param[in,out] motion_mode_cand  A motion_mode_candidate struct to store
3043
 *                                  motion mode information used in a speed
3044
 *                                  feature to search motion modes other than
3045
 *                                  SIMPLE_TRANSLATION only on winning
3046
 *                                  candidates.
3047
 * \param[in,out] skip_rd           A length 2 array, where skip_rd[0] is the
3048
 *                                  best total RD for a skip mode so far, and
3049
 *                                  skip_rd[1] is the best RD for a skip mode so
3050
 *                                  far in luma. This is used as a speed feature
3051
 *                                  to skip the transform search if the computed
3052
 *                                  skip RD for the current mode is not better
3053
 *                                  than the best skip_rd so far.
3054
 * \param[in]     inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
3055
 *                                         narrow down the search based on data
3056
 *                                         collected in the TPL model.
3057
 * \param[out]    yrd               Stores the rdcost corresponding to encoding
3058
 *                                  the luma plane.
3059
 *
3060
 * \return The RD cost for the mode being searched. If the return value is
3061
 *         INT64_MAX, the output parameters are not set; do not use them.
3062
 */
3063
static int64_t handle_inter_mode(
3064
    AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
3065
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
3066
    RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
3067
    uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
3068
    int64_t *best_est_rd, const int do_tx_search,
3069
    InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
3070
    int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
3071
0
    int64_t *yrd) {
3072
0
  const AV1_COMMON *cm = &cpi->common;
3073
0
  const int num_planes = av1_num_planes(cm);
3074
0
  MACROBLOCKD *xd = &x->e_mbd;
3075
0
  MB_MODE_INFO *mbmi = xd->mi[0];
3076
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3077
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3078
0
  const int is_comp_pred = has_second_ref(mbmi);
3079
0
  const PREDICTION_MODE this_mode = mbmi->mode;
3080
3081
#if CONFIG_REALTIME_ONLY
3082
  const int prune_modes_based_on_tpl = 0;
3083
#else   // CONFIG_REALTIME_ONLY
3084
0
  const TplParams *const tpl_data = &cpi->ppi->tpl_data;
3085
0
  const int prune_modes_based_on_tpl =
3086
0
      cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
3087
0
      av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
3088
0
#endif  // CONFIG_REALTIME_ONLY
3089
0
  int i;
3090
  // Reference frames for this mode
3091
0
  const int refs[2] = { mbmi->ref_frame[0],
3092
0
                        (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
3093
0
  int rate_mv = 0;
3094
0
  int64_t rd = INT64_MAX;
3095
  // Do first prediction into the destination buffer. Do the next
3096
  // prediction into a temporary buffer. Then keep track of which one
3097
  // of these currently holds the best predictor, and use the other
3098
  // one for future predictions. In the end, copy from tmp_buf to
3099
  // dst if necessary.
3100
0
  struct macroblockd_plane *pd = xd->plane;
3101
0
  const BUFFER_SET orig_dst = {
3102
0
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
3103
0
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
3104
0
  };
3105
0
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
3106
0
                                 tmp_buf + 2 * MAX_SB_SQUARE },
3107
0
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
3108
3109
0
  int64_t ret_val = INT64_MAX;
3110
0
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3111
0
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
3112
0
  int64_t best_rd = INT64_MAX;
3113
0
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
3114
0
  int64_t best_yrd = INT64_MAX;
3115
0
  MB_MODE_INFO best_mbmi = *mbmi;
3116
0
  int best_xskip_txfm = 0;
3117
0
  int64_t newmv_ret_val = INT64_MAX;
3118
0
  inter_mode_info mode_info[MAX_REF_MV_SEARCH];
3119
3120
  // Do not prune the mode based on inter cost from tpl if the current ref frame
3121
  // is the winner ref in neighbouring blocks.
3122
0
  int ref_match_found_in_above_nb = 0;
3123
0
  int ref_match_found_in_left_nb = 0;
3124
0
  if (prune_modes_based_on_tpl) {
3125
0
    ref_match_found_in_above_nb =
3126
0
        find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
3127
0
    ref_match_found_in_left_nb =
3128
0
        find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
3129
0
  }
3130
3131
  // First, perform a simple translation search for each of the indices. If
3132
  // an index performs well, it will be fully searched in the main loop
3133
  // of this function.
3134
0
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
3135
  // Save MV results from first 2 ref_mv_idx.
3136
0
  int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
3137
0
  int best_ref_mv_idx = -1;
3138
0
  const int idx_mask =
3139
0
      ref_mv_idx_to_search(cpi, x, args, ref_best_rd, bsize, ref_set);
3140
0
  const int16_t mode_ctx =
3141
0
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
3142
0
  const ModeCosts *mode_costs = &x->mode_costs;
3143
0
  const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
3144
0
  const int base_rate =
3145
0
      args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;
3146
3147
0
  for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
3148
0
    save_mv[i][0].as_int = INVALID_MV;
3149
0
    save_mv[i][1].as_int = INVALID_MV;
3150
0
  }
3151
0
  args->start_mv_cnt = 0;
3152
3153
  // Main loop of this function. This will  iterate over all of the ref mvs
3154
  // in the dynamic reference list and do the following:
3155
  //    1.) Get the current MV. Create newmv MV if necessary
3156
  //    2.) Search compound type and parameters if applicable
3157
  //    3.) Do interpolation filter search
3158
  //    4.) Build the inter predictor
3159
  //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
3160
  //        WARPED_CAUSAL)
3161
  //    6.) Update stats if best so far
3162
0
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
3163
0
    mbmi->ref_mv_idx = ref_mv_idx;
3164
3165
0
    mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
3166
0
    mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
3167
0
    const int drl_cost = get_drl_cost(
3168
0
        mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
3169
0
    mode_info[ref_mv_idx].drl_cost = drl_cost;
3170
0
    mode_info[ref_mv_idx].skip = 0;
3171
3172
0
    if (!mask_check_bit(idx_mask, ref_mv_idx)) {
3173
      // MV did not perform well in simple translation search. Skip it.
3174
0
      continue;
3175
0
    }
3176
0
    if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
3177
0
        !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
3178
      // Skip mode if TPL model indicates it will not be beneficial.
3179
0
      if (prune_modes_based_on_tpl_stats(
3180
0
              inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
3181
0
              cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
3182
0
        continue;
3183
0
    }
3184
0
    av1_init_rd_stats(rd_stats);
3185
3186
    // Initialize compound mode data
3187
0
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3188
0
    mbmi->comp_group_idx = 0;
3189
0
    mbmi->compound_idx = 1;
3190
0
    if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
3191
3192
0
    mbmi->num_proj_ref = 0;
3193
0
    mbmi->motion_mode = SIMPLE_TRANSLATION;
3194
3195
    // Compute cost for signalling this DRL index
3196
0
    rd_stats->rate = base_rate;
3197
0
    rd_stats->rate += drl_cost;
3198
3199
0
    int rs = 0;
3200
0
    int compmode_interinter_cost = 0;
3201
3202
0
    int_mv cur_mv[2];
3203
3204
    // TODO(Cherma): Extend this speed feature to support compound mode
3205
0
    int skip_repeated_ref_mv =
3206
0
        is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
3207
    // Generate the current mv according to the prediction mode
3208
0
    if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
3209
0
      continue;
3210
0
    }
3211
3212
    // The above call to build_cur_mv does not handle NEWMV modes. Build
3213
    // the mv here if we have NEWMV for any predictors.
3214
0
    if (have_newmv_in_inter_mode(this_mode)) {
3215
#if CONFIG_COLLECT_COMPONENT_TIMING
3216
      start_timing(cpi, handle_newmv_time);
3217
#endif
3218
0
      newmv_ret_val =
3219
0
          handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
3220
#if CONFIG_COLLECT_COMPONENT_TIMING
3221
      end_timing(cpi, handle_newmv_time);
3222
#endif
3223
3224
0
      if (newmv_ret_val != 0) continue;
3225
3226
0
      if (is_inter_singleref_mode(this_mode) &&
3227
0
          cur_mv[0].as_int != INVALID_MV) {
3228
0
        const MV_REFERENCE_FRAME ref = refs[0];
3229
0
        const unsigned int this_sse = x->pred_sse[ref];
3230
0
        if (this_sse < args->best_single_sse_in_refs[ref]) {
3231
0
          args->best_single_sse_in_refs[ref] = this_sse;
3232
0
        }
3233
3234
0
        if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
3235
0
          const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1;
3236
0
          const int pix_idx = num_pels_log2_lookup[bsize] - 4;
3237
0
          const double scale_factor[3][11] = {
3238
0
            { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
3239
0
            { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
3240
0
            { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
3241
0
          };
3242
0
          assert(pix_idx >= 0);
3243
0
          assert(th_idx <= 2);
3244
0
          if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse)
3245
0
            continue;
3246
0
        }
3247
0
      }
3248
3249
0
      rd_stats->rate += rate_mv;
3250
0
    }
3251
    // Copy the motion vector for this mode into mbmi struct
3252
0
    for (i = 0; i < is_comp_pred + 1; ++i) {
3253
0
      mbmi->mv[i].as_int = cur_mv[i].as_int;
3254
0
    }
3255
3256
0
    if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
3257
0
        mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
3258
0
      continue;
3259
0
    }
3260
3261
    // Skip the rest of the search if prune_ref_mv_idx_search speed feature
3262
    // is enabled, and the current MV is similar to a previous one.
3263
0
    if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
3264
0
        prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
3265
0
                                cpi->sf.inter_sf.prune_ref_mv_idx_search))
3266
0
      continue;
3267
3268
0
    if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
3269
0
        (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
3270
0
      if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args,
3271
0
                                 cpi->sf.gm_sf.prune_zero_mv_with_sse)) {
3272
0
        continue;
3273
0
      }
3274
0
    }
3275
3276
    // Flag to indicate whether to skip av1_enc_build_inter_predictor() after
3277
    // interpolation filter search
3278
0
    int skip_build_pred = INTERP_EVAL_LUMA_EVAL_CHROMA;
3279
0
    const int mi_row = xd->mi_row;
3280
0
    const int mi_col = xd->mi_col;
3281
3282
    // Handle a compound predictor, continue if it is determined this
3283
    // cannot be the best compound mode
3284
0
    if (is_comp_pred) {
3285
#if CONFIG_COLLECT_COMPONENT_TIMING
3286
      start_timing(cpi, compound_type_rd_time);
3287
#endif
3288
0
      const int not_best_mode = process_compound_inter_mode(
3289
0
          cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
3290
0
          rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
3291
0
          &skip_build_pred);
3292
#if CONFIG_COLLECT_COMPONENT_TIMING
3293
      end_timing(cpi, compound_type_rd_time);
3294
#endif
3295
0
      if (not_best_mode) continue;
3296
0
    }
3297
3298
0
    if (!args->skip_ifs) {
3299
#if CONFIG_COLLECT_COMPONENT_TIMING
3300
      start_timing(cpi, interpolation_filter_search_time);
3301
#endif
3302
      // Determine the interpolation filter for this mode
3303
0
      ret_val = av1_interpolation_filter_search(
3304
0
          x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
3305
0
          &skip_build_pred, args, ref_best_rd);
3306
#if CONFIG_COLLECT_COMPONENT_TIMING
3307
      end_timing(cpi, interpolation_filter_search_time);
3308
#endif
3309
0
      if (args->modelled_rd != NULL && !is_comp_pred) {
3310
0
        args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
3311
0
      }
3312
0
      if (ret_val != 0) {
3313
0
        restore_dst_buf(xd, orig_dst, num_planes);
3314
0
        continue;
3315
0
      } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
3316
0
                 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
3317
0
        restore_dst_buf(xd, orig_dst, num_planes);
3318
0
        continue;
3319
0
      }
3320
3321
      // Compute modelled RD if enabled
3322
0
      if (args->modelled_rd != NULL) {
3323
0
        if (is_comp_pred) {
3324
0
          const int mode0 = compound_ref0_mode(this_mode);
3325
0
          const int mode1 = compound_ref1_mode(this_mode);
3326
0
          const int64_t mrd =
3327
0
              AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
3328
0
                     args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
3329
0
          if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
3330
0
            restore_dst_buf(xd, orig_dst, num_planes);
3331
0
            continue;
3332
0
          }
3333
0
        }
3334
0
      }
3335
0
    }
3336
3337
0
    rd_stats->rate += compmode_interinter_cost;
3338
0
    if (skip_build_pred != INTERP_SKIP_LUMA_SKIP_CHROMA) {
3339
      // Chroma plane of COMPOUND_DIFFWTD mode shares the segment mask of luma
3340
      // which is stored in xd->seg_mask. Hence, the predictor is populated for
3341
      // all planes. This should avoid usage of incorrect segment mask when the
3342
      // call is made only for chroma.
3343
0
      const int skip_luma_plane =
3344
0
          skip_build_pred == INTERP_SKIP_LUMA_EVAL_CHROMA &&
3345
0
          mbmi->interinter_comp.type != COMPOUND_DIFFWTD;
3346
0
      const int start_plane = skip_luma_plane ? AOM_PLANE_U : AOM_PLANE_Y;
3347
      // Build this inter predictor if it has not been previously built
3348
0
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
3349
0
                                    start_plane, num_planes - 1);
3350
0
    }
3351
#if CONFIG_COLLECT_COMPONENT_TIMING
3352
    start_timing(cpi, motion_mode_rd_time);
3353
#endif
3354
0
    int rate2_nocoeff = rd_stats->rate;
3355
    // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
3356
    // OBMC_CAUSAL or WARPED_CAUSAL
3357
0
    int64_t this_yrd;
3358
0
    ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
3359
0
                             rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
3360
0
                             &orig_dst, best_est_rd, do_tx_search,
3361
0
                             inter_modes_info, 0, &this_yrd);
3362
#if CONFIG_COLLECT_COMPONENT_TIMING
3363
    end_timing(cpi, motion_mode_rd_time);
3364
#endif
3365
0
    assert(
3366
0
        IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));
3367
3368
0
    if (ret_val != INT64_MAX) {
3369
0
      int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
3370
0
      const THR_MODES mode_enum = get_prediction_mode_idx(
3371
0
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3372
      // Collect mode stats for multiwinner mode processing
3373
0
      store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
3374
0
                              rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
3375
0
                              cpi->sf.winner_mode_sf.multi_winner_mode_type,
3376
0
                              do_tx_search);
3377
0
      if (tmp_rd < best_rd) {
3378
0
        best_yrd = this_yrd;
3379
        // Update the best rd stats if we found the best mode so far
3380
0
        best_rd_stats = *rd_stats;
3381
0
        best_rd_stats_y = *rd_stats_y;
3382
0
        best_rd_stats_uv = *rd_stats_uv;
3383
0
        best_rd = tmp_rd;
3384
0
        best_mbmi = *mbmi;
3385
0
        best_xskip_txfm = txfm_info->skip_txfm;
3386
0
        av1_copy_array(best_tx_type_map, xd->tx_type_map,
3387
0
                       xd->height * xd->width);
3388
0
        motion_mode_cand->rate_mv = rate_mv;
3389
0
        motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
3390
0
      }
3391
3392
0
      if (tmp_rd < ref_best_rd) {
3393
0
        ref_best_rd = tmp_rd;
3394
0
        best_ref_mv_idx = ref_mv_idx;
3395
0
      }
3396
0
    }
3397
0
    restore_dst_buf(xd, orig_dst, num_planes);
3398
0
  }
3399
3400
0
  if (best_rd == INT64_MAX) return INT64_MAX;
3401
3402
  // re-instate status of the best choice
3403
0
  *rd_stats = best_rd_stats;
3404
0
  *rd_stats_y = best_rd_stats_y;
3405
0
  *rd_stats_uv = best_rd_stats_uv;
3406
0
  *yrd = best_yrd;
3407
0
  *mbmi = best_mbmi;
3408
0
  txfm_info->skip_txfm = best_xskip_txfm;
3409
0
  assert(IMPLIES(mbmi->comp_group_idx == 1,
3410
0
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
3411
0
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
3412
3413
0
  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
3414
3415
0
  return rd_stats->rdcost;
3416
0
}
3417
3418
/*!\brief Search for the best intrabc predictor
3419
 *
3420
 * \ingroup intra_mode_search
3421
 * \callergraph
3422
 * This function performs a motion search to find the best intrabc predictor.
3423
 *
3424
 * \returns Returns the best overall rdcost (including the non-intrabc modes
3425
 * search before this function).
3426
 */
3427
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
3428
                                       PICK_MODE_CONTEXT *ctx,
3429
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
3430
0
                                       int64_t best_rd) {
3431
0
  const AV1_COMMON *const cm = &cpi->common;
3432
0
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
3433
0
      !cpi->sf.mv_sf.use_intrabc || cpi->sf.rt_sf.use_nonrd_pick_mode)
3434
0
    return INT64_MAX;
3435
0
  if (cpi->sf.mv_sf.intrabc_search_level >= 1 && bsize != BLOCK_4X4 &&
3436
0
      bsize != BLOCK_8X8 && bsize != BLOCK_16X16) {
3437
0
    return INT64_MAX;
3438
0
  }
3439
0
  const int num_planes = av1_num_planes(cm);
3440
3441
0
  MACROBLOCKD *const xd = &x->e_mbd;
3442
0
  const TileInfo *tile = &xd->tile;
3443
0
  MB_MODE_INFO *mbmi = xd->mi[0];
3444
3445
0
  const int mi_row = xd->mi_row;
3446
0
  const int mi_col = xd->mi_col;
3447
0
  const int w = block_size_wide[bsize];
3448
0
  const int h = block_size_high[bsize];
3449
0
  const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
3450
0
  const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
3451
3452
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3453
0
  const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
3454
0
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
3455
0
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3456
0
                   mbmi_ext->mode_context);
3457
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3458
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3459
0
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
3460
0
  int_mv nearestmv, nearmv;
3461
0
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
3462
0
                                   0);
3463
3464
0
  if (nearestmv.as_int == INVALID_MV) {
3465
0
    nearestmv.as_int = 0;
3466
0
  }
3467
0
  if (nearmv.as_int == INVALID_MV) {
3468
0
    nearmv.as_int = 0;
3469
0
  }
3470
3471
0
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
3472
0
  if (dv_ref.as_int == 0) {
3473
0
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
3474
0
  }
3475
  // Ref DV should not have sub-pel.
3476
0
  assert((dv_ref.as_mv.col & 7) == 0);
3477
0
  assert((dv_ref.as_mv.row & 7) == 0);
3478
0
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
3479
3480
0
  struct buf_2d yv12_mb[MAX_MB_PLANE];
3481
0
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
3482
0
  for (int i = 0; i < num_planes; ++i) {
3483
0
    xd->plane[i].pre[0] = yv12_mb[i];
3484
0
  }
3485
3486
0
  enum IntrabcMotionDirection {
3487
0
    IBC_MOTION_ABOVE,
3488
0
    IBC_MOTION_LEFT,
3489
0
    IBC_MOTION_DIRECTIONS
3490
0
  };
3491
3492
0
  MB_MODE_INFO best_mbmi = *mbmi;
3493
0
  RD_STATS best_rdstats = *rd_stats;
3494
0
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
3495
0
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3496
3497
0
  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
3498
0
  const SEARCH_METHODS search_method =
3499
0
      av1_get_default_mv_search_method(x, &cpi->sf.mv_sf, bsize);
3500
0
  const search_site_config *lookahead_search_sites =
3501
0
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
3502
0
  const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
3503
0
  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
3504
0
                                     &dv_ref.as_mv, start_mv,
3505
0
                                     lookahead_search_sites, search_method,
3506
0
                                     /*fine_search_interval=*/0);
3507
0
  const IntraBCMVCosts *const dv_costs = x->dv_costs;
3508
0
  av1_set_ms_to_intra_mode(&fullms_params, dv_costs);
3509
3510
0
  const enum IntrabcMotionDirection max_dir = cpi->sf.mv_sf.intrabc_search_level
3511
0
                                                  ? IBC_MOTION_LEFT
3512
0
                                                  : IBC_MOTION_DIRECTIONS;
3513
3514
0
  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE; dir < max_dir;
3515
0
       ++dir) {
3516
0
    switch (dir) {
3517
0
      case IBC_MOTION_ABOVE:
3518
0
        fullms_params.mv_limits.col_min =
3519
0
            (tile->mi_col_start - mi_col) * MI_SIZE;
3520
0
        fullms_params.mv_limits.col_max =
3521
0
            (tile->mi_col_end - mi_col) * MI_SIZE - w;
3522
0
        fullms_params.mv_limits.row_min =
3523
0
            (tile->mi_row_start - mi_row) * MI_SIZE;
3524
0
        fullms_params.mv_limits.row_max =
3525
0
            (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
3526
0
        break;
3527
0
      case IBC_MOTION_LEFT:
3528
0
        fullms_params.mv_limits.col_min =
3529
0
            (tile->mi_col_start - mi_col) * MI_SIZE;
3530
0
        fullms_params.mv_limits.col_max =
3531
0
            (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
3532
        // TODO(aconverse@google.com): Minimize the overlap between above and
3533
        // left areas.
3534
0
        fullms_params.mv_limits.row_min =
3535
0
            (tile->mi_row_start - mi_row) * MI_SIZE;
3536
0
        int bottom_coded_mi_edge =
3537
0
            AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
3538
0
        fullms_params.mv_limits.row_max =
3539
0
            (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
3540
0
        break;
3541
0
      default: assert(0);
3542
0
    }
3543
0
    assert(fullms_params.mv_limits.col_min >= fullms_params.mv_limits.col_min);
3544
0
    assert(fullms_params.mv_limits.col_max <= fullms_params.mv_limits.col_max);
3545
0
    assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min);
3546
0
    assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max);
3547
3548
0
    av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);
3549
3550
0
    if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
3551
0
        fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
3552
0
      continue;
3553
0
    }
3554
3555
0
    const int step_param = cpi->mv_search_params.mv_step_param;
3556
0
    IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
3557
0
    int_mv best_mv;
3558
0
    FULLPEL_MV_STATS best_mv_stats;
3559
0
    int bestsme = INT_MAX;
3560
3561
    // Perform a hash search first, and see if we get any matches.
3562
0
    if (!cpi->sf.mv_sf.hash_max_8x8_intrabc_blocks || bsize <= BLOCK_8X8) {
3563
0
      bestsme = av1_intrabc_hash_search(cpi, xd, &fullms_params,
3564
0
                                        intrabc_hash_info, &best_mv.as_fullmv);
3565
0
    }
3566
3567
    // If intrabc_search_level is not 0 and we found a hash search match, do
3568
    // not proceed with pixel search as the hash match is very likely to be the
3569
    // best intrabc candidate anyway.
3570
0
    if (bestsme == INT_MAX || cpi->sf.mv_sf.intrabc_search_level == 0) {
3571
0
      int_mv best_pixel_mv;
3572
0
      const int pixelsme =
3573
0
          av1_full_pixel_search(start_mv, &fullms_params, step_param, NULL,
3574
0
                                &best_pixel_mv.as_fullmv, &best_mv_stats, NULL);
3575
0
      if (pixelsme < bestsme) {
3576
0
        bestsme = pixelsme;
3577
0
        best_mv = best_pixel_mv;
3578
0
      }
3579
0
    }
3580
0
    if (bestsme == INT_MAX) continue;
3581
0
    const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
3582
0
    if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
3583
0
                                get_fullmv_from_mv(&dv)))
3584
0
      continue;
3585
0
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
3586
0
                         cm->seq_params->mib_size_log2))
3587
0
      continue;
3588
3589
    // DV should not have sub-pel.
3590
0
    assert((dv.col & 7) == 0);
3591
0
    assert((dv.row & 7) == 0);
3592
0
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
3593
0
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
3594
0
    mbmi->use_intrabc = 1;
3595
0
    mbmi->mode = DC_PRED;
3596
0
    mbmi->uv_mode = UV_DC_PRED;
3597
0
    mbmi->motion_mode = SIMPLE_TRANSLATION;
3598
0
    mbmi->mv[0].as_mv = dv;
3599
0
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
3600
0
    mbmi->skip_txfm = 0;
3601
0
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3602
0
                                  av1_num_planes(cm) - 1);
3603
3604
    // TODO(aconverse@google.com): The full motion field defining discount
3605
    // in MV_COST_WEIGHT is too large. Explore other values.
3606
0
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
3607
0
                                        dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
3608
0
    const int rate_mode = x->mode_costs.intrabc_cost[1];
3609
0
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
3610
0
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
3611
0
                         &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
3612
0
      continue;
3613
0
    rd_stats_yuv.rdcost =
3614
0
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
3615
0
    if (rd_stats_yuv.rdcost < best_rd) {
3616
0
      best_rd = rd_stats_yuv.rdcost;
3617
0
      best_mbmi = *mbmi;
3618
0
      best_rdstats = rd_stats_yuv;
3619
0
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
3620
0
    }
3621
0
  }
3622
0
  *mbmi = best_mbmi;
3623
0
  *rd_stats = best_rdstats;
3624
0
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
3625
#if CONFIG_RD_DEBUG
3626
  mbmi->rd_stats = *rd_stats;
3627
#endif
3628
0
  return best_rd;
3629
0
}
3630
3631
// TODO(chiyotsai@google.com): We are using struct $struct_name instead of their
3632
// typedef here because Doxygen doesn't know about the typedefs yet. So using
3633
// the typedef will prevent doxygen from finding this function and generating
3634
// the callgraph. Once documents for AV1_COMP and MACROBLOCK are added to
3635
// doxygen, we can revert back to using the typedefs.
3636
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
3637
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
3638
0
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3639
0
  const AV1_COMMON *const cm = &cpi->common;
3640
0
  MACROBLOCKD *const xd = &x->e_mbd;
3641
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
3642
0
  const int num_planes = av1_num_planes(cm);
3643
0
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3644
0
  uint8_t y_skip_txfm = 0, uv_skip_txfm = 0;
3645
0
  int64_t dist_y = 0, dist_uv = 0;
3646
3647
0
  ctx->rd_stats.skip_txfm = 0;
3648
0
  mbmi->ref_frame[0] = INTRA_FRAME;
3649
0
  mbmi->ref_frame[1] = NONE_FRAME;
3650
0
  mbmi->use_intrabc = 0;
3651
0
  mbmi->mv[0].as_int = 0;
3652
0
  mbmi->skip_mode = 0;
3653
3654
0
  const int64_t intra_yrd =
3655
0
      av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
3656
0
                                 &y_skip_txfm, bsize, best_rd, ctx);
3657
3658
  // Initialize default mode evaluation params
3659
0
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);
3660
3661
0
  if (intra_yrd < best_rd) {
3662
    // Search intra modes for uv planes if needed
3663
0
    if (num_planes > 1) {
3664
      // Set up the tx variables for reproducing the y predictions in case we
3665
      // need it for chroma-from-luma.
3666
0
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
3667
0
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
3668
0
      }
3669
0
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
3670
0
      av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3671
0
                                  &dist_uv, &uv_skip_txfm, bsize,
3672
0
                                  max_uv_tx_size);
3673
0
    }
3674
3675
    // Intra block is always coded as non-skip
3676
0
    rd_cost->rate =
3677
0
        rate_y + rate_uv +
3678
0
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
3679
0
    rd_cost->dist = dist_y + dist_uv;
3680
0
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
3681
0
    rd_cost->skip_txfm = 0;
3682
0
  } else {
3683
0
    rd_cost->rate = INT_MAX;
3684
0
  }
3685
3686
0
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
3687
0
    best_rd = rd_cost->rdcost;
3688
0
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
3689
0
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
3690
0
    assert(rd_cost->rate != INT_MAX);
3691
0
  }
3692
0
  if (rd_cost->rate == INT_MAX) return;
3693
3694
0
  ctx->mic = *mbmi;
3695
0
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
3696
0
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
3697
0
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3698
0
}
3699
3700
static inline void calc_target_weighted_pred(
3701
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
3702
    const uint8_t *above, int above_stride, const uint8_t *left,
3703
    int left_stride);
3704
3705
static inline void rd_pick_skip_mode(
3706
    RD_STATS *rd_cost, InterModeSearchState *search_state,
3707
    const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3708
0
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3709
0
  const AV1_COMMON *const cm = &cpi->common;
3710
0
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3711
0
  const int num_planes = av1_num_planes(cm);
3712
0
  MACROBLOCKD *const xd = &x->e_mbd;
3713
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
3714
3715
0
  x->compound_idx = 1;  // COMPOUND_AVERAGE
3716
0
  RD_STATS skip_mode_rd_stats;
3717
0
  av1_invalid_rd_stats(&skip_mode_rd_stats);
3718
3719
0
  if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3720
0
      skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3721
0
    return;
3722
0
  }
3723
3724
0
  const MV_REFERENCE_FRAME ref_frame =
3725
0
      LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3726
0
  const MV_REFERENCE_FRAME second_ref_frame =
3727
0
      LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3728
0
  const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3729
0
  const THR_MODES mode_index =
3730
0
      get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3731
3732
0
  if (mode_index == THR_INVALID) {
3733
0
    return;
3734
0
  }
3735
3736
0
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3737
0
       cpi->sf.inter_sf.disable_onesided_comp) &&
3738
0
      cpi->all_one_sided_refs) {
3739
0
    return;
3740
0
  }
3741
3742
0
  mbmi->mode = this_mode;
3743
0
  mbmi->uv_mode = UV_DC_PRED;
3744
0
  mbmi->ref_frame[0] = ref_frame;
3745
0
  mbmi->ref_frame[1] = second_ref_frame;
3746
0
  const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3747
0
  if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3748
0
    MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3749
0
    if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3750
0
        mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3751
0
      return;
3752
0
    }
3753
0
    av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3754
0
                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3755
0
                     mbmi_ext->mode_context);
3756
    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3757
    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3758
0
    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3759
0
  }
3760
3761
0
  assert(this_mode == NEAREST_NEARESTMV);
3762
0
  if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3763
0
    return;
3764
0
  }
3765
3766
0
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
3767
0
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3768
0
  mbmi->comp_group_idx = 0;
3769
0
  mbmi->compound_idx = x->compound_idx;
3770
0
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3771
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
3772
0
  mbmi->ref_mv_idx = 0;
3773
0
  mbmi->skip_mode = mbmi->skip_txfm = 1;
3774
0
  mbmi->palette_mode_info.palette_size[0] = 0;
3775
0
  mbmi->palette_mode_info.palette_size[1] = 0;
3776
3777
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
3778
3779
0
  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3780
0
  for (int i = 0; i < num_planes; i++) {
3781
0
    xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3782
0
    xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3783
0
  }
3784
3785
0
  BUFFER_SET orig_dst;
3786
0
  for (int i = 0; i < num_planes; i++) {
3787
0
    orig_dst.plane[i] = xd->plane[i].dst.buf;
3788
0
    orig_dst.stride[i] = xd->plane[i].dst.stride;
3789
0
  }
3790
3791
  // Compare the use of skip_mode with the best intra/inter mode obtained.
3792
0
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3793
0
  int64_t best_intra_inter_mode_cost = INT64_MAX;
3794
0
  if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3795
0
    const ModeCosts *mode_costs = &x->mode_costs;
3796
0
    best_intra_inter_mode_cost = RDCOST(
3797
0
        x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3798
0
        rd_cost->dist);
3799
    // Account for non-skip mode rate in total rd stats
3800
0
    rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3801
0
    av1_rd_cost_update(x->rdmult, rd_cost);
3802
0
  }
3803
3804
  // Obtain the rdcost for skip_mode.
3805
0
  skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst,
3806
0
               best_intra_inter_mode_cost);
3807
3808
0
  if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3809
0
      (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3810
0
    assert(mode_index != THR_INVALID);
3811
0
    search_state->best_mbmode.skip_mode = 1;
3812
0
    search_state->best_mbmode = *mbmi;
3813
0
    memset(search_state->best_mbmode.inter_tx_size,
3814
0
           search_state->best_mbmode.tx_size,
3815
0
           sizeof(search_state->best_mbmode.inter_tx_size));
3816
0
    set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3817
0
                  search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3818
0
                  xd);
3819
0
    search_state->best_mode_index = mode_index;
3820
3821
    // Update rd_cost
3822
0
    rd_cost->rate = skip_mode_rd_stats.rate;
3823
0
    rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3824
0
    rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3825
3826
0
    search_state->best_rd = rd_cost->rdcost;
3827
0
    search_state->best_skip2 = 1;
3828
0
    search_state->best_mode_skippable = 1;
3829
3830
0
    x->txfm_search_info.skip_txfm = 1;
3831
0
  }
3832
0
}
3833
3834
// Get winner mode stats of given mode index
3835
static inline MB_MODE_INFO *get_winner_mode_stats(
3836
    MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3837
    int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3838
    RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3839
    THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3840
0
    int mode_idx) {
3841
0
  MB_MODE_INFO *winner_mbmi;
3842
0
  if (multi_winner_mode_type) {
3843
0
    assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3844
0
    WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3845
0
    winner_mbmi = &winner_mode_stat->mbmi;
3846
3847
0
    *winner_rd_cost = &winner_mode_stat->rd_cost;
3848
0
    *winner_rate_y = winner_mode_stat->rate_y;
3849
0
    *winner_rate_uv = winner_mode_stat->rate_uv;
3850
0
    *winner_mode_index = winner_mode_stat->mode_index;
3851
0
  } else {
3852
0
    winner_mbmi = best_mbmode;
3853
0
    *winner_rd_cost = best_rd_cost;
3854
0
    *winner_rate_y = best_rate_y;
3855
0
    *winner_rate_uv = best_rate_uv;
3856
0
    *winner_mode_index = *best_mode_index;
3857
0
  }
3858
0
  return winner_mbmi;
3859
0
}
3860
3861
// speed feature: fast intra/inter transform type search
3862
// Used for speed >= 2
3863
// When this speed feature is on, in rd mode search, only DCT is used.
3864
// After the mode is determined, this function is called, to select
3865
// transform types and get accurate rdcost.
3866
static inline void refine_winner_mode_tx(
3867
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
3868
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
3869
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
3870
0
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
3871
0
  const AV1_COMMON *const cm = &cpi->common;
3872
0
  MACROBLOCKD *const xd = &x->e_mbd;
3873
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
3874
0
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
3875
0
  int64_t best_rd;
3876
0
  const int num_planes = av1_num_planes(cm);
3877
3878
0
  if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
3879
0
                                         rd_cost->skip_txfm))
3880
0
    return;
3881
3882
  // Set params for winner mode evaluation
3883
0
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
3884
3885
  // No best mode identified so far
3886
0
  if (*best_mode_index == THR_INVALID) return;
3887
3888
0
  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
3889
0
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
3890
0
    RD_STATS *winner_rd_stats = NULL;
3891
0
    int winner_rate_y = 0, winner_rate_uv = 0;
3892
0
    THR_MODES winner_mode_index = 0;
3893
3894
    // TODO(any): Combine best mode and multi-winner mode processing paths
3895
    // Get winner mode stats for current mode index
3896
0
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
3897
0
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
3898
0
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
3899
0
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);
3900
3901
0
    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
3902
0
        winner_mode_index != THR_INVALID &&
3903
0
        is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
3904
0
                                          rd_cost->skip_txfm)) {
3905
0
      RD_STATS rd_stats = *winner_rd_stats;
3906
0
      int skip_blk = 0;
3907
0
      RD_STATS rd_stats_y, rd_stats_uv;
3908
0
      const int skip_ctx = av1_get_skip_txfm_context(xd);
3909
3910
0
      *mbmi = *winner_mbmi;
3911
3912
0
      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3913
3914
      // Select prediction reference frames.
3915
0
      for (int i = 0; i < num_planes; i++) {
3916
0
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3917
0
        if (has_second_ref(mbmi))
3918
0
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3919
0
      }
3920
3921
0
      if (is_inter_mode(mbmi->mode)) {
3922
0
        const int mi_row = xd->mi_row;
3923
0
        const int mi_col = xd->mi_col;
3924
0
        bool is_predictor_built = false;
3925
0
        const PREDICTION_MODE prediction_mode = mbmi->mode;
3926
        // Do interpolation filter search for realtime mode if applicable.
3927
0
        if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
3928
0
            cpi->oxcf.mode == REALTIME &&
3929
0
            cm->current_frame.reference_mode == SINGLE_REFERENCE &&
3930
0
            is_inter_mode(prediction_mode) &&
3931
0
            mbmi->motion_mode == SIMPLE_TRANSLATION &&
3932
0
            !is_inter_compound_mode(prediction_mode)) {
3933
0
          is_predictor_built =
3934
0
              fast_interp_search(cpi, x, mi_row, mi_col, bsize);
3935
0
        }
3936
0
        if (!is_predictor_built) {
3937
0
          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3938
0
                                        av1_num_planes(cm) - 1);
3939
0
        }
3940
0
        if (mbmi->motion_mode == OBMC_CAUSAL)
3941
0
          av1_build_obmc_inter_predictors_sb(cm, xd);
3942
3943
0
        av1_subtract_plane(x, bsize, 0);
3944
0
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
3945
0
            !xd->lossless[mbmi->segment_id]) {
3946
0
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3947
0
                                              INT64_MAX);
3948
0
          assert(rd_stats_y.rate != INT_MAX);
3949
0
        } else {
3950
0
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3951
0
                                            INT64_MAX);
3952
0
          memset(mbmi->inter_tx_size, mbmi->tx_size,
3953
0
                 sizeof(mbmi->inter_tx_size));
3954
0
        }
3955
0
      } else {
3956
0
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3957
0
                                          INT64_MAX);
3958
0
      }
3959
3960
0
      if (num_planes > 1) {
3961
0
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
3962
0
      } else {
3963
0
        av1_init_rd_stats(&rd_stats_uv);
3964
0
      }
3965
3966
0
      const int comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
3967
3968
0
      const ModeCosts *mode_costs = &x->mode_costs;
3969
0
      if (is_inter_mode(mbmi->mode) &&
3970
0
          (!cpi->oxcf.algo_cfg.sharpness || !comp_pred) &&
3971
0
          RDCOST(x->rdmult,
3972
0
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
3973
0
                     rd_stats_uv.rate,
3974
0
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
3975
0
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
3976
0
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
3977
0
        skip_blk = 1;
3978
0
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
3979
0
        rd_stats_uv.rate = 0;
3980
0
        rd_stats_y.dist = rd_stats_y.sse;
3981
0
        rd_stats_uv.dist = rd_stats_uv.sse;
3982
0
      } else {
3983
0
        skip_blk = 0;
3984
0
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
3985
0
      }
3986
0
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
3987
0
                      winner_rate_y - winner_rate_uv;
3988
0
      int64_t this_rd =
3989
0
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
3990
0
      if (best_rd > this_rd) {
3991
0
        *best_mbmode = *mbmi;
3992
0
        *best_mode_index = winner_mode_index;
3993
0
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3994
0
        rd_cost->rate = this_rate;
3995
0
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
3996
0
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
3997
0
        rd_cost->rdcost = this_rd;
3998
0
        best_rd = this_rd;
3999
0
        *best_skip2 = skip_blk;
4000
0
      }
4001
0
    }
4002
0
  }
4003
0
}
4004
4005
/*!\cond */
4006
typedef struct {
4007
  // Mask for each reference frame, specifying which prediction modes to NOT try
4008
  // during search.
4009
  uint32_t pred_modes[REF_FRAMES];
4010
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
4011
  // reference frames (i, j).
4012
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
4013
  // (NONE_FRAME).
4014
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
4015
} mode_skip_mask_t;
4016
/*!\endcond */
4017
4018
// Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
4019
static inline void disable_reference(
4020
0
    MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
4021
0
  for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
4022
0
    ref_combo[ref][ref2 + 1] = true;
4023
0
  }
4024
0
}
4025
4026
// Update 'ref_combo' mask to disable all inter references except ALTREF.
4027
static inline void disable_inter_references_except_altref(
4028
0
    bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
4029
0
  disable_reference(LAST_FRAME, ref_combo);
4030
0
  disable_reference(LAST2_FRAME, ref_combo);
4031
0
  disable_reference(LAST3_FRAME, ref_combo);
4032
0
  disable_reference(GOLDEN_FRAME, ref_combo);
4033
0
  disable_reference(BWDREF_FRAME, ref_combo);
4034
0
  disable_reference(ALTREF2_FRAME, ref_combo);
4035
0
}
4036
4037
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
4038
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
4039
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
4040
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
4041
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
4042
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
4043
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
4044
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
4045
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
4046
};
4047
4048
// Selects which set of reference combinations the mode skip mask starts from.
typedef enum {
  REF_SET_FULL,     // Nothing is masked out by default.
  REF_SET_REDUCED,  // Only the pairs in reduced_ref_combos are enabled.
  REF_SET_REALTIME  // Only the pairs in real_time_ref_combos are enabled.
} REF_SET;
4049
4050
0
// Initializes 'mask' to the default state for the given reference set.
//
// REF_SET_FULL clears the whole mask (every mode and reference combination is
// allowed). For the other sets all prediction modes are allowed, every
// reference combination is first disabled, and then only the pairs listed in
// the table matching 'ref_set' are re-enabled.
static inline void default_skip_mask(mode_skip_mask_t *mask, REF_SET ref_set) {
  if (ref_set == REF_SET_FULL) {
    // Everything available by default.
    memset(mask, 0, sizeof(*mask));
    return;
  }

  // All modes available by default.
  memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
  // All references disabled first.
  for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
    for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
      mask->ref_combo[ref1][ref2 + 1] = true;
    }
  }

  // Both are given defined values up front so that an unexpected 'ref_set'
  // (reachable when assert() is compiled out) leaves no indeterminate pointer
  // and simply enables nothing.
  const MV_REFERENCE_FRAME(*ref_set_combos)[2] = NULL;
  int num_ref_combos = 0;

  // Then enable reduced set of references explicitly.
  switch (ref_set) {
    case REF_SET_REDUCED:
      ref_set_combos = reduced_ref_combos;
      // The cast covers the whole quotient, not just the dividend.
      num_ref_combos =
          (int)(sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]));
      break;
    case REF_SET_REALTIME:
      ref_set_combos = real_time_ref_combos;
      num_ref_combos =
          (int)(sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]));
      break;
    default: assert(0); break;
  }

  for (int i = 0; i < num_ref_combos; ++i) {
    const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
    mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
  }
}
4087
4088
static inline void init_mode_skip_mask(mode_skip_mask_t *mask,
4089
                                       const AV1_COMP *cpi, MACROBLOCK *x,
4090
0
                                       BLOCK_SIZE bsize) {
4091
0
  const AV1_COMMON *const cm = &cpi->common;
4092
0
  const struct segmentation *const seg = &cm->seg;
4093
0
  MACROBLOCKD *const xd = &x->e_mbd;
4094
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
4095
0
  unsigned char segment_id = mbmi->segment_id;
4096
0
  const SPEED_FEATURES *const sf = &cpi->sf;
4097
0
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
4098
0
  REF_SET ref_set = REF_SET_FULL;
4099
4100
0
  if (sf->rt_sf.use_real_time_ref_set)
4101
0
    ref_set = REF_SET_REALTIME;
4102
0
  else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
4103
0
    ref_set = REF_SET_REDUCED;
4104
4105
0
  default_skip_mask(mask, ref_set);
4106
4107
0
  int min_pred_mv_sad = INT_MAX;
4108
0
  MV_REFERENCE_FRAME ref_frame;
4109
0
  if (ref_set == REF_SET_REALTIME) {
4110
    // For real-time encoding, we only look at a subset of ref frames. So the
4111
    // threshold for pruning should be computed from this subset as well.
4112
0
    const int num_rt_refs =
4113
0
        sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
4114
0
    for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
4115
0
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
4116
0
      if (ref != INTRA_FRAME) {
4117
0
        const MV_REFERENCE_FRAME ref_frames[2] = { ref, NONE_FRAME };
4118
0
        const int_mv ref_mv =
4119
0
            av1_get_ref_mv_from_stack(0, ref_frames, 0, &x->mbmi_ext);
4120
0
        const FULLPEL_MV full_mv = get_fullmv_from_mv(&ref_mv.as_mv);
4121
0
        if (av1_is_fullmv_in_range(&x->mv_limits, full_mv)) {
4122
0
          min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
4123
0
        }
4124
0
      }
4125
0
    }
4126
0
  } else {
4127
0
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
4128
0
      min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
4129
0
  }
4130
4131
0
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4132
0
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
4133
      // Skip checking missing reference in both single and compound reference
4134
      // modes.
4135
0
      disable_reference(ref_frame, mask->ref_combo);
4136
0
    } else {
4137
      // Skip fixed mv modes for poor references
4138
0
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
4139
0
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
4140
0
      }
4141
0
    }
4142
0
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
4143
0
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
4144
      // Reference not used for the segment.
4145
0
      disable_reference(ref_frame, mask->ref_combo);
4146
0
    }
4147
0
  }
4148
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
4149
  // is disabled for this segment. This is to prevent the possibility that we
4150
  // end up unable to pick any mode.
4151
0
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
4152
    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
4153
    // unless ARNR filtering is enabled in which case we want
4154
    // an unfiltered alternative. We allow near/nearest as well
4155
    // because they may result in zero-zero MVs but be cheaper.
4156
0
    if (cpi->rc.is_src_frame_alt_ref &&
4157
0
        (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
4158
0
      disable_inter_references_except_altref(mask->ref_combo);
4159
4160
0
      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
4161
0
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
4162
0
      int_mv near_mv, nearest_mv, global_mv;
4163
0
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
4164
0
                  &x->mbmi_ext);
4165
0
      get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
4166
0
      get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
4167
4168
0
      if (near_mv.as_int != global_mv.as_int)
4169
0
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
4170
0
      if (nearest_mv.as_int != global_mv.as_int)
4171
0
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
4172
0
    }
4173
0
  }
4174
4175
0
  if (cpi->rc.is_src_frame_alt_ref) {
4176
0
    if (inter_sf->alt_ref_search_fp &&
4177
0
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
4178
0
      mask->pred_modes[ALTREF_FRAME] = 0;
4179
0
      disable_inter_references_except_altref(mask->ref_combo);
4180
0
      disable_reference(INTRA_FRAME, mask->ref_combo);
4181
0
    }
4182
0
  }
4183
4184
0
  if (inter_sf->alt_ref_search_fp) {
4185
0
    if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
4186
0
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
4187
      // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
4188
      // those are past frames
4189
0
      MV_REFERENCE_FRAME start_frame =
4190
0
          inter_sf->alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
4191
0
      for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
4192
0
        if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
4193
0
            0) {
4194
          // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
4195
          // to the relative dist of LAST_FRAME.
4196
0
          if (abs(cpi->ref_frame_dist_info
4197
0
                      .ref_relative_dist[ref_frame - LAST_FRAME] -
4198
0
                  cpi->ref_frame_dist_info
4199
0
                      .ref_relative_dist[LAST_FRAME - LAST_FRAME]) > 4) {
4200
0
            continue;
4201
0
          }
4202
0
          if (x->pred_mv_sad[ref_frame] > sad_thresh)
4203
0
            mask->pred_modes[ref_frame] |= INTER_ALL;
4204
0
        }
4205
0
      }
4206
0
    }
4207
0
  }
4208
4209
0
  if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
4210
0
    if (x->best_pred_mv_sad[0] < INT_MAX) {
4211
0
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
4212
0
      const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };
4213
4214
      // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
4215
0
      for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
4216
0
        ref_frame = prune_ref_list[ref_idx];
4217
0
        if (x->pred_mv_sad[ref_frame] > sad_thresh)
4218
0
          mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
4219
0
      }
4220
0
    }
4221
0
  }
4222
4223
0
  if (bsize > sf->part_sf.max_intra_bsize) {
4224
0
    disable_reference(INTRA_FRAME, mask->ref_combo);
4225
0
  }
4226
4227
0
  if (!cpi->oxcf.tool_cfg.enable_global_motion) {
4228
0
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4229
0
      mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
4230
0
      mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
4231
0
    }
4232
0
  }
4233
4234
0
  mask->pred_modes[INTRA_FRAME] |=
4235
0
      ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
4236
4237
  // Prune reference frames which are not the closest to the current
4238
  // frame and with large pred_mv_sad.
4239
0
  if (inter_sf->prune_single_ref) {
4240
0
    assert(inter_sf->prune_single_ref > 0 && inter_sf->prune_single_ref < 5);
4241
0
    const double prune_thresh = (inter_sf->prune_single_ref <= 3) ? 1.20 : 1.05;
4242
4243
0
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4244
0
      const RefFrameDistanceInfo *const ref_frame_dist_info =
4245
0
          &cpi->ref_frame_dist_info;
4246
0
      const int is_closest_ref =
4247
0
          (ref_frame == ref_frame_dist_info->nearest_past_ref) ||
4248
0
          (ref_frame == ref_frame_dist_info->nearest_future_ref);
4249
0
      const int ref_idx = ref_frame - LAST_FRAME;
4250
4251
0
      if (!(cpi->keep_single_ref_frame_mask & (1 << ref_idx) ||
4252
0
            is_closest_ref)) {
4253
0
        const int dir =
4254
0
            (ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] < 0)
4255
0
                ? 0
4256
0
                : 1;
4257
0
        if (x->best_pred_mv_sad[dir] < INT_MAX &&
4258
0
            x->pred_mv_sad[ref_frame] > prune_thresh * x->best_pred_mv_sad[dir])
4259
0
          mask->pred_modes[ref_frame] |= INTER_SINGLE_ALL;
4260
0
      }
4261
0
    }
4262
0
  }
4263
0
}
4264
4265
static inline void init_neighbor_pred_buf(const OBMCBuffer *const obmc_buffer,
4266
                                          HandleInterModeArgs *const args,
4267
0
                                          int is_hbd) {
4268
0
  if (is_hbd) {
4269
0
    const int len = sizeof(uint16_t);
4270
0
    args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
4271
0
    args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
4272
0
                                                 (MAX_SB_SQUARE >> 1) * len);
4273
0
    args->above_pred_buf[2] =
4274
0
        CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
4275
0
    args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
4276
0
    args->left_pred_buf[1] =
4277
0
        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
4278
0
    args->left_pred_buf[2] =
4279
0
        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
4280
0
  } else {
4281
0
    args->above_pred_buf[0] = obmc_buffer->above_pred;
4282
0
    args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
4283
0
    args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
4284
0
    args->left_pred_buf[0] = obmc_buffer->left_pred;
4285
0
    args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
4286
0
    args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
4287
0
  }
4288
0
}
4289
4290
static inline int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
4291
0
                                  MV_REFERENCE_FRAME ref_frame) {
4292
0
  const AV1_COMMON *const cm = &cpi->common;
4293
0
  MV_REFERENCE_FRAME rf[2];
4294
0
  av1_set_ref_frame(rf, ref_frame);
4295
4296
0
  if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
4297
4298
0
  if (prune_ref_by_selective_ref_frame(cpi, x, rf,
4299
0
                                       cm->cur_frame->ref_display_order_hint)) {
4300
0
    return 1;
4301
0
  }
4302
4303
0
  return 0;
4304
0
}
4305
4306
static inline int is_ref_frame_used_by_compound_ref(int ref_frame,
4307
0
                                                    int skip_ref_frame_mask) {
4308
0
  for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
4309
0
    if (!(skip_ref_frame_mask & (1 << r))) {
4310
0
      const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
4311
0
      if (rf[0] == ref_frame || rf[1] == ref_frame) {
4312
0
        return 1;
4313
0
      }
4314
0
    }
4315
0
  }
4316
0
  return 0;
4317
0
}
4318
4319
static inline int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
4320
0
                                             const MB_MODE_INFO *mi_cache) {
4321
0
  if (!mi_cache) {
4322
0
    return 0;
4323
0
  }
4324
4325
0
  if (ref_frame < REF_FRAMES) {
4326
0
    return (ref_frame == mi_cache->ref_frame[0] ||
4327
0
            ref_frame == mi_cache->ref_frame[1]);
4328
0
  }
4329
4330
  // if we are here, then the current mode is compound.
4331
0
  MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
4332
0
  return ref_frame == cached_ref_type;
4333
0
}
4334
4335
// Please add/modify parameter setting in this function, making it consistent
4336
// and easy to read and maintain.
4337
static inline void set_params_rd_pick_inter_mode(
4338
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
4339
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
4340
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
4341
0
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
4342
0
  const AV1_COMMON *const cm = &cpi->common;
4343
0
  MACROBLOCKD *const xd = &x->e_mbd;
4344
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
4345
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
4346
0
  unsigned char segment_id = mbmi->segment_id;
4347
4348
0
  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
4349
0
  av1_collect_neighbors_ref_counts(xd);
4350
0
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
4351
0
                           ref_costs_comp);
4352
4353
0
  const int mi_row = xd->mi_row;
4354
0
  const int mi_col = xd->mi_col;
4355
0
  x->best_pred_mv_sad[0] = INT_MAX;
4356
0
  x->best_pred_mv_sad[1] = INT_MAX;
4357
4358
0
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
4359
0
       ++ref_frame) {
4360
0
    x->pred_mv_sad[ref_frame] = INT_MAX;
4361
0
    mbmi_ext->mode_context[ref_frame] = 0;
4362
0
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
4363
0
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
4364
      // Skip the ref frame if the mask says skip and the ref is not used by
4365
      // compound ref.
4366
0
      if (skip_ref_frame_mask & (1 << ref_frame) &&
4367
0
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
4368
0
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4369
0
        continue;
4370
0
      }
4371
0
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
4372
0
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
4373
0
    }
4374
0
    if (cpi->sf.inter_sf.alt_ref_search_fp ||
4375
0
        cpi->sf.inter_sf.prune_single_ref ||
4376
0
        cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
4377
      // Store the best pred_mv_sad across all past frames
4378
0
      if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
4379
0
          0)
4380
0
        x->best_pred_mv_sad[0] =
4381
0
            AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
4382
0
      else
4383
        // Store the best pred_mv_sad across all future frames
4384
0
        x->best_pred_mv_sad[1] =
4385
0
            AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
4386
0
    }
4387
0
  }
4388
4389
0
  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
4390
    // No second reference on RT ref set, so no need to initialize
4391
0
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
4392
0
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
4393
0
      mbmi_ext->mode_context[ref_frame] = 0;
4394
0
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
4395
0
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
4396
0
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
4397
0
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
4398
0
        continue;
4399
0
      }
4400
4401
0
      if (skip_ref_frame_mask & (1 << ref_frame) &&
4402
0
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4403
0
        continue;
4404
0
      }
4405
      // Ref mv list population is not required, when compound references are
4406
      // pruned.
4407
0
      if (prune_ref_frame(cpi, x, ref_frame)) continue;
4408
4409
0
      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
4410
0
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
4411
0
                       mbmi_ext->mode_context);
4412
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
4413
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
4414
0
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
4415
0
    }
4416
0
  }
4417
4418
0
  av1_count_overlappable_neighbors(cm, xd);
4419
0
  const FRAME_UPDATE_TYPE update_type =
4420
0
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
4421
0
  int use_actual_frame_probs = 1;
4422
0
  int prune_obmc;
4423
#if CONFIG_FPMT_TEST
4424
  use_actual_frame_probs =
4425
      (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
4426
  if (!use_actual_frame_probs) {
4427
    prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
4428
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
4429
  }
4430
#endif
4431
0
  if (use_actual_frame_probs) {
4432
0
    prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
4433
0
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
4434
0
  }
4435
0
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
4436
0
    if (check_num_overlappable_neighbors(mbmi) &&
4437
0
        is_motion_variation_allowed_bsize(bsize)) {
4438
0
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4439
0
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4440
0
                                       MAX_SB_SIZE >> 1 };
4441
0
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4442
0
                                        MAX_SB_SIZE >> 1 };
4443
0
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4444
0
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
4445
0
                                          dst_width1, dst_height1,
4446
0
                                          args->above_pred_stride);
4447
0
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
4448
0
                                         dst_width2, dst_height2,
4449
0
                                         args->left_pred_stride);
4450
0
      const int num_planes = av1_num_planes(cm);
4451
0
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
4452
0
                           mi_col, 0, num_planes);
4453
0
      calc_target_weighted_pred(
4454
0
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
4455
0
          args->left_pred_buf[0], args->left_pred_stride[0]);
4456
0
    }
4457
0
  }
4458
4459
0
  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);
4460
4461
  // Set params for mode evaluation
4462
0
  set_mode_eval_params(cpi, x, MODE_EVAL);
4463
4464
0
  x->comp_rd_stats_idx = 0;
4465
4466
0
  for (int idx = 0; idx < REF_FRAMES; idx++) {
4467
0
    args->best_single_sse_in_refs[idx] = INT32_MAX;
4468
0
  }
4469
0
}
4470
4471
static inline void init_single_inter_mode_search_state(
4472
0
    InterModeSearchState *search_state) {
4473
0
  for (int dir = 0; dir < 2; ++dir) {
4474
0
    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4475
0
      for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
4476
0
        SingleInterModeState *state;
4477
4478
0
        state = &search_state->single_state[dir][mode][ref_frame];
4479
0
        state->ref_frame = NONE_FRAME;
4480
0
        state->rd = INT64_MAX;
4481
4482
0
        state = &search_state->single_state_modelled[dir][mode][ref_frame];
4483
0
        state->ref_frame = NONE_FRAME;
4484
0
        state->rd = INT64_MAX;
4485
4486
0
        search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
4487
0
      }
4488
0
    }
4489
0
  }
4490
4491
0
  for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4492
0
    search_state->best_single_rd[ref_frame] = INT64_MAX;
4493
0
    search_state->best_single_mode[ref_frame] = PRED_MODE_INVALID;
4494
0
  }
4495
0
  av1_zero(search_state->single_state_cnt);
4496
0
  av1_zero(search_state->single_state_modelled_cnt);
4497
0
}
4498
4499
static inline void init_inter_mode_search_state(
4500
    InterModeSearchState *search_state, const AV1_COMP *cpi,
4501
0
    const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
4502
0
  init_intra_mode_search_state(&search_state->intra_search_state);
4503
0
  av1_invalid_rd_stats(&search_state->best_y_rdcost);
4504
4505
0
  search_state->best_rd = best_rd_so_far;
4506
0
  search_state->best_skip_rd[0] = INT64_MAX;
4507
0
  search_state->best_skip_rd[1] = INT64_MAX;
4508
4509
0
  av1_zero(search_state->best_mbmode);
4510
4511
0
  search_state->best_rate_y = INT_MAX;
4512
4513
0
  search_state->best_rate_uv = INT_MAX;
4514
4515
0
  search_state->best_mode_skippable = 0;
4516
4517
0
  search_state->best_skip2 = 0;
4518
4519
0
  search_state->best_mode_index = THR_INVALID;
4520
4521
0
  const MACROBLOCKD *const xd = &x->e_mbd;
4522
0
  const MB_MODE_INFO *const mbmi = xd->mi[0];
4523
0
  const unsigned char segment_id = mbmi->segment_id;
4524
4525
0
  search_state->num_available_refs = 0;
4526
0
  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
4527
0
  memset(search_state->dist_order_refs, -1,
4528
0
         sizeof(search_state->dist_order_refs));
4529
4530
0
  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
4531
0
    search_state->mode_threshold[i] = 0;
4532
0
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
4533
0
  for (int i = LAST_NEW_MV_INDEX + 1; i < SINGLE_REF_MODE_END; ++i)
4534
0
    search_state->mode_threshold[i] =
4535
0
        ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4536
0
        RD_THRESH_FAC_FRAC_BITS;
4537
4538
0
  search_state->best_intra_rd = INT64_MAX;
4539
4540
0
  search_state->best_pred_sse = UINT_MAX;
4541
4542
0
  av1_zero(search_state->single_newmv);
4543
0
  av1_zero(search_state->single_newmv_rate);
4544
0
  av1_zero(search_state->single_newmv_valid);
4545
0
  for (int i = SINGLE_INTER_MODE_START; i < SINGLE_INTER_MODE_END; ++i) {
4546
0
    for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4547
0
      for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4548
0
        search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4549
0
        search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4550
0
      }
4551
0
    }
4552
0
  }
4553
4554
0
  for (int i = 0; i < REFERENCE_MODES; ++i) {
4555
0
    search_state->best_pred_rd[i] = INT64_MAX;
4556
0
  }
4557
4558
0
  if (cpi->common.current_frame.reference_mode != SINGLE_REFERENCE) {
4559
0
    for (int i = SINGLE_REF_MODE_END; i < THR_INTER_MODE_END; ++i)
4560
0
      search_state->mode_threshold[i] =
4561
0
          ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4562
0
          RD_THRESH_FAC_FRAC_BITS;
4563
4564
0
    for (int i = COMP_INTER_MODE_START; i < COMP_INTER_MODE_END; ++i) {
4565
0
      for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4566
0
        for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4567
0
          search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4568
0
          search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4569
0
        }
4570
0
      }
4571
0
    }
4572
4573
0
    init_single_inter_mode_search_state(search_state);
4574
0
  }
4575
0
}
4576
4577
static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
4578
                           const MV_REFERENCE_FRAME *ref_frame,
4579
0
                           const PREDICTION_MODE this_mode) {
4580
0
  if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
4581
0
    return true;
4582
0
  }
4583
4584
0
  return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
4585
0
}
4586
4587
static AOM_FORCE_INLINE int inter_mode_compatible_skip(
4588
    const AV1_COMP *cpi, const MACROBLOCK *x, BLOCK_SIZE bsize,
4589
0
    PREDICTION_MODE curr_mode, const MV_REFERENCE_FRAME *ref_frames) {
4590
0
  const int comp_pred = ref_frames[1] > INTRA_FRAME;
4591
0
  if (comp_pred) {
4592
0
    if (!is_comp_ref_allowed(bsize)) return 1;
4593
0
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]])) {
4594
0
      return 1;
4595
0
    }
4596
4597
0
    const AV1_COMMON *const cm = &cpi->common;
4598
0
    if (frame_is_intra_only(cm)) return 1;
4599
4600
0
    const CurrentFrame *const current_frame = &cm->current_frame;
4601
0
    if (current_frame->reference_mode == SINGLE_REFERENCE) return 1;
4602
4603
0
    const struct segmentation *const seg = &cm->seg;
4604
0
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
4605
    // Do not allow compound prediction if the segment level reference frame
4606
    // feature is in use as in this case there can only be one reference.
4607
0
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
4608
0
  }
4609
4610
0
  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
4611
    // Mode must be compatible
4612
0
    if (!is_interintra_allowed_bsize(bsize)) return 1;
4613
0
    if (!is_interintra_allowed_mode(curr_mode)) return 1;
4614
0
  }
4615
4616
0
  return 0;
4617
0
}
4618
4619
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
4620
0
                                        BLOCK_SIZE bsize, int mib_size) {
4621
0
  const int sb_size_mask = mib_size - 1;
4622
0
  const MACROBLOCKD *const xd = &x->e_mbd;
4623
0
  const int mi_row = xd->mi_row;
4624
0
  const int mi_col = xd->mi_col;
4625
0
  const int mi_row_in_sb = mi_row & sb_size_mask;
4626
0
  const int mi_col_in_sb = mi_col & sb_size_mask;
4627
0
  const int mi_w = mi_size_wide[bsize];
4628
0
  const int mi_h = mi_size_high[bsize];
4629
0
  int picked_ref_frames_mask = 0;
4630
0
  for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_h; ++i) {
4631
0
    for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_w; ++j) {
4632
0
      picked_ref_frames_mask |= x->picked_ref_frames_mask[i * 32 + j];
4633
0
    }
4634
0
  }
4635
0
  return picked_ref_frames_mask;
4636
0
}
4637
4638
// Check if reference frame pair of the current block matches with the given
4639
// block.
4640
static inline int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4641
0
                                       const MV_REFERENCE_FRAME *ref_frames) {
4642
0
  return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4643
0
          (ref_frames[1] == mbmi->ref_frame[1]));
4644
0
}
4645
4646
// Case 1: return 0, means don't skip this mode
4647
// Case 2: return 1, means skip this mode completely
4648
// Case 3: return 2, means skip compound only, but still try single motion modes
4649
static AOM_FORCE_INLINE int inter_mode_search_order_independent_skip(
4650
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
4651
    InterModeSearchState *search_state, int skip_ref_frame_mask,
4652
0
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
4653
0
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
4654
0
    return 1;
4655
0
  }
4656
4657
0
  const int ref_type = av1_ref_frame_type(ref_frame);
4658
0
  if (!cpi->sf.rt_sf.use_real_time_ref_set)
4659
0
    if (prune_ref_frame(cpi, x, ref_type)) return 1;
4660
4661
  // This is only used in motion vector unit test.
4662
0
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
4663
0
      ref_frame[0] == INTRA_FRAME)
4664
0
    return 1;
4665
4666
0
  const AV1_COMMON *const cm = &cpi->common;
4667
0
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
4668
0
    return 1;
4669
0
  }
4670
4671
  // Reuse the prediction mode in cache
4672
0
  if (x->use_mb_mode_cache) {
4673
0
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
4674
0
    const PREDICTION_MODE cached_mode = cached_mi->mode;
4675
0
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
4676
0
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;
4677
4678
    // If the cached mode is intra, then we just need to match the mode.
4679
0
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
4680
0
      return 1;
4681
0
    }
4682
4683
    // If the cached mode is single inter mode, then we match the mode and
4684
    // reference frame.
4685
0
    if (cached_mode_is_single) {
4686
0
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
4687
0
        return 1;
4688
0
      }
4689
0
    } else {
4690
      // If the cached mode is compound, then we need to consider several cases.
4691
0
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
4692
0
      if (mode_is_single) {
4693
        // If the mode is single, we know the modes can't match. But we might
4694
        // still want to search it if compound mode depends on the current mode.
4695
0
        int skip_motion_mode_only = 0;
4696
0
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
4697
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
4698
0
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
4699
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
4700
0
        } else if (cached_mode == NEW_NEWMV) {
4701
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
4702
0
                                   ref_frame[0] == cached_frame[1]);
4703
0
        }
4704
4705
0
        return 1 + skip_motion_mode_only;
4706
0
      } else {
4707
        // If both modes are compound, then everything must match.
4708
0
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
4709
0
            ref_frame[1] != cached_frame[1]) {
4710
0
          return 1;
4711
0
        }
4712
0
      }
4713
0
    }
4714
0
  }
4715
4716
0
  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
4717
  // If no valid mode has been found so far in PARTITION_NONE when finding a
4718
  // valid partition is required, do not skip mode.
4719
0
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
4720
0
      x->must_find_valid_partition)
4721
0
    return 0;
4722
4723
0
  const SPEED_FEATURES *const sf = &cpi->sf;
4724
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
4725
  // frames
4726
0
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
4727
0
      (mode == NEAR_NEARMV || mode == NEARMV)) {
4728
0
    const MACROBLOCKD *const xd = &x->e_mbd;
4729
0
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
4730
0
        xd->up_available) {
4731
0
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
4732
0
                                                    { 1, 1, 0 },
4733
0
                                                    { 2, 1, 0 } };
4734
0
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;
4735
4736
0
      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
4737
0
             qindex_sub_range < 3);
4738
0
      const int num_ref_frame_pair_match_thresh =
4739
0
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
4740
0
                    [qindex_sub_range];
4741
4742
0
      assert(num_ref_frame_pair_match_thresh <= 2 &&
4743
0
             num_ref_frame_pair_match_thresh >= 0);
4744
0
      int num_ref_frame_pair_match = 0;
4745
4746
0
      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
4747
0
      num_ref_frame_pair_match +=
4748
0
          match_ref_frame_pair(xd->above_mbmi, ref_frame);
4749
4750
      // Pruning based on ref frame pair match with neighbors.
4751
0
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
4752
0
    }
4753
0
  }
4754
4755
0
  int skip_motion_mode = 0;
4756
0
  if (mbmi->partition != PARTITION_NONE) {
4757
0
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
4758
0
    if (ref_type <= ALTREF_FRAME && skip_ref) {
4759
      // Since the compound ref modes depends on the motion estimation result of
4760
      // two single ref modes (best mv of single ref modes as the start point),
4761
      // if current single ref mode is marked skip, we need to check if it will
4762
      // be used in compound ref modes.
4763
0
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
4764
        // Found a not skipped compound ref mode which contains current
4765
        // single ref. So this single ref can't be skipped completely
4766
        // Just skip its motion mode search, still try its simple
4767
        // transition mode.
4768
0
        skip_motion_mode = 1;
4769
0
        skip_ref = 0;
4770
0
      }
4771
0
    }
4772
    // If we are reusing the prediction from cache, and the current frame is
4773
    // required by the cache, then we cannot prune it.
4774
0
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
4775
0
      skip_ref = 0;
4776
      // If the cache only needs the current reference type for compound
4777
      // prediction, then we can skip motion mode search.
4778
0
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
4779
0
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
4780
0
    }
4781
0
    if (skip_ref) return 1;
4782
0
  }
4783
4784
0
  if (ref_frame[0] == INTRA_FRAME) {
4785
0
    if (mode != DC_PRED) {
4786
      // Disable intra modes other than DC_PRED for blocks with low variance
4787
      // Threshold for intra skipping based on source variance
4788
      // TODO(debargha): Specialize the threshold for super block sizes
4789
0
      const unsigned int skip_intra_var_thresh = 64;
4790
0
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
4791
0
          x->source_variance < skip_intra_var_thresh)
4792
0
        return 1;
4793
0
    }
4794
0
  }
4795
4796
0
  if (skip_motion_mode) return 2;
4797
4798
0
  return 0;
4799
0
}
4800
4801
static inline void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4802
                             const MV_REFERENCE_FRAME *ref_frames,
4803
0
                             const AV1_COMMON *cm) {
4804
0
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4805
0
  mbmi->ref_mv_idx = 0;
4806
0
  mbmi->mode = curr_mode;
4807
0
  mbmi->uv_mode = UV_DC_PRED;
4808
0
  mbmi->ref_frame[0] = ref_frames[0];
4809
0
  mbmi->ref_frame[1] = ref_frames[1];
4810
0
  pmi->palette_size[0] = 0;
4811
0
  pmi->palette_size[1] = 0;
4812
0
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
4813
0
  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4814
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
4815
0
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4816
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
4817
0
}
4818
4819
static inline void collect_single_states(MACROBLOCK *x,
4820
                                         InterModeSearchState *search_state,
4821
0
                                         const MB_MODE_INFO *const mbmi) {
4822
0
  int i, j;
4823
0
  const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
4824
0
  const PREDICTION_MODE this_mode = mbmi->mode;
4825
0
  const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
4826
0
  const int mode_offset = INTER_OFFSET(this_mode);
4827
0
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
4828
4829
  // Simple rd
4830
0
  int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
4831
0
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4832
0
    const int64_t rd =
4833
0
        search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
4834
0
    if (rd < simple_rd) simple_rd = rd;
4835
0
  }
4836
4837
  // Insertion sort of single_state
4838
0
  const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
4839
0
  SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
4840
0
  i = search_state->single_state_cnt[dir][mode_offset];
4841
0
  for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
4842
0
    state_s[j] = state_s[j - 1];
4843
0
  state_s[j] = this_state_s;
4844
0
  search_state->single_state_cnt[dir][mode_offset]++;
4845
4846
  // Modelled rd
4847
0
  int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
4848
0
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4849
0
    const int64_t rd =
4850
0
        search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
4851
0
    if (rd < modelled_rd) modelled_rd = rd;
4852
0
  }
4853
4854
  // Insertion sort of single_state_modelled
4855
0
  const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
4856
0
  SingleInterModeState *state_m =
4857
0
      search_state->single_state_modelled[dir][mode_offset];
4858
0
  i = search_state->single_state_modelled_cnt[dir][mode_offset];
4859
0
  for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
4860
0
    state_m[j] = state_m[j - 1];
4861
0
  state_m[j] = this_state_m;
4862
0
  search_state->single_state_modelled_cnt[dir][mode_offset]++;
4863
0
}
4864
4865
static inline void analyze_single_states(const AV1_COMP *cpi,
4866
0
                                         InterModeSearchState *search_state) {
4867
0
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
4868
0
  assert(prune_level >= 1);
4869
0
  int i, j, dir, mode;
4870
4871
0
  for (dir = 0; dir < 2; ++dir) {
4872
0
    int64_t best_rd;
4873
0
    SingleInterModeState(*state)[FWD_REFS];
4874
0
    const int prune_factor = prune_level >= 2 ? 6 : 5;
4875
4876
    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
4877
    // reference frames for all the modes (NEARESTMV and NEARMV may not
4878
    // have same motion vectors). Always keep the best of each mode
4879
    // because it might form the best possible combination with other mode.
4880
0
    state = search_state->single_state[dir];
4881
0
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
4882
0
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
4883
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4884
0
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
4885
0
        if (state[mode][i].rd != INT64_MAX &&
4886
0
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
4887
0
          state[mode][i].valid = 0;
4888
0
        }
4889
0
      }
4890
0
    }
4891
4892
0
    state = search_state->single_state_modelled[dir];
4893
0
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
4894
0
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
4895
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4896
0
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
4897
0
        if (state[mode][i].rd != INT64_MAX &&
4898
0
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
4899
0
          state[mode][i].valid = 0;
4900
0
        }
4901
0
      }
4902
0
    }
4903
0
  }
4904
4905
  // Ordering by simple rd first, then by modelled rd
4906
0
  for (dir = 0; dir < 2; ++dir) {
4907
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4908
0
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
4909
0
      const int state_cnt_m =
4910
0
          search_state->single_state_modelled_cnt[dir][mode];
4911
0
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
4912
0
      SingleInterModeState *state_m =
4913
0
          search_state->single_state_modelled[dir][mode];
4914
0
      int count = 0;
4915
0
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
4916
0
      for (i = 0; i < state_cnt_s; ++i) {
4917
0
        if (state_s[i].rd == INT64_MAX) break;
4918
0
        if (state_s[i].valid) {
4919
0
          search_state->single_rd_order[dir][mode][count++] =
4920
0
              state_s[i].ref_frame;
4921
0
        }
4922
0
      }
4923
0
      if (count >= max_candidates) continue;
4924
4925
0
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
4926
0
        if (state_m[i].rd == INT64_MAX) break;
4927
0
        if (!state_m[i].valid) continue;
4928
0
        const int ref_frame = state_m[i].ref_frame;
4929
0
        int match = 0;
4930
        // Check if existing already
4931
0
        for (j = 0; j < count; ++j) {
4932
0
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
4933
0
            match = 1;
4934
0
            break;
4935
0
          }
4936
0
        }
4937
0
        if (match) continue;
4938
        // Check if this ref_frame is removed in simple rd
4939
0
        int valid = 1;
4940
0
        for (j = 0; j < state_cnt_s; ++j) {
4941
0
          if (ref_frame == state_s[j].ref_frame) {
4942
0
            valid = state_s[j].valid;
4943
0
            break;
4944
0
          }
4945
0
        }
4946
0
        if (valid) {
4947
0
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
4948
0
        }
4949
0
      }
4950
0
    }
4951
0
  }
4952
0
}
4953
4954
static int compound_skip_get_candidates(
4955
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
4956
0
    const int dir, const PREDICTION_MODE mode) {
4957
0
  const int mode_offset = INTER_OFFSET(mode);
4958
0
  const SingleInterModeState *state =
4959
0
      search_state->single_state[dir][mode_offset];
4960
0
  const SingleInterModeState *state_modelled =
4961
0
      search_state->single_state_modelled[dir][mode_offset];
4962
4963
0
  int max_candidates = 0;
4964
0
  for (int i = 0; i < FWD_REFS; ++i) {
4965
0
    if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4966
0
    max_candidates++;
4967
0
  }
4968
4969
0
  int candidates = max_candidates;
4970
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4971
0
    candidates = AOMMIN(2, max_candidates);
4972
0
  }
4973
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4974
0
    if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4975
0
        state[0].ref_frame == state_modelled[0].ref_frame)
4976
0
      candidates = 1;
4977
0
    if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4978
0
  }
4979
4980
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4981
    // Limit the number of candidates to 1 in each direction for compound
4982
    // prediction
4983
0
    candidates = AOMMIN(1, candidates);
4984
0
  }
4985
0
  return candidates;
4986
0
}
4987
4988
static AOM_FORCE_INLINE int compound_skip_by_single_states(
4989
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
4990
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
4991
0
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
4992
0
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
4993
0
  const int mode[2] = { compound_ref0_mode(this_mode),
4994
0
                        compound_ref1_mode(this_mode) };
4995
0
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
4996
0
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
4997
0
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
4998
0
  int ref_searched[2] = { 0, 0 };
4999
0
  int ref_mv_match[2] = { 1, 1 };
5000
0
  int i, j;
5001
5002
0
  for (i = 0; i < 2; ++i) {
5003
0
    const SingleInterModeState *state =
5004
0
        search_state->single_state[mode_dir[i]][mode_offset[i]];
5005
0
    const int state_cnt =
5006
0
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
5007
0
    for (j = 0; j < state_cnt; ++j) {
5008
0
      if (state[j].ref_frame == refs[i]) {
5009
0
        ref_searched[i] = 1;
5010
0
        break;
5011
0
      }
5012
0
    }
5013
0
  }
5014
5015
0
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
5016
0
  for (i = 0; i < 2; ++i) {
5017
0
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
5018
0
      continue;
5019
0
    }
5020
0
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
5021
0
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
5022
0
      int_mv single_mv;
5023
0
      int_mv comp_mv;
5024
0
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
5025
0
                  &x->mbmi_ext);
5026
0
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
5027
0
      if (single_mv.as_int != comp_mv.as_int) {
5028
0
        ref_mv_match[i] = 0;
5029
0
        break;
5030
0
      }
5031
0
    }
5032
0
  }
5033
5034
0
  for (i = 0; i < 2; ++i) {
5035
0
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
5036
0
    const int candidates =
5037
0
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
5038
0
    const MV_REFERENCE_FRAME *ref_order =
5039
0
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
5040
0
    int match = 0;
5041
0
    for (j = 0; j < candidates; ++j) {
5042
0
      if (refs[i] == ref_order[j]) {
5043
0
        match = 1;
5044
0
        break;
5045
0
      }
5046
0
    }
5047
0
    if (!match) return 1;
5048
0
  }
5049
5050
0
  return 0;
5051
0
}
5052
5053
// Check if ref frames of current block matches with given block.
5054
static inline void match_ref_frame(const MB_MODE_INFO *const mbmi,
5055
                                   const MV_REFERENCE_FRAME *ref_frames,
5056
0
                                   int *const is_ref_match) {
5057
0
  if (is_inter_block(mbmi)) {
5058
0
    is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
5059
0
    is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
5060
0
    if (has_second_ref(mbmi)) {
5061
0
      is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
5062
0
      is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
5063
0
    }
5064
0
  }
5065
0
}
5066
5067
// Prune compound mode using ref frames of neighbor blocks.
5068
static inline int compound_skip_using_neighbor_refs(
5069
    MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
5070
0
    const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
5071
  // Exclude non-extended compound modes from pruning
5072
0
  if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
5073
0
      this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
5074
0
    return 0;
5075
5076
0
  if (prune_ext_comp_using_neighbors >= 3) return 1;
5077
5078
0
  int is_ref_match[2] = { 0 };  // 0 - match for forward refs
5079
                                // 1 - match for backward refs
5080
  // Check if ref frames of this block matches with left neighbor.
5081
0
  if (xd->left_available)
5082
0
    match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
5083
5084
  // Check if ref frames of this block matches with above neighbor.
5085
0
  if (xd->up_available)
5086
0
    match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
5087
5088
  // Combine ref frame match with neighbors in forward and backward refs.
5089
0
  const int track_ref_match = is_ref_match[0] + is_ref_match[1];
5090
5091
  // Pruning based on ref frame match with neighbors.
5092
0
  if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
5093
0
  return 1;
5094
0
}
5095
5096
// Update best single mode for the given reference frame based on simple rd.
5097
static inline void update_best_single_mode(InterModeSearchState *search_state,
5098
                                           const PREDICTION_MODE this_mode,
5099
                                           const MV_REFERENCE_FRAME ref_frame,
5100
0
                                           int64_t this_rd) {
5101
0
  if (this_rd < search_state->best_single_rd[ref_frame]) {
5102
0
    search_state->best_single_rd[ref_frame] = this_rd;
5103
0
    search_state->best_single_mode[ref_frame] = this_mode;
5104
0
  }
5105
0
}
5106
5107
// Prune compound mode using best single mode for the same reference.
5108
static inline int skip_compound_using_best_single_mode_ref(
5109
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
5110
    const PREDICTION_MODE *best_single_mode,
5111
0
    int prune_comp_using_best_single_mode_ref) {
5112
  // Exclude non-extended compound modes from pruning
5113
0
  if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
5114
0
      this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
5115
0
    return 0;
5116
5117
0
  assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
5118
0
  const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
5119
  // Get ref frame direction corresponding to NEWMV
5120
  // 0 - NEWMV corresponding to forward direction
5121
  // 1 - NEWMV corresponding to backward direction
5122
0
  const int newmv_dir = comp_mode_ref0 != NEWMV;
5123
5124
  // Avoid pruning the compound mode when ref frame corresponding to NEWMV
5125
  // have NEWMV as single mode winner.
5126
  // Example: For an extended-compound mode,
5127
  // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
5128
  // - Ref frame corresponding to NEWMV is ALTREF_FRAME
5129
  // - Avoid pruning this mode, if best single mode corresponding to ref frame
5130
  //   ALTREF_FRAME is NEWMV
5131
0
  const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
5132
0
  if (single_mode == NEWMV) return 0;
5133
5134
  // Avoid pruning the compound mode when best single mode is not available
5135
0
  if (prune_comp_using_best_single_mode_ref == 1)
5136
0
    if (single_mode == MB_MODE_COUNT) return 0;
5137
0
  return 1;
5138
0
}
5139
5140
0
// qsort()-compatible three-way comparison of two int64_t values.
// Returns -1 when *a < *b, 0 when they are equal and 1 when *a > *b.
// The values are compared directly (no subtraction), so the full int64_t
// range is handled without overflow.
static int compare_int64(const void *a, const void *b) {
  // Read through const-qualified pointers: the comparator must not modify the
  // elements, and the casts no longer discard the qualifier.
  const int64_t lhs = *(const int64_t *)a;
  const int64_t rhs = *(const int64_t *)b;
  return (lhs > rhs) - (lhs < rhs);
}
5151
5152
static inline void update_search_state(
5153
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
5154
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
5155
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
5156
0
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
5157
0
  const MACROBLOCKD *xd = &x->e_mbd;
5158
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
5159
0
  const int skip_ctx = av1_get_skip_txfm_context(xd);
5160
0
  const int skip_txfm =
5161
0
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
5162
5163
0
  search_state->best_rd = new_best_rd_stats->rdcost;
5164
0
  search_state->best_mode_index = new_best_mode;
5165
0
  *best_rd_stats_dst = *new_best_rd_stats;
5166
0
  search_state->best_mbmode = *mbmi;
5167
0
  search_state->best_skip2 = skip_txfm;
5168
0
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
5169
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
5170
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
5171
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
5172
  // These two values will be updated when av1_txfm_search is called.
5173
0
  if (txfm_search_done) {
5174
0
    search_state->best_rate_y =
5175
0
        new_best_rd_stats_y->rate +
5176
0
        x->mode_costs.skip_txfm_cost[skip_ctx]
5177
0
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
5178
0
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
5179
0
  }
5180
0
  search_state->best_y_rdcost = *new_best_rd_stats_y;
5181
0
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
5182
0
}
5183
5184
// Find the best RD for a reference frame (among single reference modes)
5185
// and store +10% of it in the 0-th element in ref_frame_rd.
5186
0
static inline void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
5187
0
  assert(ref_frame_rd[0] == INT64_MAX);
5188
0
  int64_t ref_copy[REF_FRAMES - 1];
5189
0
  memcpy(ref_copy, ref_frame_rd + 1,
5190
0
         sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
5191
0
  qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
5192
5193
0
  int64_t cutoff = ref_copy[0];
5194
  // The cut-off is within 10% of the best.
5195
0
  if (cutoff != INT64_MAX) {
5196
0
    assert(cutoff < INT64_MAX / 200);
5197
0
    cutoff = (110 * cutoff) / 100;
5198
0
  }
5199
0
  ref_frame_rd[0] = cutoff;
5200
0
}
5201
5202
// Check if either frame is within the cutoff.
5203
static inline bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
5204
                                        MV_REFERENCE_FRAME frame1,
5205
0
                                        MV_REFERENCE_FRAME frame2) {
5206
0
  assert(frame2 > 0);
5207
0
  return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
5208
0
         ref_frame_rd[frame2] <= ref_frame_rd[0];
5209
0
}
5210
5211
static inline void evaluate_motion_mode_for_winner_candidates(
5212
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
5213
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
5214
    PICK_MODE_CONTEXT *const ctx,
5215
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
5216
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
5217
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
5218
0
    InterModeSearchState *const search_state, int64_t *yrd) {
5219
0
  const AV1_COMMON *const cm = &cpi->common;
5220
0
  const int num_planes = av1_num_planes(cm);
5221
0
  MACROBLOCKD *const xd = &x->e_mbd;
5222
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
5223
0
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
5224
0
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;
5225
5226
0
  for (int cand = 0; cand < num_best_cand; cand++) {
5227
0
    RD_STATS rd_stats;
5228
0
    RD_STATS rd_stats_y;
5229
0
    RD_STATS rd_stats_uv;
5230
0
    av1_init_rd_stats(&rd_stats);
5231
0
    av1_init_rd_stats(&rd_stats_y);
5232
0
    av1_init_rd_stats(&rd_stats_uv);
5233
0
    int rate_mv;
5234
5235
0
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
5236
0
    args->skip_motion_mode =
5237
0
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
5238
0
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
5239
0
    rd_stats.rate =
5240
0
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;
5241
5242
    // Continue if the best candidate is compound.
5243
0
    if (!is_inter_singleref_mode(mbmi->mode)) continue;
5244
5245
0
    x->txfm_search_info.skip_txfm = 0;
5246
0
    struct macroblockd_plane *pd = xd->plane;
5247
0
    const BUFFER_SET orig_dst = {
5248
0
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
5249
0
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
5250
0
    };
5251
5252
0
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5253
    // Initialize motion mode to simple translation
5254
    // Calculation of switchable rate depends on it.
5255
0
    mbmi->motion_mode = 0;
5256
0
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
5257
0
    for (int i = 0; i < num_planes; i++) {
5258
0
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
5259
0
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
5260
0
    }
5261
5262
0
    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
5263
0
                           search_state->best_skip_rd[1] };
5264
0
    int64_t this_yrd = INT64_MAX;
5265
0
    int64_t ret_value = motion_mode_rd(
5266
0
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
5267
0
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
5268
0
        do_tx_search, inter_modes_info, 1, &this_yrd);
5269
5270
0
    if (ret_value != INT64_MAX) {
5271
0
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
5272
0
      const THR_MODES mode_enum = get_prediction_mode_idx(
5273
0
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5274
      // Collect mode stats for multiwinner mode processing
5275
0
      store_winner_mode_stats(
5276
0
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
5277
0
          mode_enum, NULL, bsize, rd_stats.rdcost,
5278
0
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
5279
5280
0
      int64_t best_scaled_rd = search_state->best_rd;
5281
0
      int64_t this_scaled_rd = rd_stats.rdcost;
5282
0
      if (search_state->best_mode_index != THR_INVALID)
5283
0
        increase_motion_mode_rd(&search_state->best_mbmode, mbmi,
5284
0
                                &best_scaled_rd, &this_scaled_rd,
5285
0
                                cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct,
5286
0
                                cpi->sf.inter_sf.bias_obmc_mode_rd_scale_pct);
5287
5288
0
      if (this_scaled_rd < best_scaled_rd) {
5289
0
        *yrd = this_yrd;
5290
0
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
5291
0
                            &rd_stats_uv, mode_enum, x, do_tx_search);
5292
0
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
5293
0
      }
5294
0
    }
5295
0
  }
5296
0
}
5297
5298
/*!\cond */
5299
// Arguments for speed feature pruning of inter mode search.
// Bundles the inputs and the running state that skip_inter_mode() reads and
// updates while deciding whether a mode is skipped.
5300
typedef struct {
5301
  int *skip_motion_mode;  // Out: set to 1 when the order-independent skip check returns 2, else 0.
5302
  mode_skip_mask_t *mode_skip_mask;  // Forwarded to inter_mode_search_order_independent_skip().
5303
  InterModeSearchState *search_state;  // Search state consulted (and analyzed) by the pruning checks.
5304
  int skip_ref_frame_mask;  // Forwarded to inter_mode_search_order_independent_skip().
5305
  int reach_first_comp_mode;  // Set to 1 once analyze_single_states() has run for the first compound mode.
5306
  int mode_thresh_mul_fact;  // Mode threshold multiplier (MODE_THRESH_QBITS fixed point) used when the best mode is skippable.
5307
  int num_single_modes_processed;  // Compared against NUM_SINGLE_REF_MODES before find_top_ref() is run.
5308
  int prune_cpd_using_sr_stats_ready;  // Set to 1 once find_top_ref() has produced the single-ref cutoff.
5309
} InterModeSFArgs;
5310
/*!\endcond */
5311
5312
static AOM_FORCE_INLINE int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x,
5313
                                            const BLOCK_SIZE bsize,
5314
                                            int64_t *ref_frame_rd, int midx,
5315
                                            InterModeSFArgs *args,
5316
0
                                            int is_low_temp_var) {
5317
0
  const SPEED_FEATURES *const sf = &cpi->sf;
5318
0
  MACROBLOCKD *const xd = &x->e_mbd;
5319
  // Get the actual prediction mode we are trying in this iteration
5320
0
  const THR_MODES mode_enum = av1_default_mode_order[midx];
5321
0
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
5322
0
  const PREDICTION_MODE this_mode = mode_def->mode;
5323
0
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
5324
0
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
5325
0
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
5326
0
  const int comp_pred = second_ref_frame > INTRA_FRAME;
5327
5328
0
  if (ref_frame == INTRA_FRAME) return 1;
5329
5330
0
  const FRAME_UPDATE_TYPE update_type =
5331
0
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
5332
0
  if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
5333
0
      comp_pred) {
5334
0
    return 1;
5335
0
  }
5336
5337
  // This is for real time encoding.
5338
0
  if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
5339
0
      this_mode != NEARESTMV)
5340
0
    return 1;
5341
5342
  // Check if this mode should be skipped because it is incompatible with the
5343
  // current frame
5344
0
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
5345
0
    return 1;
5346
0
  const int ret = inter_mode_search_order_independent_skip(
5347
0
      cpi, x, args->mode_skip_mask, args->search_state,
5348
0
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
5349
0
  if (ret == 1) return 1;
5350
0
  *(args->skip_motion_mode) = (ret == 2);
5351
5352
  // We've reached the first compound prediction mode, get stats from the
5353
  // single reference predictors to help with pruning.
5354
  // Disable this pruning logic if interpolation filter search was skipped for
5355
  // single prediction modes as it can result in aggressive pruning of compound
5356
  // prediction modes due to the absence of modelled_rd populated by
5357
  // av1_interpolation_filter_search().
5358
  // TODO(Remya): Check the impact of the sf
5359
  // 'prune_comp_search_by_single_result' if compound prediction modes are
5360
  // enabled in future for REALTIME encode.
5361
0
  if (!sf->interp_sf.skip_interp_filter_search &&
5362
0
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
5363
0
      args->reach_first_comp_mode == 0) {
5364
0
    analyze_single_states(cpi, args->search_state);
5365
0
    args->reach_first_comp_mode = 1;
5366
0
  }
5367
5368
  // Prune aggressively when best mode is skippable.
5369
0
  int mul_fact = args->search_state->best_mode_skippable
5370
0
                     ? args->mode_thresh_mul_fact
5371
0
                     : (1 << MODE_THRESH_QBITS);
5372
0
  int64_t mode_threshold =
5373
0
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
5374
0
      MODE_THRESH_QBITS;
5375
5376
0
  if (args->search_state->best_rd < mode_threshold) return 1;
5377
5378
  // Skip this compound mode based on the RD results from the single prediction
5379
  // modes
5380
0
  if (!sf->interp_sf.skip_interp_filter_search &&
5381
0
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
5382
0
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
5383
0
                                       ref_frame, second_ref_frame, x))
5384
0
      return 1;
5385
0
  }
5386
5387
0
  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
5388
    // After we done with single reference modes, find the 2nd best RD
5389
    // for a reference frame. Only search compound modes that have a reference
5390
    // frame at least as good as the 2nd best.
5391
0
    if (!args->prune_cpd_using_sr_stats_ready &&
5392
0
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
5393
0
      find_top_ref(ref_frame_rd);
5394
0
      args->prune_cpd_using_sr_stats_ready = 1;
5395
0
    }
5396
0
    if (args->prune_cpd_using_sr_stats_ready &&
5397
0
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
5398
0
      return 1;
5399
0
  }
5400
5401
  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
5402
0
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
5403
0
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
5404
0
    return 1;
5405
0
  }
5406
5407
0
  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
5408
0
    if (compound_skip_using_neighbor_refs(
5409
0
            xd, this_mode, ref_frames,
5410
0
            sf->inter_sf.prune_ext_comp_using_neighbors))
5411
0
      return 1;
5412
0
  }
5413
5414
0
  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
5415
0
    if (skip_compound_using_best_single_mode_ref(
5416
0
            this_mode, ref_frames, args->search_state->best_single_mode,
5417
0
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
5418
0
      return 1;
5419
0
  }
5420
5421
0
  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
5422
0
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
5423
0
    if (skip_nearest_near_mv_using_refmv_weight(
5424
0
            x, this_mode, ref_frame_type,
5425
0
            args->search_state->best_mbmode.mode)) {
5426
      // Ensure the mode is pruned only when the current block has obtained a
5427
      // valid inter mode.
5428
0
      assert(is_inter_mode(args->search_state->best_mbmode.mode));
5429
0
      return 1;
5430
0
    }
5431
0
  }
5432
5433
0
  if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
5434
0
      ref_frame == GOLDEN_FRAME && !comp_pred) {
5435
0
    const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
5436
0
    if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
5437
0
        args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
5438
0
      if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
5439
0
        return 1;
5440
0
    }
5441
0
  }
5442
5443
0
  return 0;
5444
0
}
5445
5446
static void record_best_compound(REFERENCE_MODE reference_mode,
5447
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
5448
                                 InterModeSearchState *search_state,
5449
0
                                 int compmode_cost) {
5450
0
  int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
5451
5452
0
  if (reference_mode == REFERENCE_MODE_SELECT) {
5453
0
    single_rate = rd_stats->rate - compmode_cost;
5454
0
    hybrid_rate = rd_stats->rate;
5455
0
  } else {
5456
0
    single_rate = rd_stats->rate;
5457
0
    hybrid_rate = rd_stats->rate + compmode_cost;
5458
0
  }
5459
5460
0
  single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
5461
0
  hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);
5462
5463
0
  if (!comp_pred) {
5464
0
    if (single_rd < search_state->best_pred_rd[SINGLE_REFERENCE])
5465
0
      search_state->best_pred_rd[SINGLE_REFERENCE] = single_rd;
5466
0
  } else {
5467
0
    if (single_rd < search_state->best_pred_rd[COMPOUND_REFERENCE])
5468
0
      search_state->best_pred_rd[COMPOUND_REFERENCE] = single_rd;
5469
0
  }
5470
0
  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
5471
0
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
5472
0
}
5473
5474
// Does a transform search over a list of the best inter mode candidates.
5475
// This is called if the original mode search computed an RD estimate
5476
// for the transform search rather than doing a full search.
5477
static void tx_search_best_inter_candidates(
5478
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
5479
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
5480
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
5481
    InterModeSearchState *search_state, RD_STATS *rd_cost,
5482
0
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
5483
0
  AV1_COMMON *const cm = &cpi->common;
5484
0
  MACROBLOCKD *const xd = &x->e_mbd;
5485
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5486
0
  const ModeCosts *mode_costs = &x->mode_costs;
5487
0
  const int num_planes = av1_num_planes(cm);
5488
0
  const int skip_ctx = av1_get_skip_txfm_context(xd);
5489
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
5490
0
  InterModesInfo *inter_modes_info = x->inter_modes_info;
5491
0
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
5492
0
  search_state->best_rd = best_rd_so_far;
5493
0
  search_state->best_mode_index = THR_INVALID;
5494
  // Initialize best mode stats for winner mode processing
5495
0
  x->winner_mode_count = 0;
5496
0
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5497
0
                          NULL, bsize, best_rd_so_far,
5498
0
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
5499
0
  inter_modes_info->num =
5500
0
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
5501
0
          ? inter_modes_info->num
5502
0
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
5503
0
  const int64_t top_est_rd =
5504
0
      inter_modes_info->num > 0
5505
0
          ? inter_modes_info
5506
0
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
5507
0
          : INT64_MAX;
5508
0
  *yrd = INT64_MAX;
5509
0
  int64_t best_rd_in_this_partition = INT64_MAX;
5510
0
  int num_inter_mode_cands = inter_modes_info->num;
5511
0
  int newmv_mode_evaled = 0;
5512
0
  int max_allowed_cands = INT_MAX;
5513
0
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
5514
    // The bound on the no. of inter mode candidates, beyond which the
5515
    // candidates are limited if a newmv mode got evaluated, is set as
5516
    // max_allowed_cands + 1.
5517
0
    const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
5518
0
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
5519
0
    max_allowed_cands =
5520
0
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
5521
0
  }
5522
5523
0
  int num_mode_thresh = INT_MAX;
5524
0
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
5525
    // Bound the no. of transform searches per prediction mode beyond a
5526
    // threshold.
5527
0
    const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
5528
0
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
5529
0
    num_mode_thresh =
5530
0
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
5531
0
  }
5532
5533
0
  int num_tx_cands = 0;
5534
0
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
5535
  // Iterate over best inter mode candidates and perform tx search
5536
0
  for (int j = 0; j < num_inter_mode_cands; ++j) {
5537
0
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
5538
0
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
5539
0
    const PREDICTION_MODE prediction_mode = mbmi->mode;
5540
0
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
5541
0
    if (curr_est_rd * 0.80 > top_est_rd) break;
5542
5543
0
    if (num_tx_cands > num_mode_thresh) {
5544
0
      if ((prediction_mode != NEARESTMV &&
5545
0
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
5546
0
          (prediction_mode == NEARESTMV &&
5547
0
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
5548
0
        continue;
5549
0
    }
5550
5551
0
    txfm_info->skip_txfm = 0;
5552
0
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5553
5554
    // Select prediction reference frames.
5555
0
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
5556
0
    for (int i = 0; i < num_planes; i++) {
5557
0
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
5558
0
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
5559
0
    }
5560
5561
0
    bool is_predictor_built = false;
5562
5563
    // Initialize RD stats
5564
0
    RD_STATS rd_stats;
5565
0
    RD_STATS rd_stats_y;
5566
0
    RD_STATS rd_stats_uv;
5567
0
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
5568
0
    int64_t skip_rd = INT64_MAX;
5569
0
    const int txfm_rd_gate_level = get_txfm_rd_gate_level(
5570
0
        cm->seq_params->enable_masked_compound,
5571
0
        cpi->sf.inter_sf.txfm_rd_gate_level, bsize, TX_SEARCH_DEFAULT,
5572
0
        /*eval_motion_mode=*/0);
5573
0
    if (txfm_rd_gate_level) {
5574
      // Check if the mode is good enough based on skip RD
5575
0
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
5576
0
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
5577
0
      int eval_txfm = check_txfm_eval(x, bsize, search_state->best_skip_rd[0],
5578
0
                                      skip_rd, txfm_rd_gate_level, 0);
5579
0
      if (!eval_txfm) continue;
5580
0
    }
5581
5582
    // Build the prediction for this mode
5583
0
    if (!is_predictor_built) {
5584
0
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
5585
0
                                    av1_num_planes(cm) - 1);
5586
0
    }
5587
0
    if (mbmi->motion_mode == OBMC_CAUSAL) {
5588
0
      av1_build_obmc_inter_predictors_sb(cm, xd);
5589
0
    }
5590
5591
0
    num_tx_cands++;
5592
0
    if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
5593
0
    num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
5594
0
    int64_t this_yrd = INT64_MAX;
5595
    // Do the transform search
5596
0
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
5597
0
                         mode_rate, search_state->best_rd)) {
5598
0
      continue;
5599
0
    } else {
5600
0
      const int y_rate =
5601
0
          rd_stats.skip_txfm
5602
0
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
5603
0
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
5604
0
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);
5605
5606
0
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
5607
0
        inter_mode_data_push(
5608
0
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
5609
0
            rd_stats_y.rate + rd_stats_uv.rate +
5610
0
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
5611
0
      }
5612
0
    }
5613
5614
0
    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
5615
5616
0
    const THR_MODES mode_enum = get_prediction_mode_idx(
5617
0
        prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5618
5619
    // Collect mode stats for multiwinner mode processing
5620
0
    const int txfm_search_done = 1;
5621
0
    store_winner_mode_stats(
5622
0
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
5623
0
        NULL, bsize, rd_stats.rdcost,
5624
0
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
5625
5626
0
    int64_t best_scaled_rd = search_state->best_rd;
5627
0
    int64_t this_scaled_rd = rd_stats.rdcost;
5628
0
    increase_motion_mode_rd(&search_state->best_mbmode, mbmi, &best_scaled_rd,
5629
0
                            &this_scaled_rd,
5630
0
                            cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct,
5631
0
                            cpi->sf.inter_sf.bias_obmc_mode_rd_scale_pct);
5632
0
    if (this_scaled_rd < best_rd_in_this_partition) {
5633
0
      best_rd_in_this_partition = rd_stats.rdcost;
5634
0
      *yrd = this_yrd;
5635
0
    }
5636
5637
0
    if (this_scaled_rd < best_scaled_rd) {
5638
0
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
5639
0
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
5640
0
      search_state->best_skip_rd[0] = skip_rd;
5641
      // Limit the total number of modes to be evaluated if the first is valid
5642
      // and transform skip or compound
5643
0
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
5644
0
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
5645
          // Evaluate more candidates at high quantizers where occurrence of
5646
          // transform skip is high.
5647
0
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
5648
0
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
5649
0
          num_inter_mode_cands =
5650
0
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
5651
0
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
5652
0
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
5653
          // Evaluate more candidates at low quantizers where occurrence of
5654
          // single reference mode is high.
5655
0
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
5656
0
                                                { 10, 7, 5, 3 } };
5657
0
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
5658
0
          num_inter_mode_cands = AOMMIN(
5659
0
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
5660
0
        }
5661
0
      }
5662
0
    }
5663
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
5664
    // a newmv mode was evaluated already.
5665
0
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
5666
0
  }
5667
0
}
5668
5669
// Indicates number of winner simple translation modes to be used
5670
// Lookup table read with the motion_mode_for_winner_cand speed-feature level
// as the index. The looked-up count is asserted to be no larger than
// MAX_WINNER_MOTION_MODES at the point where it is read.
static const unsigned int num_winner_motion_modes[3] = {
  [0] = 0,
  [1] = 10,
  [2] = 3,
};
5671
5672
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
5673
// speed feature. This list consists of modes that have only searched
5674
// SIMPLE_TRANSLATION. The final list will be used to search other motion
5675
// modes after the initial RD search.
5676
static void handle_winner_cand(
5677
    MB_MODE_INFO *const mbmi,
5678
    motion_mode_best_st_candidate *best_motion_mode_cands,
5679
    int max_winner_motion_mode_cand, int64_t this_rd,
5680
0
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
5681
  // Number of current motion mode candidates in list
5682
0
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
5683
0
  int valid_motion_mode_cand_loc = num_motion_mode_cand;
5684
5685
  // find the best location to insert new motion mode candidate
5686
0
  for (int j = 0; j < num_motion_mode_cand; j++) {
5687
0
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
5688
0
      valid_motion_mode_cand_loc = j;
5689
0
      break;
5690
0
    }
5691
0
  }
5692
5693
  // Insert motion mode if location is found
5694
0
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
5695
0
    if (num_motion_mode_cand > 0 &&
5696
0
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
5697
0
      memmove(
5698
0
          &best_motion_mode_cands
5699
0
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
5700
0
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
5701
0
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
5702
0
           valid_motion_mode_cand_loc) *
5703
0
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
5704
0
    motion_mode_cand->mbmi = *mbmi;
5705
0
    motion_mode_cand->rd_cost = this_rd;
5706
0
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
5707
0
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
5708
0
        *motion_mode_cand;
5709
0
    best_motion_mode_cands->num_motion_mode_cand =
5710
0
        AOMMIN(max_winner_motion_mode_cand,
5711
0
               best_motion_mode_cands->num_motion_mode_cand + 1);
5712
0
  }
5713
0
}
5714
5715
/*!\brief Search intra modes in interframes
5716
 *
5717
 * \ingroup intra_mode_search
5718
 *
5719
 * This function searches for the best intra mode when the current frame is an
5720
 * interframe. This function however does *not* handle luma palette mode.
5721
 * Palette mode is currently handled by \ref av1_search_palette_mode.
5722
 *
5723
 * This function will first iterate through the luma mode candidates to find the
5724
 * best luma intra mode. Once the best luma mode is found, it will then search
5725
 * for the best chroma mode. Because palette mode is currently not handled by
5726
 * here, a cache of uv mode is stored in
5727
 * InterModeSearchState::intra_search_state so it can be reused later by \ref
5728
 * av1_search_palette_mode.
5729
 *
5730
 * \param[in,out] search_state      Struct keep track of the prediction mode
5731
 *                                  search state in interframe.
5732
 *
5733
 * \param[in]     cpi               Top-level encoder structure.
5734
 * \param[in,out] x                 Pointer to struct holding all the data for
5735
 *                                  the current prediction block.
5736
 * \param[out]    rd_cost           Stores the best rd_cost among all the
5737
 *                                  prediction modes searched.
5738
 * \param[in]     bsize             Current block size.
5739
 * \param[in,out] ctx               Structure to hold the number of 4x4 blks to
5740
 *                                  copy the tx_type and txfm_skip arrays.
5741
 *                                  for only the Y plane.
5742
 * \param[in]     sf_args           Stores the list of intra mode candidates
5743
 *                                  to be searched.
5744
 * \param[in]     intra_ref_frame_cost  The entropy cost for signaling that the
5745
 *                                      current ref frame is an intra frame.
5746
 * \param[in]     yrd_threshold     The rdcost threshold for luma intra mode to
5747
 *                                  terminate chroma intra mode search.
5748
 *
5749
 * \remark If a new best mode is found, search_state and rd_costs are updated
5750
 * correspondingly. While x is also modified, it is only used as a temporary
5751
 * buffer, and the final decisions are stored in search_state.
5752
 */
5753
static inline void search_intra_modes_in_interframe(
5754
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
5755
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5756
    const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
5757
0
    int64_t yrd_threshold) {
5758
0
  const AV1_COMMON *const cm = &cpi->common;
5759
0
  const SPEED_FEATURES *const sf = &cpi->sf;
5760
0
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
5761
0
  MACROBLOCKD *const xd = &x->e_mbd;
5762
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
5763
0
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;
5764
5765
0
  int is_best_y_mode_intra = 0;
5766
0
  RD_STATS best_intra_rd_stats_y;
5767
0
  int64_t best_rd_y = INT64_MAX;
5768
0
  int best_mode_cost_y = -1;
5769
0
  MB_MODE_INFO best_mbmi = *xd->mi[0];
5770
0
  THR_MODES best_mode_enum = THR_INVALID;
5771
0
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
5772
0
  const int num_4x4 = bsize_to_num_blk(bsize);
5773
5774
  // Performs luma search
5775
0
  int64_t best_model_rd = INT64_MAX;
5776
0
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
5777
0
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
5778
0
    top_intra_model_rd[i] = INT64_MAX;
5779
0
  }
5780
5781
0
  if (cpi->oxcf.algo_cfg.sharpness) {
5782
0
    int bh = mi_size_high[bsize];
5783
0
    int bw = mi_size_wide[bsize];
5784
0
    if (bh > 4 || bw > 4) return;
5785
0
  }
5786
5787
0
  mbmi->skip_txfm = 0;
5788
5789
0
  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
5790
0
    if (sf->intra_sf.skip_intra_in_interframe &&
5791
0
        search_state->intra_search_state.skip_intra_modes)
5792
0
      break;
5793
0
    set_y_mode_and_delta_angle(
5794
0
        mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
5795
0
    assert(mbmi->mode < INTRA_MODE_END);
5796
5797
    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
5798
0
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
5799
0
      continue;
5800
5801
0
    const THR_MODES mode_enum =
5802
0
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
5803
0
    if ((!intra_mode_cfg->enable_smooth_intra ||
5804
0
         cpi->sf.intra_sf.disable_smooth_intra) &&
5805
0
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
5806
0
         mbmi->mode == SMOOTH_V_PRED))
5807
0
      continue;
5808
0
    if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
5809
0
      continue;
5810
0
    if (av1_is_directional_mode(mbmi->mode) &&
5811
0
        !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
5812
0
        mbmi->angle_delta[PLANE_TYPE_Y] != 0)
5813
0
      continue;
5814
0
    const PREDICTION_MODE this_mode = mbmi->mode;
5815
5816
0
    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
5817
0
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
5818
0
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
5819
0
    x->txfm_search_info.skip_txfm = 0;
5820
5821
0
    if (this_mode != DC_PRED) {
5822
      // Only search the oblique modes if the best so far is
5823
      // one of the neighboring directional modes
5824
0
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
5825
0
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
5826
0
        if (search_state->best_mode_index != THR_INVALID &&
5827
0
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
5828
0
          continue;
5829
0
      }
5830
0
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
5831
0
        if (conditional_skipintra(
5832
0
                this_mode, search_state->intra_search_state.best_intra_mode))
5833
0
          continue;
5834
0
      }
5835
0
    }
5836
5837
0
    RD_STATS intra_rd_stats_y;
5838
0
    int mode_cost_y;
5839
0
    int64_t intra_rd_y = INT64_MAX;
5840
0
    const int is_luma_result_valid = av1_handle_intra_y_mode(
5841
0
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
5842
0
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
5843
0
        &best_model_rd, top_intra_model_rd);
5844
5845
0
    if (intra_rd_y < INT64_MAX) {
5846
0
      adjust_cost(cpi, x, &intra_rd_y, /*is_inter_pred=*/false);
5847
0
    }
5848
5849
0
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
5850
0
      is_best_y_mode_intra = 1;
5851
0
      if (intra_rd_y < best_rd_y) {
5852
0
        best_intra_rd_stats_y = intra_rd_stats_y;
5853
0
        best_mode_cost_y = mode_cost_y;
5854
0
        best_rd_y = intra_rd_y;
5855
0
        best_mbmi = *mbmi;
5856
0
        best_mode_enum = mode_enum;
5857
0
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
5858
0
      }
5859
0
    }
5860
0
  }
5861
5862
0
  if (!is_best_y_mode_intra) {
5863
0
    return;
5864
0
  }
5865
5866
0
  assert(best_rd_y < INT64_MAX);
5867
5868
  // Restores the best luma mode
5869
0
  *mbmi = best_mbmi;
5870
0
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);
5871
5872
  // Performs chroma search
5873
0
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
5874
0
  av1_init_rd_stats(&intra_rd_stats);
5875
0
  av1_init_rd_stats(&intra_rd_stats_uv);
5876
0
  const int num_planes = av1_num_planes(cm);
5877
0
  if (num_planes > 1) {
5878
0
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
5879
0
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
5880
0
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);
5881
5882
0
    if (!intra_uv_mode_valid) {
5883
0
      return;
5884
0
    }
5885
0
  }
5886
5887
  // Merge the luma and chroma rd stats
5888
0
  assert(best_mode_cost_y >= 0);
5889
0
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
5890
0
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
5891
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
5892
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
5893
    // (prediction granularity), so we account for it in the full rate,
5894
    // not the tokenonly rate.
5895
0
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
5896
0
  }
5897
5898
0
  const ModeCosts *mode_costs = &x->mode_costs;
5899
0
  const PREDICTION_MODE mode = mbmi->mode;
5900
0
  if (num_planes > 1 && xd->is_chroma_ref) {
5901
0
    const int uv_mode_cost =
5902
0
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
5903
0
    intra_rd_stats.rate +=
5904
0
        intra_rd_stats_uv.rate +
5905
0
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
5906
0
  }
5907
5908
  // Intra block is always coded as non-skip
5909
0
  intra_rd_stats.skip_txfm = 0;
5910
0
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
5911
  // Add in the cost of the no skip flag.
5912
0
  const int skip_ctx = av1_get_skip_txfm_context(xd);
5913
0
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
5914
  // Calculate the final RD estimate for this mode.
5915
0
  const int64_t this_rd =
5916
0
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
5917
  // Keep record of best intra rd
5918
0
  if (this_rd < search_state->best_intra_rd) {
5919
0
    search_state->best_intra_rd = this_rd;
5920
0
    intra_search_state->best_intra_mode = mode;
5921
0
  }
5922
5923
0
  for (int i = 0; i < REFERENCE_MODES; ++i) {
5924
0
    search_state->best_pred_rd[i] =
5925
0
        AOMMIN(search_state->best_pred_rd[i], this_rd);
5926
0
  }
5927
5928
0
  intra_rd_stats.rdcost = this_rd;
5929
5930
0
  adjust_rdcost(cpi, x, &intra_rd_stats, /*is_inter_pred=*/false);
5931
5932
  // Collect mode stats for multiwinner mode processing
5933
0
  const int txfm_search_done = 1;
5934
0
  store_winner_mode_stats(
5935
0
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
5936
0
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
5937
0
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
5938
0
  if (intra_rd_stats.rdcost < search_state->best_rd) {
5939
0
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
5940
0
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
5941
0
                        best_mode_enum, x, txfm_search_done);
5942
0
  }
5943
0
}
5944
5945
// Initialize the table that stores best RD Costs of transform no-split.
5946
static inline void init_top_tx_no_split_rd_for_inter_modes(
5947
0
    MACROBLOCK *x, int prune_inter_tx_split_rd_eval_lvl) {
5948
0
  if (!prune_inter_tx_split_rd_eval_lvl) return;
5949
5950
0
  for (int i = 0; i < MAX_TX_BLOCKS_IN_MAX_SB; i++) {
5951
0
    for (int j = 0; j < TOP_INTER_TX_NO_SPLIT_COUNT; j++) {
5952
0
      x->top_inter_tx_no_split_rd[i][j] = INT64_MAX;
5953
0
    }
5954
0
  }
5955
0
}
5956
5957
#if !CONFIG_REALTIME_ONLY
5958
// Prepare inter_cost and intra_cost from TPL stats, which are used as ML
5959
// features in intra mode pruning.
5960
static inline void calculate_cost_from_tpl_data(const AV1_COMP *cpi,
5961
                                                MACROBLOCK *x, BLOCK_SIZE bsize,
5962
                                                int mi_row, int mi_col,
5963
                                                int64_t *inter_cost,
5964
0
                                                int64_t *intra_cost) {
5965
0
  const AV1_COMMON *const cm = &cpi->common;
5966
  // Only consider full SB.
5967
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
5968
0
  const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
5969
0
  const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5970
0
                  (block_size_high[sb_size] / tpl_bsize_1d);
5971
0
  SuperBlockEnc *sb_enc = &x->sb_enc;
5972
0
  if (sb_enc->tpl_data_count == len) {
5973
0
    const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5974
0
    const int tpl_stride = sb_enc->tpl_stride;
5975
0
    const int tplw = mi_size_wide[tpl_bsize];
5976
0
    const int tplh = mi_size_high[tpl_bsize];
5977
0
    const int nw = mi_size_wide[bsize] / tplw;
5978
0
    const int nh = mi_size_high[bsize] / tplh;
5979
0
    if (nw >= 1 && nh >= 1) {
5980
0
      const int of_h = mi_row % mi_size_high[sb_size];
5981
0
      const int of_w = mi_col % mi_size_wide[sb_size];
5982
0
      const int start = of_h / tplh * tpl_stride + of_w / tplw;
5983
5984
0
      for (int k = 0; k < nh; k++) {
5985
0
        for (int l = 0; l < nw; l++) {
5986
0
          *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5987
0
          *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5988
0
        }
5989
0
      }
5990
0
      *inter_cost /= nw * nh;
5991
0
      *intra_cost /= nw * nh;
5992
0
    }
5993
0
  }
5994
0
}
5995
#endif  // !CONFIG_REALTIME_ONLY
5996
5997
// When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
5998
// intra mode search.
5999
static inline void skip_intra_modes_in_interframe(
6000
    AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
6001
    InterModeSearchState *search_state, const SPEED_FEATURES *const sf,
6002
0
    int64_t inter_cost, int64_t intra_cost) {
6003
0
  MACROBLOCKD *const xd = &x->e_mbd;
6004
0
  const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME;
6005
0
  if (sf->rt_sf.prune_intra_mode_based_on_mv_range &&
6006
0
      bsize > sf->part_sf.max_intra_bsize && !comp_pred) {
6007
0
    const MV best_mv = search_state->best_mbmode.mv[0].as_mv;
6008
0
    const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range;
6009
0
    if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh &&
6010
0
        x->source_variance > 128) {
6011
0
      search_state->intra_search_state.skip_intra_modes = 1;
6012
0
      return;
6013
0
    }
6014
0
  }
6015
6016
0
  const unsigned int src_var_thresh_intra_skip = 1;
6017
0
  const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
6018
0
  if (!(skip_intra_in_interframe &&
6019
0
        (x->source_variance > src_var_thresh_intra_skip)))
6020
0
    return;
6021
6022
  // Prune intra search based on best inter mode being transfrom skip.
6023
0
  if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
6024
0
    const int qindex_thresh[2] = { 200, MAXQ };
6025
0
    const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
6026
0
    if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
6027
0
        (x->qindex <= qindex_thresh[ind])) {
6028
0
      search_state->intra_search_state.skip_intra_modes = 1;
6029
0
      return;
6030
0
    } else if ((skip_intra_in_interframe >= 4) &&
6031
0
               (inter_cost < 0 || intra_cost < 0)) {
6032
0
      search_state->intra_search_state.skip_intra_modes = 1;
6033
0
      return;
6034
0
    }
6035
0
  }
6036
  // Use ML model to prune intra search.
6037
0
  if (inter_cost >= 0 && intra_cost >= 0) {
6038
0
    const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
6039
0
                                     ? &av1_intrap_nn_config
6040
0
                                     : &av1_intrap_hd_nn_config;
6041
0
    float nn_features[6];
6042
0
    float scores[2] = { 0.0f };
6043
6044
0
    nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
6045
0
    nn_features[1] = (float)mi_size_wide_log2[bsize];
6046
0
    nn_features[2] = (float)mi_size_high_log2[bsize];
6047
0
    nn_features[3] = (float)intra_cost;
6048
0
    nn_features[4] = (float)inter_cost;
6049
0
    const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
6050
0
    const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
6051
0
    nn_features[5] = (float)(ac_q_max / ac_q);
6052
6053
0
    av1_nn_predict(nn_features, nn_config, 1, scores);
6054
6055
    // For two parameters, the max prob returned from av1_nn_softmax equals
6056
    // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
6057
    // calling of av1_nn_softmax.
6058
0
    const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
6059
0
    assert(skip_intra_in_interframe <= 5);
6060
0
    if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
6061
0
      search_state->intra_search_state.skip_intra_modes = 1;
6062
0
    }
6063
0
  }
6064
0
}
6065
6066
static inline bool skip_interp_filter_search(const AV1_COMP *cpi,
6067
0
                                             int is_single_pred) {
6068
0
  const MODE encoding_mode = cpi->oxcf.mode;
6069
0
  if (encoding_mode == REALTIME) {
6070
0
    return (cpi->common.current_frame.reference_mode == SINGLE_REFERENCE &&
6071
0
            (cpi->sf.interp_sf.skip_interp_filter_search ||
6072
0
             cpi->sf.winner_mode_sf.winner_mode_ifs));
6073
0
  } else if (encoding_mode == GOOD) {
6074
    // Skip interpolation filter search for single prediction modes.
6075
0
    return (cpi->sf.interp_sf.skip_interp_filter_search && is_single_pred);
6076
0
  }
6077
0
  return false;
6078
0
}
6079
6080
static inline int get_block_temp_var(const AV1_COMP *cpi, const MACROBLOCK *x,
6081
0
                                     BLOCK_SIZE bsize) {
6082
0
  const AV1_COMMON *const cm = &cpi->common;
6083
0
  const SPEED_FEATURES *const sf = &cpi->sf;
6084
6085
0
  if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION ||
6086
0
      !sf->rt_sf.short_circuit_low_temp_var ||
6087
0
      !sf->rt_sf.prune_inter_modes_using_temp_var) {
6088
0
    return 0;
6089
0
  }
6090
6091
0
  const int mi_row = x->e_mbd.mi_row;
6092
0
  const int mi_col = x->e_mbd.mi_col;
6093
0
  int is_low_temp_var = 0;
6094
6095
0
  if (cm->seq_params->sb_size == BLOCK_64X64)
6096
0
    is_low_temp_var = av1_get_force_skip_low_temp_var_small_sb(
6097
0
        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
6098
0
  else
6099
0
    is_low_temp_var = av1_get_force_skip_low_temp_var(
6100
0
        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
6101
6102
0
  return is_low_temp_var;
6103
0
}
6104
6105
// TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
6106
void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
6107
                            struct macroblock *x, struct RD_STATS *rd_cost,
6108
                            BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
6109
0
                            int64_t best_rd_so_far) {
6110
0
  AV1_COMMON *const cm = &cpi->common;
6111
0
  const FeatureFlags *const features = &cm->features;
6112
0
  const int num_planes = av1_num_planes(cm);
6113
0
  const SPEED_FEATURES *const sf = &cpi->sf;
6114
0
  MACROBLOCKD *const xd = &x->e_mbd;
6115
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
6116
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
6117
0
  int i;
6118
0
  const ModeCosts *mode_costs = &x->mode_costs;
6119
0
  const int *comp_inter_cost =
6120
0
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
6121
6122
0
  InterModeSearchState search_state;
6123
0
  init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
6124
0
  INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
6125
0
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
6126
0
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
6127
0
  };
6128
6129
0
  init_top_tx_no_split_rd_for_inter_modes(
6130
0
      x, sf->tx_sf.prune_inter_tx_split_rd_eval_lvl);
6131
6132
0
  HandleInterModeArgs args = { { NULL },
6133
0
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
6134
0
                               { NULL },
6135
0
                               { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
6136
0
                                 MAX_SB_SIZE >> 1 },
6137
0
                               NULL,
6138
0
                               NULL,
6139
0
                               NULL,
6140
0
                               search_state.modelled_rd,
6141
0
                               INT_MAX,
6142
0
                               INT_MAX,
6143
0
                               search_state.simple_rd,
6144
0
                               0,
6145
0
                               false,
6146
0
                               interintra_modes,
6147
0
                               { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
6148
0
                               { { 0, 0 } },
6149
0
                               { 0 },
6150
0
                               0,
6151
0
                               0,
6152
0
                               -1,
6153
0
                               -1,
6154
0
                               -1,
6155
0
                               { 0 },
6156
0
                               { 0 },
6157
0
                               UINT_MAX };
6158
  // Currently, is_low_temp_var is used in real time encoding.
6159
0
  const int is_low_temp_var = get_block_temp_var(cpi, x, bsize);
6160
6161
0
  for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
6162
  // Indicates the appropriate number of simple translation winner modes for
6163
  // exhaustive motion mode evaluation
6164
0
  const int max_winner_motion_mode_cand =
6165
0
      num_winner_motion_modes[sf->winner_mode_sf.motion_mode_for_winner_cand];
6166
0
  assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
6167
0
  motion_mode_candidate motion_mode_cand;
6168
0
  motion_mode_best_st_candidate best_motion_mode_cands;
6169
  // Initializing the number of motion mode candidates to zero.
6170
0
  best_motion_mode_cands.num_motion_mode_cand = 0;
6171
0
  for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
6172
0
    best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
6173
6174
0
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
6175
6176
0
  av1_invalid_rd_stats(rd_cost);
6177
6178
0
  for (i = 0; i < REF_FRAMES; ++i) {
6179
0
    x->warp_sample_info[i].num = -1;
6180
0
  }
6181
6182
  // Ref frames that are selected by square partition blocks.
6183
0
  int picked_ref_frames_mask = 0;
6184
0
  if (sf->inter_sf.prune_ref_frame_for_rect_partitions &&
6185
0
      mbmi->partition != PARTITION_NONE) {
6186
    // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
6187
    // partition blocks. prune_ref_frame_for_rect_partitions >=2
6188
    // implies prune for vert, horiz and extended partition blocks.
6189
0
    if ((mbmi->partition != PARTITION_VERT &&
6190
0
         mbmi->partition != PARTITION_HORZ) ||
6191
0
        sf->inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
6192
0
      picked_ref_frames_mask =
6193
0
          fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
6194
0
    }
6195
0
  }
6196
6197
#if CONFIG_COLLECT_COMPONENT_TIMING
6198
  start_timing(cpi, set_params_rd_pick_inter_mode_time);
6199
#endif
6200
  // Skip ref frames that never selected by square blocks.
6201
0
  const int skip_ref_frame_mask =
6202
0
      picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
6203
0
  mode_skip_mask_t mode_skip_mask;
6204
0
  unsigned int ref_costs_single[REF_FRAMES];
6205
0
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
6206
0
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
6207
  // init params, set frame modes, speed features
6208
0
  set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
6209
0
                                skip_ref_frame_mask, ref_costs_single,
6210
0
                                ref_costs_comp, yv12_mb);
6211
#if CONFIG_COLLECT_COMPONENT_TIMING
6212
  end_timing(cpi, set_params_rd_pick_inter_mode_time);
6213
#endif
6214
6215
0
  int64_t best_est_rd = INT64_MAX;
6216
0
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
6217
  // If do_tx_search is 0, only estimated RD should be computed.
6218
  // If do_tx_search is 1, all modes have TX search performed.
6219
0
  const int do_tx_search =
6220
0
      !((sf->inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
6221
0
        (sf->inter_sf.inter_mode_rd_model_estimation == 2 &&
6222
0
         num_pels_log2_lookup[bsize] > 8));
6223
0
  InterModesInfo *inter_modes_info = x->inter_modes_info;
6224
0
  inter_modes_info->num = 0;
6225
6226
  // Temporary buffers used by handle_inter_mode().
6227
0
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
6228
6229
  // The best RD found for the reference frame, among single reference modes.
6230
  // Note that the 0-th element will contain a cut-off that is later used
6231
  // to determine if we should skip a compound mode.
6232
0
  int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
6233
0
                                       INT64_MAX, INT64_MAX, INT64_MAX,
6234
0
                                       INT64_MAX, INT64_MAX };
6235
6236
  // Prepared stats used later to check if we could skip intra mode eval.
6237
0
  int64_t inter_cost = -1;
6238
0
  int64_t intra_cost = -1;
6239
  // Need to tweak the threshold for hdres speed 0 & 1.
6240
0
  const int mi_row = xd->mi_row;
6241
0
  const int mi_col = xd->mi_col;
6242
6243
  // Obtain the relevant tpl stats for pruning inter modes
6244
0
  PruneInfoFromTpl inter_cost_info_from_tpl;
6245
0
#if !CONFIG_REALTIME_ONLY
6246
0
  if (sf->inter_sf.prune_inter_modes_based_on_tpl) {
6247
    // x->tpl_keep_ref_frame[id] = 1 => no pruning in
6248
    // prune_ref_by_selective_ref_frame()
6249
    // x->tpl_keep_ref_frame[id] = 0  => ref frame can be pruned in
6250
    // prune_ref_by_selective_ref_frame()
6251
    // Populating valid_refs[idx] = 1 ensures that
6252
    // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
6253
    // pruned ref frame.
6254
0
    int valid_refs[INTER_REFS_PER_FRAME];
6255
0
    for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
6256
0
      const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
6257
0
      valid_refs[frame - 1] =
6258
0
          x->tpl_keep_ref_frame[frame] ||
6259
0
          !prune_ref_by_selective_ref_frame(
6260
0
              cpi, x, refs, cm->cur_frame->ref_display_order_hint);
6261
0
    }
6262
0
    av1_zero(inter_cost_info_from_tpl);
6263
0
    get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
6264
0
                              &inter_cost_info_from_tpl);
6265
0
  }
6266
6267
0
  const int do_pruning =
6268
0
      (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
6269
0
  if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
6270
0
      cpi->oxcf.algo_cfg.enable_tpl_model)
6271
0
    calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
6272
0
                                 &intra_cost);
6273
0
#endif  // !CONFIG_REALTIME_ONLY
6274
6275
  // Initialize best mode stats for winner mode processing.
6276
0
  const int max_winner_mode_count =
6277
0
      winner_mode_count_allowed[sf->winner_mode_sf.multi_winner_mode_type];
6278
0
  zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
6279
0
  x->winner_mode_count = 0;
6280
0
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
6281
0
                          NULL, bsize, best_rd_so_far,
6282
0
                          sf->winner_mode_sf.multi_winner_mode_type, 0);
6283
6284
0
  int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
6285
0
  if (sf->inter_sf.prune_inter_modes_if_skippable) {
6286
    // Higher multiplication factor values for lower quantizers.
6287
0
    mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
6288
0
  }
6289
6290
  // Initialize arguments for mode loop speed features
6291
0
  InterModeSFArgs sf_args = { &args.skip_motion_mode,
6292
0
                              &mode_skip_mask,
6293
0
                              &search_state,
6294
0
                              skip_ref_frame_mask,
6295
0
                              0,
6296
0
                              mode_thresh_mul_fact,
6297
0
                              0,
6298
0
                              0 };
6299
0
  int64_t best_inter_yrd = INT64_MAX;
6300
6301
  // This is the main loop of this function. It loops over all possible inter
6302
  // modes and calls handle_inter_mode() to compute the RD for each.
6303
  // Here midx is just an iterator index that should not be used by itself
6304
  // except to keep track of the number of modes searched. It should be used
6305
  // with av1_default_mode_order to get the enum that defines the mode, which
6306
  // can be used with av1_mode_defs to get the prediction mode and the ref
6307
  // frames.
6308
  // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
6309
  // good speedup for real time case. If we decide to use compound mode in real
6310
  // time, maybe we can modify av1_default_mode_order table.
6311
0
  THR_MODES mode_start = THR_INTER_MODE_START;
6312
0
  THR_MODES mode_end = THR_INTER_MODE_END;
6313
0
  const CurrentFrame *const current_frame = &cm->current_frame;
6314
0
  if (current_frame->reference_mode == SINGLE_REFERENCE) {
6315
0
    mode_start = SINGLE_REF_MODE_START;
6316
0
    mode_end = SINGLE_REF_MODE_END;
6317
0
  }
6318
0
  init_comp_avg_est_rd(x, sf->inter_sf.skip_cmp_using_top_cmp_avg_est_rd_lvl);
6319
0
  for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
6320
    // Get the actual prediction mode we are trying in this iteration
6321
0
    const THR_MODES mode_enum = av1_default_mode_order[midx];
6322
0
    const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
6323
0
    const PREDICTION_MODE this_mode = mode_def->mode;
6324
0
    const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
6325
6326
0
    const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
6327
0
    const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
6328
0
    const int is_single_pred =
6329
0
        ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
6330
0
    const int comp_pred = second_ref_frame > INTRA_FRAME;
6331
6332
0
    txfm_info->skip_txfm = 0;
6333
0
    sf_args.num_single_modes_processed += is_single_pred;
6334
#if CONFIG_COLLECT_COMPONENT_TIMING
6335
    start_timing(cpi, skip_inter_mode_time);
6336
#endif
6337
    // Apply speed features to decide if this inter mode can be skipped
6338
0
    const int is_skip_inter_mode = skip_inter_mode(
6339
0
        cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var);
6340
#if CONFIG_COLLECT_COMPONENT_TIMING
6341
    end_timing(cpi, skip_inter_mode_time);
6342
#endif
6343
0
    if (is_skip_inter_mode) continue;
6344
6345
0
    init_mbmi(mbmi, this_mode, ref_frames, cm);
6346
0
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
6347
6348
    // Select prediction reference frames.
6349
0
    for (i = 0; i < num_planes; i++) {
6350
0
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
6351
0
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
6352
0
    }
6353
6354
0
    mbmi->angle_delta[PLANE_TYPE_Y] = 0;
6355
0
    mbmi->angle_delta[PLANE_TYPE_UV] = 0;
6356
0
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
6357
0
    mbmi->ref_mv_idx = 0;
6358
6359
0
    const int64_t ref_best_rd = search_state.best_rd;
6360
0
    RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
6361
0
    av1_init_rd_stats(&rd_stats);
6362
6363
0
    const int ref_frame_cost = comp_pred
6364
0
                                   ? ref_costs_comp[ref_frame][second_ref_frame]
6365
0
                                   : ref_costs_single[ref_frame];
6366
0
    const int compmode_cost =
6367
0
        is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
6368
0
    const int real_compmode_cost =
6369
0
        cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
6370
0
            ? compmode_cost
6371
0
            : 0;
6372
    // Point to variables that are maintained between loop iterations
6373
0
    args.single_newmv = search_state.single_newmv;
6374
0
    args.single_newmv_rate = search_state.single_newmv_rate;
6375
0
    args.single_newmv_valid = search_state.single_newmv_valid;
6376
0
    args.single_comp_cost = real_compmode_cost;
6377
0
    args.ref_frame_cost = ref_frame_cost;
6378
0
    args.best_pred_sse = search_state.best_pred_sse;
6379
0
    args.skip_ifs = skip_interp_filter_search(cpi, is_single_pred);
6380
0
    int64_t skip_rd[2] = { search_state.best_skip_rd[0],
6381
0
                           search_state.best_skip_rd[1] };
6382
0
    int64_t this_yrd = INT64_MAX;
6383
#if CONFIG_COLLECT_COMPONENT_TIMING
6384
    start_timing(cpi, handle_inter_mode_time);
6385
#endif
6386
0
    int64_t this_rd = handle_inter_mode(
6387
0
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
6388
0
        ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
6389
0
        inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
6390
0
        &this_yrd);
6391
#if CONFIG_COLLECT_COMPONENT_TIMING
6392
    end_timing(cpi, handle_inter_mode_time);
6393
#endif
6394
0
    if (current_frame->reference_mode != SINGLE_REFERENCE) {
6395
0
      if (!args.skip_ifs &&
6396
0
          sf->inter_sf.prune_comp_search_by_single_result > 0 &&
6397
0
          is_inter_singleref_mode(this_mode)) {
6398
0
        collect_single_states(x, &search_state, mbmi);
6399
0
      }
6400
6401
0
      if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
6402
0
          is_inter_singleref_mode(this_mode))
6403
0
        update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
6404
0
    }
6405
6406
0
    if (this_rd == INT64_MAX) continue;
6407
6408
0
    if (mbmi->skip_txfm) {
6409
0
      rd_stats_y.rate = 0;
6410
0
      rd_stats_uv.rate = 0;
6411
0
    }
6412
6413
0
    if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
6414
0
        this_rd < ref_frame_rd[ref_frame]) {
6415
0
      ref_frame_rd[ref_frame] = this_rd;
6416
0
    }
6417
6418
0
    adjust_cost(cpi, x, &this_rd, /*is_inter_pred=*/true);
6419
0
    adjust_rdcost(cpi, x, &rd_stats, /*is_inter_pred=*/true);
6420
6421
    // Did this mode help, i.e., is it the new best mode
6422
0
    if (this_rd < search_state.best_rd) {
6423
0
      assert(IMPLIES(comp_pred,
6424
0
                     cm->current_frame.reference_mode != SINGLE_REFERENCE));
6425
0
      search_state.best_pred_sse = x->pred_sse[ref_frame];
6426
0
      best_inter_yrd = this_yrd;
6427
0
      update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
6428
0
                          &rd_stats_uv, mode_enum, x, do_tx_search);
6429
0
      if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
6430
      // skip_rd[0] is the best total rd for a skip mode so far.
6431
      // skip_rd[1] is the best total rd for a skip mode so far in luma.
6432
      // When do_tx_search = 1, both skip_rd[0] and skip_rd[1] are updated.
6433
      // When do_tx_search = 0, skip_rd[1] is updated.
6434
0
      search_state.best_skip_rd[1] = skip_rd[1];
6435
0
    }
6436
0
    if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6437
      // Add this mode to motion mode candidate list for motion mode search
6438
      // if using motion_mode_for_winner_cand speed feature
6439
0
      handle_winner_cand(mbmi, &best_motion_mode_cands,
6440
0
                         max_winner_motion_mode_cand, this_rd,
6441
0
                         &motion_mode_cand, args.skip_motion_mode);
6442
0
    }
6443
6444
    /* keep record of best compound/single-only prediction */
6445
0
    record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
6446
0
                         x->rdmult, &search_state, compmode_cost);
6447
0
  }
6448
6449
#if CONFIG_COLLECT_COMPONENT_TIMING
6450
  start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6451
#endif
6452
0
  if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6453
    // For the single ref winner candidates, evaluate other motion modes (non
6454
    // simple translation).
6455
0
    evaluate_motion_mode_for_winner_candidates(
6456
0
        cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
6457
0
        &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
6458
0
        &search_state, &best_inter_yrd);
6459
0
  }
6460
#if CONFIG_COLLECT_COMPONENT_TIMING
6461
  end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6462
#endif
6463
6464
#if CONFIG_COLLECT_COMPONENT_TIMING
6465
  start_timing(cpi, do_tx_search_time);
6466
#endif
6467
0
  if (do_tx_search != 1) {
6468
    // A full tx search has not yet been done, do tx search for
6469
    // top mode candidates
6470
0
    tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
6471
0
                                    yv12_mb, mi_row, mi_col, &search_state,
6472
0
                                    rd_cost, ctx, &best_inter_yrd);
6473
0
  }
6474
#if CONFIG_COLLECT_COMPONENT_TIMING
6475
  end_timing(cpi, do_tx_search_time);
6476
#endif
6477
6478
#if CONFIG_COLLECT_COMPONENT_TIMING
6479
  start_timing(cpi, handle_intra_mode_time);
6480
#endif
6481
  // Gate intra mode evaluation if best of inter is skip except when source
6482
  // variance is extremely low and also based on max intra bsize.
6483
0
  skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost,
6484
0
                                 intra_cost);
6485
6486
0
  const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
6487
0
  search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
6488
0
                                   &sf_args, intra_ref_frame_cost,
6489
0
                                   best_inter_yrd);
6490
#if CONFIG_COLLECT_COMPONENT_TIMING
6491
  end_timing(cpi, handle_intra_mode_time);
6492
#endif
6493
6494
#if CONFIG_COLLECT_COMPONENT_TIMING
6495
  start_timing(cpi, refine_winner_mode_tx_time);
6496
#endif
6497
0
  int winner_mode_count =
6498
0
      sf->winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
6499
  // In effect only when fast tx search speed features are enabled.
6500
0
  refine_winner_mode_tx(
6501
0
      cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
6502
0
      &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
6503
0
      search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
6504
#if CONFIG_COLLECT_COMPONENT_TIMING
6505
  end_timing(cpi, refine_winner_mode_tx_time);
6506
#endif
6507
6508
  // Initialize default mode evaluation params
6509
0
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);
6510
6511
  // Only try palette mode when the best mode so far is an intra mode.
6512
0
  const int try_palette =
6513
0
      cpi->oxcf.tool_cfg.enable_palette &&
6514
0
      av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
6515
0
      !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
6516
0
  RD_STATS this_rd_cost;
6517
0
  int this_skippable = 0;
6518
0
  if (try_palette) {
6519
#if CONFIG_COLLECT_COMPONENT_TIMING
6520
    start_timing(cpi, av1_search_palette_mode_time);
6521
#endif
6522
0
    this_skippable = av1_search_palette_mode(
6523
0
        &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
6524
0
        ctx, &this_rd_cost, search_state.best_rd);
6525
#if CONFIG_COLLECT_COMPONENT_TIMING
6526
    end_timing(cpi, av1_search_palette_mode_time);
6527
#endif
6528
0
    if (this_rd_cost.rdcost < search_state.best_rd) {
6529
0
      search_state.best_mode_index = THR_DC;
6530
0
      mbmi->mv[0].as_int = 0;
6531
0
      rd_cost->rate = this_rd_cost.rate;
6532
0
      rd_cost->dist = this_rd_cost.dist;
6533
0
      rd_cost->rdcost = this_rd_cost.rdcost;
6534
0
      search_state.best_rd = rd_cost->rdcost;
6535
0
      search_state.best_mbmode = *mbmi;
6536
0
      search_state.best_skip2 = 0;
6537
0
      search_state.best_mode_skippable = this_skippable;
6538
0
      av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
6539
0
    }
6540
0
  }
6541
6542
0
  search_state.best_mbmode.skip_mode = 0;
6543
0
  if (cm->current_frame.skip_mode_info.skip_mode_flag &&
6544
0
      cpi->oxcf.algo_cfg.sharpness != 3 && is_comp_ref_allowed(bsize)) {
6545
0
    const struct segmentation *const seg = &cm->seg;
6546
0
    unsigned char segment_id = mbmi->segment_id;
6547
0
    if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
6548
0
      rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
6549
0
    }
6550
0
  }
6551
6552
  // Make sure that the ref_mv_idx is only nonzero when we're
6553
  // using a mode which can support ref_mv_idx
6554
0
  if (search_state.best_mbmode.ref_mv_idx != 0 &&
6555
0
      !(search_state.best_mbmode.mode == NEWMV ||
6556
0
        search_state.best_mbmode.mode == NEW_NEWMV ||
6557
0
        have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
6558
0
    search_state.best_mbmode.ref_mv_idx = 0;
6559
0
  }
6560
6561
0
  if (search_state.best_mode_index == THR_INVALID ||
6562
0
      search_state.best_rd >= best_rd_so_far) {
6563
0
    rd_cost->rate = INT_MAX;
6564
0
    rd_cost->rdcost = INT64_MAX;
6565
0
    return;
6566
0
  }
6567
6568
0
  const InterpFilter interp_filter = features->interp_filter;
6569
0
  assert((interp_filter == SWITCHABLE) ||
6570
0
         (interp_filter ==
6571
0
          search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
6572
0
         !is_inter_block(&search_state.best_mbmode));
6573
0
  assert((interp_filter == SWITCHABLE) ||
6574
0
         (interp_filter ==
6575
0
          search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
6576
0
         !is_inter_block(&search_state.best_mbmode));
6577
6578
0
  if (!cpi->rc.is_src_frame_alt_ref && sf->inter_sf.adaptive_rd_thresh) {
6579
0
    av1_update_rd_thresh_fact(
6580
0
        cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
6581
0
        search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
6582
0
  }
6583
6584
  // macroblock modes
6585
0
  *mbmi = search_state.best_mbmode;
6586
0
  txfm_info->skip_txfm |= search_state.best_skip2;
6587
6588
  // Note: this section is needed since the mode may have been forced to
6589
  // GLOBALMV by the all-zero mode handling of ref-mv.
6590
0
  if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
6591
    // Correct the interp filters for GLOBALMV
6592
0
    if (is_nontrans_global_motion(xd, xd->mi[0])) {
6593
0
      int_interpfilters filters =
6594
0
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
6595
0
      assert(mbmi->interp_filters.as_int == filters.as_int);
6596
0
      (void)filters;
6597
0
    }
6598
0
  }
6599
6600
0
  txfm_info->skip_txfm |= search_state.best_mode_skippable;
6601
6602
0
  assert(search_state.best_mode_index != THR_INVALID);
6603
6604
#if CONFIG_INTERNAL_STATS
6605
  store_coding_context(x, ctx, search_state.best_mode_index,
6606
                       search_state.best_mode_skippable);
6607
#else
6608
0
  store_coding_context(x, ctx, search_state.best_mode_skippable);
6609
0
#endif  // CONFIG_INTERNAL_STATS
6610
6611
0
  if (mbmi->palette_mode_info.palette_size[1] > 0) {
6612
0
    assert(try_palette);
6613
0
    av1_restore_uv_color_map(cpi, x);
6614
0
  }
6615
0
}
6616
6617
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
6618
                                        TileDataEnc *tile_data, MACROBLOCK *x,
6619
                                        int mi_row, int mi_col,
6620
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
6621
                                        PICK_MODE_CONTEXT *ctx,
6622
0
                                        int64_t best_rd_so_far) {
6623
0
  const AV1_COMMON *const cm = &cpi->common;
6624
0
  const FeatureFlags *const features = &cm->features;
6625
0
  MACROBLOCKD *const xd = &x->e_mbd;
6626
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
6627
0
  unsigned char segment_id = mbmi->segment_id;
6628
0
  const int comp_pred = 0;
6629
0
  int i;
6630
0
  unsigned int ref_costs_single[REF_FRAMES];
6631
0
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
6632
0
  const ModeCosts *mode_costs = &x->mode_costs;
6633
0
  const int *comp_inter_cost =
6634
0
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
6635
0
  InterpFilter best_filter = SWITCHABLE;
6636
0
  int64_t this_rd = INT64_MAX;
6637
0
  int rate2 = 0;
6638
0
  const int64_t distortion2 = 0;
6639
0
  (void)mi_row;
6640
0
  (void)mi_col;
6641
0
  (void)tile_data;
6642
6643
0
  av1_collect_neighbors_ref_counts(xd);
6644
6645
0
  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
6646
0
                           ref_costs_comp);
6647
6648
0
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
6649
0
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;
6650
6651
0
  rd_cost->rate = INT_MAX;
6652
6653
0
  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
6654
6655
0
  mbmi->palette_mode_info.palette_size[0] = 0;
6656
0
  mbmi->palette_mode_info.palette_size[1] = 0;
6657
0
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
6658
0
  mbmi->mode = GLOBALMV;
6659
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
6660
0
  mbmi->uv_mode = UV_DC_PRED;
6661
0
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
6662
0
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
6663
0
  else
6664
0
    mbmi->ref_frame[0] = LAST_FRAME;
6665
0
  mbmi->ref_frame[1] = NONE_FRAME;
6666
0
  mbmi->mv[0].as_int =
6667
0
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
6668
0
                           features->allow_high_precision_mv, bsize, mi_col,
6669
0
                           mi_row, features->cur_frame_force_integer_mv)
6670
0
          .as_int;
6671
0
  mbmi->tx_size = max_txsize_lookup[bsize];
6672
0
  x->txfm_search_info.skip_txfm = 1;
6673
6674
0
  mbmi->ref_mv_idx = 0;
6675
6676
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
6677
0
  av1_count_overlappable_neighbors(cm, xd);
6678
0
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
6679
0
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
6680
0
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
6681
    // Select the samples according to motion vector difference
6682
0
    if (mbmi->num_proj_ref > 1) {
6683
0
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
6684
0
                                             mbmi->num_proj_ref, bsize);
6685
0
    }
6686
0
  }
6687
6688
0
  const InterpFilter interp_filter = features->interp_filter;
6689
0
  set_default_interp_filters(mbmi, interp_filter);
6690
6691
0
  if (interp_filter != SWITCHABLE) {
6692
0
    best_filter = interp_filter;
6693
0
  } else {
6694
0
    best_filter = EIGHTTAP_REGULAR;
6695
0
    if (av1_is_interp_needed(xd)) {
6696
0
      int rs;
6697
0
      int best_rs = INT_MAX;
6698
0
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
6699
0
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
6700
0
        rs = av1_get_switchable_rate(x, xd, interp_filter,
6701
0
                                     cm->seq_params->enable_dual_filter);
6702
0
        if (rs < best_rs) {
6703
0
          best_rs = rs;
6704
0
          best_filter = mbmi->interp_filters.as_filters.y_filter;
6705
0
        }
6706
0
      }
6707
0
    }
6708
0
  }
6709
  // Set the appropriate filter
6710
0
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
6711
0
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
6712
0
                                   cm->seq_params->enable_dual_filter);
6713
6714
0
  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
6715
0
    rate2 += comp_inter_cost[comp_pred];
6716
6717
  // Estimate the reference frame signaling cost and add it
6718
  // to the rolling cost variable.
6719
0
  rate2 += ref_costs_single[LAST_FRAME];
6720
0
  this_rd = RDCOST(x->rdmult, rate2, distortion2);
6721
6722
0
  rd_cost->rate = rate2;
6723
0
  rd_cost->dist = distortion2;
6724
0
  rd_cost->rdcost = this_rd;
6725
6726
0
  if (this_rd >= best_rd_so_far) {
6727
0
    rd_cost->rate = INT_MAX;
6728
0
    rd_cost->rdcost = INT64_MAX;
6729
0
    return;
6730
0
  }
6731
6732
0
  assert((interp_filter == SWITCHABLE) ||
6733
0
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));
6734
6735
0
  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
6736
0
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
6737
0
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
6738
0
                              THR_GLOBALMV, THR_INTER_MODE_START,
6739
0
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
6740
0
  }
6741
6742
#if CONFIG_INTERNAL_STATS
6743
  store_coding_context(x, ctx, THR_GLOBALMV, 0);
6744
#else
6745
0
  store_coding_context(x, ctx, 0);
6746
0
#endif  // CONFIG_INTERNAL_STATS
6747
0
}
6748
6749
/*!\cond */
6750
// Context passed (as a void pointer) to calc_target_weighted_pred_above() and
// calc_target_weighted_pred_left(). calc_target_weighted_pred() fills it with
// a positional initializer, so the field order must not be changed.
struct calc_target_weighted_pred_ctxt {
6751
  // Buffers whose 'wsrc' and 'mask' arrays the callbacks write into.
  const OBMCBuffer *obmc_buffer;
6752
  // Neighbouring predictor samples (the 'above' or 'left' buffer handed to
  // calc_target_weighted_pred()). The callbacks pass it through
  // CONVERT_TO_SHORTPTR when the current buffer is high bit depth.
  const uint8_t *tmp;
6753
  // Stride of 'tmp', in samples.
  int tmp_stride;
6754
  // Extent of the blended region: number of rows for the above callback,
  // number of columns for the left callback. Also the argument given to
  // av1_get_obmc_mask().
  int overlap;
6755
};
6756
/*!\endcond */
6757
6758
static inline void calc_target_weighted_pred_above(
6759
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6760
0
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6761
0
  (void)nb_mi;
6762
0
  (void)num_planes;
6763
0
  (void)rel_mi_row;
6764
0
  (void)dir;
6765
6766
0
  struct calc_target_weighted_pred_ctxt *ctxt =
6767
0
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6768
6769
0
  const int bw = xd->width << MI_SIZE_LOG2;
6770
0
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6771
6772
0
  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
6773
0
  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
6774
0
  const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
6775
0
  const int is_hbd = is_cur_buf_hbd(xd);
6776
6777
0
  if (!is_hbd) {
6778
0
    for (int row = 0; row < ctxt->overlap; ++row) {
6779
0
      const uint8_t m0 = mask1d[row];
6780
0
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6781
0
      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6782
0
        wsrc[col] = m1 * tmp[col];
6783
0
        mask[col] = m0;
6784
0
      }
6785
0
      wsrc += bw;
6786
0
      mask += bw;
6787
0
      tmp += ctxt->tmp_stride;
6788
0
    }
6789
0
  } else {
6790
0
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6791
6792
0
    for (int row = 0; row < ctxt->overlap; ++row) {
6793
0
      const uint8_t m0 = mask1d[row];
6794
0
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6795
0
      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6796
0
        wsrc[col] = m1 * tmp16[col];
6797
0
        mask[col] = m0;
6798
0
      }
6799
0
      wsrc += bw;
6800
0
      mask += bw;
6801
0
      tmp16 += ctxt->tmp_stride;
6802
0
    }
6803
0
  }
6804
0
}
6805
6806
static inline void calc_target_weighted_pred_left(
6807
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6808
0
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6809
0
  (void)nb_mi;
6810
0
  (void)num_planes;
6811
0
  (void)rel_mi_col;
6812
0
  (void)dir;
6813
6814
0
  struct calc_target_weighted_pred_ctxt *ctxt =
6815
0
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6816
6817
0
  const int bw = xd->width << MI_SIZE_LOG2;
6818
0
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6819
6820
0
  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
6821
0
  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
6822
0
  const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
6823
0
  const int is_hbd = is_cur_buf_hbd(xd);
6824
6825
0
  if (!is_hbd) {
6826
0
    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6827
0
      for (int col = 0; col < ctxt->overlap; ++col) {
6828
0
        const uint8_t m0 = mask1d[col];
6829
0
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6830
0
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6831
0
                    (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6832
0
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6833
0
      }
6834
0
      wsrc += bw;
6835
0
      mask += bw;
6836
0
      tmp += ctxt->tmp_stride;
6837
0
    }
6838
0
  } else {
6839
0
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6840
6841
0
    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6842
0
      for (int col = 0; col < ctxt->overlap; ++col) {
6843
0
        const uint8_t m0 = mask1d[col];
6844
0
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6845
0
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6846
0
                    (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6847
0
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6848
0
      }
6849
0
      wsrc += bw;
6850
0
      mask += bw;
6851
0
      tmp16 += ctxt->tmp_stride;
6852
0
    }
6853
0
  }
6854
0
}
6855
6856
// This function has a structure similar to av1_build_obmc_inter_prediction
6857
//
6858
// The OBMC predictor is computed as:
6859
//
6860
//  PObmc(x,y) =
6861
//    AOM_BLEND_A64(Mh(x),
6862
//                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
6863
//                  PLeft(x, y))
6864
//
6865
// Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
6866
// rounding, this can be written as:
6867
//
6868
//  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
6869
//    Mh(x) * Mv(y) * P(x,y) +
6870
//      Mh(x) * Cv(y) * Pabove(x,y) +
6871
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6872
//
6873
// Where :
6874
//
6875
//  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
6876
//  Ch(y) = AOM_BLEND_A64_MAX_ALPHA - Mh(y)
6877
//
6878
// This function computes 'wsrc' and 'mask' as:
6879
//
6880
//  wsrc(x, y) =
6881
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
6882
//      Mh(x) * Cv(y) * Pabove(x,y) +
6883
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6884
//
6885
//  mask(x, y) = Mh(x) * Mv(y)
6886
//
6887
// These can then be used to efficiently approximate the error for any
6888
// predictor P in the context of the provided neighbouring predictors by
6889
// computing:
6890
//
6891
//  error(x, y) =
6892
//    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
6893
//
6894
static inline void calc_target_weighted_pred(
6895
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
6896
    const uint8_t *above, int above_stride, const uint8_t *left,
6897
0
    int left_stride) {
6898
0
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
6899
0
  const int bw = xd->width << MI_SIZE_LOG2;
6900
0
  const int bh = xd->height << MI_SIZE_LOG2;
6901
0
  const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
6902
0
  int32_t *mask_buf = obmc_buffer->mask;
6903
0
  int32_t *wsrc_buf = obmc_buffer->wsrc;
6904
6905
0
  const int is_hbd = is_cur_buf_hbd(xd);
6906
0
  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
6907
6908
  // plane 0 should not be sub-sampled
6909
0
  assert(xd->plane[0].subsampling_x == 0);
6910
0
  assert(xd->plane[0].subsampling_y == 0);
6911
6912
0
  av1_zero_array(wsrc_buf, bw * bh);
6913
0
  for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
6914
6915
  // handle above row
6916
0
  if (xd->up_available) {
6917
0
    const int overlap =
6918
0
        AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
6919
0
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
6920
0
                                                   above_stride, overlap };
6921
0
    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
6922
0
                                  max_neighbor_obmc[mi_size_wide_log2[bsize]],
6923
0
                                  calc_target_weighted_pred_above, &ctxt);
6924
0
  }
6925
6926
0
  for (int i = 0; i < bw * bh; ++i) {
6927
0
    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6928
0
    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6929
0
  }
6930
6931
  // handle left column
6932
0
  if (xd->left_available) {
6933
0
    const int overlap =
6934
0
        AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
6935
0
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
6936
0
                                                   left_stride, overlap };
6937
0
    foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
6938
0
                                 max_neighbor_obmc[mi_size_high_log2[bsize]],
6939
0
                                 calc_target_weighted_pred_left, &ctxt);
6940
0
  }
6941
6942
0
  if (!is_hbd) {
6943
0
    const uint8_t *src = x->plane[0].src.buf;
6944
6945
0
    for (int row = 0; row < bh; ++row) {
6946
0
      for (int col = 0; col < bw; ++col) {
6947
0
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6948
0
      }
6949
0
      wsrc_buf += bw;
6950
0
      src += x->plane[0].src.stride;
6951
0
    }
6952
0
  } else {
6953
0
    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
6954
6955
0
    for (int row = 0; row < bh; ++row) {
6956
0
      for (int col = 0; col < bw; ++col) {
6957
0
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6958
0
      }
6959
0
      wsrc_buf += bw;
6960
0
      src += x->plane[0].src.stride;
6961
0
    }
6962
0
  }
6963
0
}