Coverage Report

Created: 2022-08-24 06:17

/src/aom/av1/encoder/rdopt.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <math.h>
14
#include <stdbool.h>
15
16
#include "config/aom_config.h"
17
#include "config/aom_dsp_rtcd.h"
18
#include "config/av1_rtcd.h"
19
20
#include "aom_dsp/aom_dsp_common.h"
21
#include "aom_dsp/blend.h"
22
#include "aom_mem/aom_mem.h"
23
#include "aom_ports/aom_timer.h"
24
#include "aom_ports/mem.h"
25
26
#include "av1/common/av1_common_int.h"
27
#include "av1/common/cfl.h"
28
#include "av1/common/blockd.h"
29
#include "av1/common/common.h"
30
#include "av1/common/common_data.h"
31
#include "av1/common/entropy.h"
32
#include "av1/common/entropymode.h"
33
#include "av1/common/idct.h"
34
#include "av1/common/mvref_common.h"
35
#include "av1/common/obmc.h"
36
#include "av1/common/pred_common.h"
37
#include "av1/common/quant_common.h"
38
#include "av1/common/reconinter.h"
39
#include "av1/common/reconintra.h"
40
#include "av1/common/scan.h"
41
#include "av1/common/seg_common.h"
42
#include "av1/common/txb_common.h"
43
#include "av1/common/warped_motion.h"
44
45
#include "av1/encoder/aq_variance.h"
46
#include "av1/encoder/av1_quantize.h"
47
#include "av1/encoder/cost.h"
48
#include "av1/encoder/compound_type.h"
49
#include "av1/encoder/encodemb.h"
50
#include "av1/encoder/encodemv.h"
51
#include "av1/encoder/encoder.h"
52
#include "av1/encoder/encodetxb.h"
53
#include "av1/encoder/hybrid_fwd_txfm.h"
54
#include "av1/encoder/interp_search.h"
55
#include "av1/encoder/intra_mode_search.h"
56
#include "av1/encoder/intra_mode_search_utils.h"
57
#include "av1/encoder/mcomp.h"
58
#include "av1/encoder/ml.h"
59
#include "av1/encoder/mode_prune_model_weights.h"
60
#include "av1/encoder/model_rd.h"
61
#include "av1/encoder/motion_search_facade.h"
62
#include "av1/encoder/palette.h"
63
#include "av1/encoder/pustats.h"
64
#include "av1/encoder/random.h"
65
#include "av1/encoder/ratectrl.h"
66
#include "av1/encoder/rd.h"
67
#include "av1/encoder/rdopt.h"
68
#include "av1/encoder/reconinter_enc.h"
69
#include "av1/encoder/tokenize.h"
70
#include "av1/encoder/tpl_model.h"
71
#include "av1/encoder/tx_search.h"
72
#include "av1/encoder/var_based_part.h"
73
74
0
#define LAST_NEW_MV_INDEX 6
75
76
// Mode_threshold multiplication factor table for prune_inter_modes_if_skippable
77
// The values are kept in Q12 format and equation used to derive is
78
// (2.5 - ((float)x->qindex / MAXQ) * 1.5)
79
0
#define MODE_THRESH_QBITS 12
80
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
81
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
82
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
83
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
84
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
85
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
86
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
87
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
88
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
89
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
90
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
91
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
92
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
93
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
94
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
95
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
96
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
97
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
98
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
99
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
100
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
101
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
102
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
103
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
104
  4144,  4120,  4096
105
};
106
107
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
108
  THR_NEARESTMV,
109
  THR_NEARESTL2,
110
  THR_NEARESTL3,
111
  THR_NEARESTB,
112
  THR_NEARESTA2,
113
  THR_NEARESTA,
114
  THR_NEARESTG,
115
116
  THR_NEWMV,
117
  THR_NEWL2,
118
  THR_NEWL3,
119
  THR_NEWB,
120
  THR_NEWA2,
121
  THR_NEWA,
122
  THR_NEWG,
123
124
  THR_NEARMV,
125
  THR_NEARL2,
126
  THR_NEARL3,
127
  THR_NEARB,
128
  THR_NEARA2,
129
  THR_NEARA,
130
  THR_NEARG,
131
132
  THR_GLOBALMV,
133
  THR_GLOBALL2,
134
  THR_GLOBALL3,
135
  THR_GLOBALB,
136
  THR_GLOBALA2,
137
  THR_GLOBALA,
138
  THR_GLOBALG,
139
140
  THR_COMP_NEAREST_NEARESTLA,
141
  THR_COMP_NEAREST_NEARESTL2A,
142
  THR_COMP_NEAREST_NEARESTL3A,
143
  THR_COMP_NEAREST_NEARESTGA,
144
  THR_COMP_NEAREST_NEARESTLB,
145
  THR_COMP_NEAREST_NEARESTL2B,
146
  THR_COMP_NEAREST_NEARESTL3B,
147
  THR_COMP_NEAREST_NEARESTGB,
148
  THR_COMP_NEAREST_NEARESTLA2,
149
  THR_COMP_NEAREST_NEARESTL2A2,
150
  THR_COMP_NEAREST_NEARESTL3A2,
151
  THR_COMP_NEAREST_NEARESTGA2,
152
  THR_COMP_NEAREST_NEARESTLL2,
153
  THR_COMP_NEAREST_NEARESTLL3,
154
  THR_COMP_NEAREST_NEARESTLG,
155
  THR_COMP_NEAREST_NEARESTBA,
156
157
  THR_COMP_NEAR_NEARLB,
158
  THR_COMP_NEW_NEWLB,
159
  THR_COMP_NEW_NEARESTLB,
160
  THR_COMP_NEAREST_NEWLB,
161
  THR_COMP_NEW_NEARLB,
162
  THR_COMP_NEAR_NEWLB,
163
  THR_COMP_GLOBAL_GLOBALLB,
164
165
  THR_COMP_NEAR_NEARLA,
166
  THR_COMP_NEW_NEWLA,
167
  THR_COMP_NEW_NEARESTLA,
168
  THR_COMP_NEAREST_NEWLA,
169
  THR_COMP_NEW_NEARLA,
170
  THR_COMP_NEAR_NEWLA,
171
  THR_COMP_GLOBAL_GLOBALLA,
172
173
  THR_COMP_NEAR_NEARL2A,
174
  THR_COMP_NEW_NEWL2A,
175
  THR_COMP_NEW_NEARESTL2A,
176
  THR_COMP_NEAREST_NEWL2A,
177
  THR_COMP_NEW_NEARL2A,
178
  THR_COMP_NEAR_NEWL2A,
179
  THR_COMP_GLOBAL_GLOBALL2A,
180
181
  THR_COMP_NEAR_NEARL3A,
182
  THR_COMP_NEW_NEWL3A,
183
  THR_COMP_NEW_NEARESTL3A,
184
  THR_COMP_NEAREST_NEWL3A,
185
  THR_COMP_NEW_NEARL3A,
186
  THR_COMP_NEAR_NEWL3A,
187
  THR_COMP_GLOBAL_GLOBALL3A,
188
189
  THR_COMP_NEAR_NEARGA,
190
  THR_COMP_NEW_NEWGA,
191
  THR_COMP_NEW_NEARESTGA,
192
  THR_COMP_NEAREST_NEWGA,
193
  THR_COMP_NEW_NEARGA,
194
  THR_COMP_NEAR_NEWGA,
195
  THR_COMP_GLOBAL_GLOBALGA,
196
197
  THR_COMP_NEAR_NEARL2B,
198
  THR_COMP_NEW_NEWL2B,
199
  THR_COMP_NEW_NEARESTL2B,
200
  THR_COMP_NEAREST_NEWL2B,
201
  THR_COMP_NEW_NEARL2B,
202
  THR_COMP_NEAR_NEWL2B,
203
  THR_COMP_GLOBAL_GLOBALL2B,
204
205
  THR_COMP_NEAR_NEARL3B,
206
  THR_COMP_NEW_NEWL3B,
207
  THR_COMP_NEW_NEARESTL3B,
208
  THR_COMP_NEAREST_NEWL3B,
209
  THR_COMP_NEW_NEARL3B,
210
  THR_COMP_NEAR_NEWL3B,
211
  THR_COMP_GLOBAL_GLOBALL3B,
212
213
  THR_COMP_NEAR_NEARGB,
214
  THR_COMP_NEW_NEWGB,
215
  THR_COMP_NEW_NEARESTGB,
216
  THR_COMP_NEAREST_NEWGB,
217
  THR_COMP_NEW_NEARGB,
218
  THR_COMP_NEAR_NEWGB,
219
  THR_COMP_GLOBAL_GLOBALGB,
220
221
  THR_COMP_NEAR_NEARLA2,
222
  THR_COMP_NEW_NEWLA2,
223
  THR_COMP_NEW_NEARESTLA2,
224
  THR_COMP_NEAREST_NEWLA2,
225
  THR_COMP_NEW_NEARLA2,
226
  THR_COMP_NEAR_NEWLA2,
227
  THR_COMP_GLOBAL_GLOBALLA2,
228
229
  THR_COMP_NEAR_NEARL2A2,
230
  THR_COMP_NEW_NEWL2A2,
231
  THR_COMP_NEW_NEARESTL2A2,
232
  THR_COMP_NEAREST_NEWL2A2,
233
  THR_COMP_NEW_NEARL2A2,
234
  THR_COMP_NEAR_NEWL2A2,
235
  THR_COMP_GLOBAL_GLOBALL2A2,
236
237
  THR_COMP_NEAR_NEARL3A2,
238
  THR_COMP_NEW_NEWL3A2,
239
  THR_COMP_NEW_NEARESTL3A2,
240
  THR_COMP_NEAREST_NEWL3A2,
241
  THR_COMP_NEW_NEARL3A2,
242
  THR_COMP_NEAR_NEWL3A2,
243
  THR_COMP_GLOBAL_GLOBALL3A2,
244
245
  THR_COMP_NEAR_NEARGA2,
246
  THR_COMP_NEW_NEWGA2,
247
  THR_COMP_NEW_NEARESTGA2,
248
  THR_COMP_NEAREST_NEWGA2,
249
  THR_COMP_NEW_NEARGA2,
250
  THR_COMP_NEAR_NEWGA2,
251
  THR_COMP_GLOBAL_GLOBALGA2,
252
253
  THR_COMP_NEAR_NEARLL2,
254
  THR_COMP_NEW_NEWLL2,
255
  THR_COMP_NEW_NEARESTLL2,
256
  THR_COMP_NEAREST_NEWLL2,
257
  THR_COMP_NEW_NEARLL2,
258
  THR_COMP_NEAR_NEWLL2,
259
  THR_COMP_GLOBAL_GLOBALLL2,
260
261
  THR_COMP_NEAR_NEARLL3,
262
  THR_COMP_NEW_NEWLL3,
263
  THR_COMP_NEW_NEARESTLL3,
264
  THR_COMP_NEAREST_NEWLL3,
265
  THR_COMP_NEW_NEARLL3,
266
  THR_COMP_NEAR_NEWLL3,
267
  THR_COMP_GLOBAL_GLOBALLL3,
268
269
  THR_COMP_NEAR_NEARLG,
270
  THR_COMP_NEW_NEWLG,
271
  THR_COMP_NEW_NEARESTLG,
272
  THR_COMP_NEAREST_NEWLG,
273
  THR_COMP_NEW_NEARLG,
274
  THR_COMP_NEAR_NEWLG,
275
  THR_COMP_GLOBAL_GLOBALLG,
276
277
  THR_COMP_NEAR_NEARBA,
278
  THR_COMP_NEW_NEWBA,
279
  THR_COMP_NEW_NEARESTBA,
280
  THR_COMP_NEAREST_NEWBA,
281
  THR_COMP_NEW_NEARBA,
282
  THR_COMP_NEAR_NEWBA,
283
  THR_COMP_GLOBAL_GLOBALBA,
284
285
  THR_DC,
286
  THR_PAETH,
287
  THR_SMOOTH,
288
  THR_SMOOTH_V,
289
  THR_SMOOTH_H,
290
  THR_H_PRED,
291
  THR_V_PRED,
292
  THR_D135_PRED,
293
  THR_D203_PRED,
294
  THR_D157_PRED,
295
  THR_D67_PRED,
296
  THR_D113_PRED,
297
  THR_D45_PRED,
298
};
299
300
/*!\cond */
301
typedef struct SingleInterModeState {
302
  int64_t rd;
303
  MV_REFERENCE_FRAME ref_frame;
304
  int valid;
305
} SingleInterModeState;
306
307
typedef struct InterModeSearchState {
308
  int64_t best_rd;
309
  int64_t best_skip_rd[2];
310
  MB_MODE_INFO best_mbmode;
311
  int best_rate_y;
312
  int best_rate_uv;
313
  int best_mode_skippable;
314
  int best_skip2;
315
  THR_MODES best_mode_index;
316
  int num_available_refs;
317
  int64_t dist_refs[REF_FRAMES];
318
  int dist_order_refs[REF_FRAMES];
319
  int64_t mode_threshold[MAX_MODES];
320
  int64_t best_intra_rd;
321
  unsigned int best_pred_sse;
322
323
  /*!
324
   * \brief Keep track of best intra rd for use in compound mode.
325
   */
326
  int64_t best_pred_rd[REFERENCE_MODES];
327
  // Save a set of single_newmv for each checked ref_mv.
328
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
329
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
330
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
331
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
332
  // The rd of simple translation in single inter modes
333
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
334
  int64_t best_single_rd[REF_FRAMES];
335
  PREDICTION_MODE best_single_mode[REF_FRAMES];
336
337
  // Single search results by [directions][modes][reference frames]
338
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
339
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
340
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
341
                                            [FWD_REFS];
342
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
343
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
344
  IntraModeSearchState intra_search_state;
345
  RD_STATS best_y_rdcost;
346
} InterModeSearchState;
347
/*!\endcond */
348
349
0
// Resets the inter mode RD model of every block size in `tile_data`: the
// model is marked not ready and its sample count and running sums are zeroed.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  InterModeRdModel *const models = tile_data->inter_mode_rd_models;
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    InterModeRdModel *const model = &models[bsize];
    model->ready = 0;
    model->num = 0;
    // Running sums accumulated by inter_mode_data_push().
    model->dist_sum = 0;
    model->ld_sum = 0;
    model->sse_sum = 0;
    model->sse_sse_sum = 0;
    model->sse_ld_sum = 0;
  }
}
361
362
// Estimates residue rate and distortion for a block of size `bsize` with
// prediction error `sse`, using the fitted per-block-size model in
// `tile_data`.
//
// Returns 0 (outputs untouched) when the model for `bsize` is not ready, and
// 1 otherwise, in which case `*est_residue_cost` and `*est_dist` are written:
//   - sse below the model's mean distortion: cost 0, dist = sse;
//   - otherwise dist = round(dist_mean) and
//     cost = (sse - dist_mean) / (a * sse + b), clamped to [0, INT_MAX / 2];
//   - whenever the resulting cost is 0, dist falls back to sse.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
  if (!md->ready) return 0;

  if (sse < md->dist_mean) {
    *est_residue_cost = 0;
    *est_dist = sse;
    return 1;
  }

  *est_dist = (int64_t)round(md->dist_mean);
  const double est_ld = md->a * sse + md->b;
  // Clamp estimated rate cost by INT_MAX / 2.
  // TODO(angiebird@google.com): find better solution than clamping.
  const int max_residue_cost = INT_MAX / 2;
  if (fabs(est_ld) < 1e-2) {
    *est_residue_cost = max_residue_cost;
  } else {
    // Clamp while still in the floating point domain: converting a NaN or an
    // out-of-range double to an integer type is undefined behavior, and NaN
    // can reach this point when the fitted slope/intercept are not finite.
    const double rounded_cost = round((sse - md->dist_mean) / est_ld);
    if (isnan(rounded_cost) || rounded_cost <= 0) {
      *est_residue_cost = 0;
    } else if (rounded_cost >= max_residue_cost) {
      *est_residue_cost = max_residue_cost;
    } else {
      *est_residue_cost = (int)rounded_cost;
    }
  }

  if (*est_residue_cost <= 0) {
    *est_residue_cost = 0;
    *est_dist = sse;
  }
  return 1;
}
395
396
0
// Refits the linear model (a, b) of every block size that has collected
// enough samples since the last fit, then clears that block size's
// accumulators.
//
// A model that is not ready yet needs at least 200 samples and takes the
// batch means directly; a ready model needs at least 64 samples and blends
// the old means with the batch means using a 3:1 weighting. Block sizes for
// which inter_mode_data_block_idx() returns -1 are skipped.
// `rdmult` is unused.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  (void)rdmult;
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    if (inter_mode_data_block_idx(bsize) == -1) continue;

    InterModeRdModel *const md = &tile_data->inter_mode_rd_models[bsize];
    const int first_fit = (md->ready == 0);
    if (first_fit && md->num < 200) continue;
    if (md->ready == 1 && md->num < 64) continue;

    // Means of the samples gathered since the previous fit.
    const double batch_dist = md->dist_sum / md->num;
    const double batch_ld = md->ld_sum / md->num;
    const double batch_sse = md->sse_sum / md->num;
    const double batch_sse_sse = md->sse_sse_sum / md->num;
    const double batch_sse_ld = md->sse_ld_sum / md->num;

    if (first_fit) {
      md->dist_mean = batch_dist;
      md->ld_mean = batch_ld;
      md->sse_mean = batch_sse;
      md->sse_sse_mean = batch_sse_sse;
      md->sse_ld_mean = batch_sse_ld;
    } else {
      // Weight given to the previous means relative to the new batch.
      const double factor = 3;
      md->dist_mean = (md->dist_mean * factor + batch_dist) / (factor + 1);
      md->ld_mean = (md->ld_mean * factor + batch_ld) / (factor + 1);
      md->sse_mean = (md->sse_mean * factor + batch_sse) / (factor + 1);
      md->sse_sse_mean =
          (md->sse_sse_mean * factor + batch_sse_sse) / (factor + 1);
      md->sse_ld_mean =
          (md->sse_ld_mean * factor + batch_sse_ld) / (factor + 1);
    }

    // Least-squares line ld = a * sse + b computed from the means.
    const double mean_ld = md->ld_mean;
    const double mean_sse = md->sse_mean;
    const double root_mean_sq_sse = sqrt(md->sse_sse_mean);
    const double mean_sse_ld = md->sse_ld_mean;

    md->a = (mean_sse_ld - mean_sse * mean_ld) /
            (root_mean_sq_sse * root_mean_sq_sse - mean_sse * mean_sse);
    md->b = mean_ld - md->a * mean_sse;
    md->ready = 1;

    // Start a fresh batch.
    md->num = 0;
    md->dist_sum = 0;
    md->ld_sum = 0;
    md->sse_sum = 0;
    md->sse_sse_sum = 0;
    md->sse_ld_sum = 0;
  }
}
445
446
static AOM_INLINE void inter_mode_data_push(TileDataEnc *tile_data,
447
                                            BLOCK_SIZE bsize, int64_t sse,
448
0
                                            int64_t dist, int residue_cost) {
449
0
  if (residue_cost == 0 || sse == dist) return;
450
0
  const int block_idx = inter_mode_data_block_idx(bsize);
451
0
  if (block_idx == -1) return;
452
0
  InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
453
0
  if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
454
0
    const double ld = (sse - dist) * 1. / residue_cost;
455
0
    ++rd_model->num;
456
0
    rd_model->dist_sum += dist;
457
0
    rd_model->ld_sum += ld;
458
0
    rd_model->sse_sum += sse;
459
0
    rd_model->sse_sse_sum += (double)sse * (double)sse;
460
0
    rd_model->sse_ld_sum += sse * ld;
461
0
  }
462
0
}
463
464
static AOM_INLINE void inter_modes_info_push(InterModesInfo *inter_modes_info,
465
                                             int mode_rate, int64_t sse,
466
                                             int64_t rd, RD_STATS *rd_cost,
467
                                             RD_STATS *rd_cost_y,
468
                                             RD_STATS *rd_cost_uv,
469
0
                                             const MB_MODE_INFO *mbmi) {
470
0
  const int num = inter_modes_info->num;
471
0
  assert(num < MAX_INTER_MODES);
472
0
  inter_modes_info->mbmi_arr[num] = *mbmi;
473
0
  inter_modes_info->mode_rate_arr[num] = mode_rate;
474
0
  inter_modes_info->sse_arr[num] = sse;
475
0
  inter_modes_info->est_rd_arr[num] = rd;
476
0
  inter_modes_info->rd_cost_arr[num] = *rd_cost;
477
0
  inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
478
0
  inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
479
0
  ++inter_modes_info->num;
480
0
}
481
482
0
static int compare_rd_idx_pair(const void *a, const void *b) {
483
0
  if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
484
    // To avoid inconsistency in qsort() ordering when two elements are equal,
485
    // using idx as tie breaker. Refer aomedia:2928
486
0
    if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
487
0
      return 0;
488
0
    else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
489
0
      return 1;
490
0
    else
491
0
      return -1;
492
0
  } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
493
0
    return 1;
494
0
  } else {
495
0
    return -1;
496
0
  }
497
0
}
498
499
static AOM_INLINE void inter_modes_info_sort(
500
0
    const InterModesInfo *inter_modes_info, RdIdxPair *rd_idx_pair_arr) {
501
0
  if (inter_modes_info->num == 0) {
502
0
    return;
503
0
  }
504
0
  for (int i = 0; i < inter_modes_info->num; ++i) {
505
0
    rd_idx_pair_arr[i].idx = i;
506
0
    rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
507
0
  }
508
0
  qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
509
0
        compare_rd_idx_pair);
510
0
}
511
512
// Similar to get_horver_correlation, but also takes into account first
513
// row/column, when computing horizontal/vertical correlation.
514
void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
515
                                       int width, int height, float *hcorr,
516
0
                                       float *vcorr) {
517
  // The following notation is used:
518
  // x - current pixel
519
  // y - left neighbor pixel
520
  // z - top neighbor pixel
521
0
  int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
522
0
  int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
523
0
  int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
524
525
  // First, process horizontal correlation on just the first row
526
0
  x_sum += diff[0];
527
0
  x2_sum += diff[0] * diff[0];
528
0
  x_firstrow += diff[0];
529
0
  x2_firstrow += diff[0] * diff[0];
530
0
  for (int j = 1; j < width; ++j) {
531
0
    const int16_t x = diff[j];
532
0
    const int16_t y = diff[j - 1];
533
0
    x_sum += x;
534
0
    x_firstrow += x;
535
0
    x2_sum += x * x;
536
0
    x2_firstrow += x * x;
537
0
    xy_sum += x * y;
538
0
  }
539
540
  // Process vertical correlation in the first column
541
0
  x_firstcol += diff[0];
542
0
  x2_firstcol += diff[0] * diff[0];
543
0
  for (int i = 1; i < height; ++i) {
544
0
    const int16_t x = diff[i * stride];
545
0
    const int16_t z = diff[(i - 1) * stride];
546
0
    x_sum += x;
547
0
    x_firstcol += x;
548
0
    x2_sum += x * x;
549
0
    x2_firstcol += x * x;
550
0
    xz_sum += x * z;
551
0
  }
552
553
  // Now process horiz and vert correlation through the rest unit
554
0
  for (int i = 1; i < height; ++i) {
555
0
    for (int j = 1; j < width; ++j) {
556
0
      const int16_t x = diff[i * stride + j];
557
0
      const int16_t y = diff[i * stride + j - 1];
558
0
      const int16_t z = diff[(i - 1) * stride + j];
559
0
      x_sum += x;
560
0
      x2_sum += x * x;
561
0
      xy_sum += x * y;
562
0
      xz_sum += x * z;
563
0
    }
564
0
  }
565
566
0
  for (int j = 0; j < width; ++j) {
567
0
    x_finalrow += diff[(height - 1) * stride + j];
568
0
    x2_finalrow +=
569
0
        diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
570
0
  }
571
0
  for (int i = 0; i < height; ++i) {
572
0
    x_finalcol += diff[i * stride + width - 1];
573
0
    x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
574
0
  }
575
576
0
  int64_t xhor_sum = x_sum - x_finalcol;
577
0
  int64_t xver_sum = x_sum - x_finalrow;
578
0
  int64_t y_sum = x_sum - x_firstcol;
579
0
  int64_t z_sum = x_sum - x_firstrow;
580
0
  int64_t x2hor_sum = x2_sum - x2_finalcol;
581
0
  int64_t x2ver_sum = x2_sum - x2_finalrow;
582
0
  int64_t y2_sum = x2_sum - x2_firstcol;
583
0
  int64_t z2_sum = x2_sum - x2_firstrow;
584
585
0
  const float num_hor = (float)(height * (width - 1));
586
0
  const float num_ver = (float)((height - 1) * width);
587
588
0
  const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
589
0
  const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
590
591
0
  const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
592
0
  const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
593
594
0
  const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
595
0
  const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
596
597
0
  if (xhor_var_n > 0 && y_var_n > 0) {
598
0
    *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
599
0
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
600
0
  } else {
601
0
    *hcorr = 1.0;
602
0
  }
603
0
  if (xver_var_n > 0 && z_var_n > 0) {
604
0
    *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
605
0
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
606
0
  } else {
607
0
    *vcorr = 1.0;
608
0
  }
609
0
}
610
611
static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
612
0
                       int64_t *sse_y) {
613
0
  const AV1_COMMON *cm = &cpi->common;
614
0
  const int num_planes = av1_num_planes(cm);
615
0
  const MACROBLOCKD *xd = &x->e_mbd;
616
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
617
0
  int64_t total_sse = 0;
618
0
  for (int plane = 0; plane < num_planes; ++plane) {
619
0
    if (plane && !xd->is_chroma_ref) break;
620
0
    const struct macroblock_plane *const p = &x->plane[plane];
621
0
    const struct macroblockd_plane *const pd = &xd->plane[plane];
622
0
    const BLOCK_SIZE bs =
623
0
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
624
0
    unsigned int sse;
625
626
0
    cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
627
0
                            pd->dst.stride, &sse);
628
0
    total_sse += sse;
629
0
    if (!plane && sse_y) *sse_y = sse;
630
0
  }
631
0
  total_sse <<= 4;
632
0
  return total_sse;
633
0
}
634
635
// Returns the sum of squared differences between `coeff` and `dqcoeff` over
// `block_size` entries and stores the sum of squared `coeff` values in
// `*ssz`.
//
// All arithmetic is carried out in 64 bits: squaring (or subtracting) in
// plain `int` overflows, which is undefined behavior, once a coefficient or a
// difference exceeds roughly 46340 in magnitude.
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t error = 0, sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = sqcoeff;
  return error;
}
649
650
// Returns the sum of squared differences between the 16-bit coefficient
// arrays `coeff` and `dqcoeff` over `block_size` entries.
//
// The difference of two int16_t values can reach 65535 in magnitude, whose
// square does not fit in a 32-bit int, so the square is formed in 64 bits.
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t error = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
661
662
#if CONFIG_AV1_HIGHBITDEPTH
// High bit depth variant of the block error: returns the sum of squared
// differences between `coeff` and `dqcoeff` over `block_size` entries and
// stores the sum of squared `coeff` values in `*ssz`. Both results are
// rounded and shifted right by 2 * (bd - 8) bits.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  int64_t sum_sq_err = 0;
  int64_t sum_sq_coeff = 0;

  for (int idx = 0; idx < block_size; idx++) {
    const int64_t src = coeff[idx];
    const int64_t delta = coeff[idx] - dqcoeff[idx];
    sum_sq_err += delta * delta;
    sum_sq_coeff += src * src;
  }
  assert(sum_sq_err >= 0 && sum_sq_coeff >= 0);

  *ssz = (sum_sq_coeff + rounding) >> shift;
  return (sum_sq_err + rounding) >> shift;
}
#endif
684
685
// Returns 1 when directional intra mode `mode` should be skipped given the
// best intra mode found so far, 0 otherwise. Each of the four listed modes is
// kept only when the best mode is one of its two associated modes; any other
// `mode` is never skipped.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
701
702
// Returns the cost of signalling inter prediction mode `mode` under
// `mode_context`.
//
// Compound modes are looked up directly in inter_compound_mode_cost. Single
// reference modes accumulate up to three table entries in turn:
// NEWMV vs. not, then GLOBALMV vs. not, then NEARESTMV vs. the remaining mode,
// each indexed by its own field extracted from `mode_context`.
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  const int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[newmv_ctx][0];

  // Not NEWMV.
  const int not_newmv_cost = mode_costs->newmv_mode_cost[newmv_ctx][1];
  const int16_t globalmv_ctx =
      (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV) {
    return not_newmv_cost + mode_costs->zeromv_mode_cost[globalmv_ctx][0];
  }

  // Neither NEWMV nor GLOBALMV.
  const int16_t refmv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  return not_newmv_cost + mode_costs->zeromv_mode_cost[globalmv_ctx][1] +
         mode_costs->refmv_mode_cost[refmv_ctx][mode != NEARESTMV];
}
732
733
// Returns the single-reference mode that compound mode `this_mode` applies to
// one of its references: compound_ref0_mode() when `ref_idx` is 0,
// compound_ref1_mode() for any other value.
static INLINE PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
                                              int ref_idx) {
  if (ref_idx == 0) return compound_ref0_mode(this_mode);
  return compound_ref1_mode(this_mode);
}
738
739
static AOM_INLINE void estimate_ref_frame_costs(
740
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
741
    int segment_id, unsigned int *ref_costs_single,
742
0
    unsigned int (*ref_costs_comp)[REF_FRAMES]) {
743
0
  int seg_ref_active =
744
0
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
745
0
  if (seg_ref_active) {
746
0
    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
747
0
    int ref_frame;
748
0
    for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
749
0
      memset(ref_costs_comp[ref_frame], 0,
750
0
             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
751
0
  } else {
752
0
    int intra_inter_ctx = av1_get_intra_inter_context(xd);
753
0
    ref_costs_single[INTRA_FRAME] =
754
0
        mode_costs->intra_inter_cost[intra_inter_ctx][0];
755
0
    unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];
756
757
0
    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
758
0
      ref_costs_single[i] = base_cost;
759
760
0
    const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
761
0
    const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
762
0
    const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
763
0
    const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
764
0
    const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
765
0
    const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);
766
767
    // Determine cost of a single ref frame, where frame types are represented
768
    // by a tree:
769
    // Level 0: add cost whether this ref is a forward or backward ref
770
0
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
771
0
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
772
0
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
773
0
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
774
0
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
775
0
    ref_costs_single[ALTREF2_FRAME] +=
776
0
        mode_costs->single_ref_cost[ctx_p1][0][1];
777
0
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
778
779
    // Level 1: if this ref is forward ref,
780
    // add cost whether it is last/last2 or last3/golden
781
0
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
782
0
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
783
0
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
784
0
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
785
786
    // Level 1: if this ref is backward ref
787
    // then add cost whether this ref is altref or backward ref
788
0
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
789
0
    ref_costs_single[ALTREF2_FRAME] +=
790
0
        mode_costs->single_ref_cost[ctx_p2][1][0];
791
0
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];
792
793
    // Level 2: further add cost whether this ref is last or last2
794
0
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
795
0
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];
796
797
    // Level 2: last3 or golden
798
0
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
799
0
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];
800
801
    // Level 2: bwdref or altref2
802
0
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
803
0
    ref_costs_single[ALTREF2_FRAME] +=
804
0
        mode_costs->single_ref_cost[ctx_p6][5][1];
805
806
0
    if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
807
      // Similar to single ref, determine cost of compound ref frames.
808
      // cost_compound_refs = cost_first_ref + cost_second_ref
809
0
      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
810
0
      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
811
0
      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
812
0
      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
813
0
      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);
814
815
0
      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
816
0
      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };
817
818
0
      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
819
0
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
820
0
              base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
821
0
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
822
0
      ref_bicomp_costs[ALTREF_FRAME] = 0;
823
824
      // cost of first ref frame
825
0
      ref_bicomp_costs[LAST_FRAME] +=
826
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
827
0
      ref_bicomp_costs[LAST2_FRAME] +=
828
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
829
0
      ref_bicomp_costs[LAST3_FRAME] +=
830
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
831
0
      ref_bicomp_costs[GOLDEN_FRAME] +=
832
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
833
834
0
      ref_bicomp_costs[LAST_FRAME] +=
835
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
836
0
      ref_bicomp_costs[LAST2_FRAME] +=
837
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];
838
839
0
      ref_bicomp_costs[LAST3_FRAME] +=
840
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
841
0
      ref_bicomp_costs[GOLDEN_FRAME] +=
842
0
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];
843
844
      // cost of second ref frame
845
0
      ref_bicomp_costs[BWDREF_FRAME] +=
846
0
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
847
0
      ref_bicomp_costs[ALTREF2_FRAME] +=
848
0
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
849
0
      ref_bicomp_costs[ALTREF_FRAME] +=
850
0
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
851
852
0
      ref_bicomp_costs[BWDREF_FRAME] +=
853
0
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
854
0
      ref_bicomp_costs[ALTREF2_FRAME] +=
855
0
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
856
857
      // cost: if one ref frame is forward ref, the other ref is backward ref
858
0
      int ref0, ref1;
859
0
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
860
0
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
861
0
          ref_costs_comp[ref0][ref1] =
862
0
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
863
0
        }
864
0
      }
865
866
      // cost: if both ref frames are the same side.
867
0
      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
868
0
      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
869
0
      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
870
0
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
871
0
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
872
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
873
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
874
0
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
875
0
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
876
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
877
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
878
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
879
0
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
880
0
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
881
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
882
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
883
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
884
0
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
885
0
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
886
0
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
887
0
    } else {
888
0
      int ref0, ref1;
889
0
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
890
0
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
891
0
          ref_costs_comp[ref0][ref1] = 512;
892
0
      }
893
0
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
894
0
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
895
0
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
896
0
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
897
0
    }
898
0
  }
899
0
}
900
901
static AOM_INLINE void store_coding_context(
902
#if CONFIG_INTERNAL_STATS
903
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
904
#else
905
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
906
#endif  // CONFIG_INTERNAL_STATS
907
0
    int skippable) {
908
0
  MACROBLOCKD *const xd = &x->e_mbd;
909
910
  // Take a snapshot of the coding context so it can be
911
  // restored if we decide to encode this way
912
0
  ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
913
0
  ctx->skippable = skippable;
914
#if CONFIG_INTERNAL_STATS
915
  ctx->best_mode_index = mode_index;
916
#endif  // CONFIG_INTERNAL_STATS
917
0
  ctx->mic = *xd->mi[0];
918
0
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
919
0
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
920
0
}
921
922
static AOM_INLINE void setup_buffer_ref_mvs_inter(
923
    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
924
0
    BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
925
0
  const AV1_COMMON *cm = &cpi->common;
926
0
  const int num_planes = av1_num_planes(cm);
927
0
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
928
0
      av1_get_scaled_ref_frame(cpi, ref_frame);
929
0
  MACROBLOCKD *const xd = &x->e_mbd;
930
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
931
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
932
0
  const struct scale_factors *const sf =
933
0
      get_ref_scale_factors_const(cm, ref_frame);
934
0
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
935
0
  assert(yv12 != NULL);
936
937
0
  if (scaled_ref_frame) {
938
    // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
939
    // support scaling.
940
0
    av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
941
0
                         num_planes);
942
0
  } else {
943
0
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
944
0
  }
945
946
  // Gets an initial list of candidate vectors from neighbours and orders them
947
0
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
948
0
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
949
0
                   mbmi_ext->mode_context);
950
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
951
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
952
0
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
953
  // Further refinement that is encode side only to test the top few candidates
954
  // in full and choose the best as the center point for subsequent searches.
955
  // The current implementation doesn't support scaling.
956
0
  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
957
0
              ref_frame, block_size);
958
959
  // Go back to unscaled reference.
960
0
  if (scaled_ref_frame) {
961
    // We had temporarily setup pred block based on scaled reference above. Go
962
    // back to unscaled reference now, for subsequent use.
963
0
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
964
0
  }
965
0
}
966
967
0
#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
968
0
#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
969
970
// TODO(jingning): this mv clamping function should be block size dependent.
971
0
static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
972
0
  const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
973
0
                                     xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
974
0
                                     xd->mb_to_top_edge - LEFT_TOP_MARGIN,
975
0
                                     xd->mb_to_bottom_edge +
976
0
                                         RIGHT_BOTTOM_MARGIN };
977
0
  clamp_mv(mv, &mv_limits);
978
0
}
979
980
/* If the current mode shares the same mv with other modes with higher cost,
981
 * skip this mode. */
982
static int skip_repeated_mv(const AV1_COMMON *const cm,
983
                            const MACROBLOCK *const x,
984
                            PREDICTION_MODE this_mode,
985
                            const MV_REFERENCE_FRAME ref_frames[2],
986
0
                            InterModeSearchState *search_state) {
987
0
  const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
988
0
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
989
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
990
0
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
991
0
  PREDICTION_MODE compare_mode = MB_MODE_COUNT;
992
0
  if (!is_comp_pred) {
993
0
    if (this_mode == NEARMV) {
994
0
      if (ref_mv_count == 0) {
995
        // NEARMV has the same motion vector as NEARESTMV
996
0
        compare_mode = NEARESTMV;
997
0
      }
998
0
      if (ref_mv_count == 1 &&
999
0
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1000
        // NEARMV has the same motion vector as GLOBALMV
1001
0
        compare_mode = GLOBALMV;
1002
0
      }
1003
0
    }
1004
0
    if (this_mode == GLOBALMV) {
1005
0
      if (ref_mv_count == 0 &&
1006
0
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1007
        // GLOBALMV has the same motion vector as NEARESTMV
1008
0
        compare_mode = NEARESTMV;
1009
0
      }
1010
0
      if (ref_mv_count == 1) {
1011
        // GLOBALMV has the same motion vector as NEARMV
1012
0
        compare_mode = NEARMV;
1013
0
      }
1014
0
    }
1015
1016
0
    if (compare_mode != MB_MODE_COUNT) {
1017
      // Use modelled_rd to check whether compare mode was searched
1018
0
      if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
1019
0
          INT64_MAX) {
1020
0
        const int16_t mode_ctx =
1021
0
            av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
1022
0
        const int compare_cost =
1023
0
            cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
1024
0
        const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);
1025
1026
        // Only skip if the mode cost is larger than compare mode cost
1027
0
        if (this_cost > compare_cost) {
1028
0
          search_state->modelled_rd[this_mode][0][ref_frames[0]] =
1029
0
              search_state->modelled_rd[compare_mode][0][ref_frames[0]];
1030
0
          return 1;
1031
0
        }
1032
0
      }
1033
0
    }
1034
0
  }
1035
0
  return 0;
1036
0
}
1037
1038
static INLINE int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1039
                                     const AV1_COMMON *cm,
1040
0
                                     const MACROBLOCK *x) {
1041
0
  const MACROBLOCKD *const xd = &x->e_mbd;
1042
0
  *out_mv = in_mv;
1043
0
  lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1044
0
                     cm->features.cur_frame_force_integer_mv);
1045
0
  clamp_mv2(&out_mv->as_mv, xd);
1046
0
  return av1_is_fullmv_in_range(&x->mv_limits,
1047
0
                                get_fullmv_from_mv(&out_mv->as_mv));
1048
0
}
1049
1050
// To use single newmv directly for compound modes, need to clamp the mv to the
1051
// valid mv range. Without this, encoder would generate out of range mv, and
1052
// this is seen in 8k encoding.
1053
static INLINE void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1054
0
                                     int ref_idx) {
1055
0
  const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1056
0
  SubpelMvLimits mv_limits;
1057
1058
0
  av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1059
0
  clamp_mv(&mv->as_mv, &mv_limits);
1060
0
}
1061
1062
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
1063
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
1064
                            int *const rate_mv, HandleInterModeArgs *const args,
1065
0
                            inter_mode_info *mode_info) {
1066
0
  MACROBLOCKD *const xd = &x->e_mbd;
1067
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
1068
0
  const int is_comp_pred = has_second_ref(mbmi);
1069
0
  const PREDICTION_MODE this_mode = mbmi->mode;
1070
0
  const int refs[2] = { mbmi->ref_frame[0],
1071
0
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
1072
0
  const int ref_mv_idx = mbmi->ref_mv_idx;
1073
1074
0
  if (is_comp_pred) {
1075
0
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
1076
0
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
1077
0
    if (this_mode == NEW_NEWMV) {
1078
0
      if (valid_mv0) {
1079
0
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
1080
0
        clamp_mv_in_range(x, &cur_mv[0], 0);
1081
0
      }
1082
0
      if (valid_mv1) {
1083
0
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
1084
0
        clamp_mv_in_range(x, &cur_mv[1], 1);
1085
0
      }
1086
0
      *rate_mv = 0;
1087
0
      for (int i = 0; i < 2; ++i) {
1088
0
        const int_mv ref_mv = av1_get_ref_mv(x, i);
1089
0
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
1090
0
                                    x->mv_costs->nmv_joint_cost,
1091
0
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1092
0
      }
1093
0
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
1094
0
      if (valid_mv1) {
1095
0
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
1096
0
        clamp_mv_in_range(x, &cur_mv[1], 1);
1097
0
      }
1098
0
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
1099
0
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
1100
0
                                 x->mv_costs->nmv_joint_cost,
1101
0
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1102
0
    } else {
1103
0
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
1104
0
      if (valid_mv0) {
1105
0
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
1106
0
        clamp_mv_in_range(x, &cur_mv[0], 0);
1107
0
      }
1108
0
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
1109
0
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
1110
0
                                 x->mv_costs->nmv_joint_cost,
1111
0
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1112
0
    }
1113
0
  } else {
1114
    // Single ref case.
1115
0
    const int ref_idx = 0;
1116
0
    int search_range = INT_MAX;
1117
1118
0
    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
1119
0
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
1120
0
      int min_mv_diff = INT_MAX;
1121
0
      int best_match = -1;
1122
0
      MV prev_ref_mv[2] = { { 0 } };
1123
0
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
1124
0
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
1125
0
                                                     idx, &x->mbmi_ext)
1126
0
                               .as_mv;
1127
0
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
1128
0
                                       abs(ref_mv.col - prev_ref_mv[idx].col));
1129
1130
0
        if (min_mv_diff > ref_mv_diff) {
1131
0
          min_mv_diff = ref_mv_diff;
1132
0
          best_match = idx;
1133
0
        }
1134
0
      }
1135
1136
0
      if (min_mv_diff < (16 << 3)) {
1137
0
        if (args->single_newmv_valid[best_match][refs[0]]) {
1138
0
          search_range = min_mv_diff;
1139
0
          search_range +=
1140
0
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
1141
0
                         prev_ref_mv[best_match].row),
1142
0
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
1143
0
                         prev_ref_mv[best_match].col));
1144
          // Get full pixel search range.
1145
0
          search_range = (search_range + 4) >> 3;
1146
0
        }
1147
0
      }
1148
0
    }
1149
1150
0
    int_mv best_mv;
1151
0
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
1152
0
                             mode_info, &best_mv, args);
1153
0
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;
1154
1155
0
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
1156
0
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
1157
0
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
1158
0
    cur_mv[0].as_int = best_mv.as_int;
1159
1160
    // Return after single_newmv is set.
1161
0
    if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
1162
0
  }
1163
1164
0
  return 0;
1165
0
}
1166
1167
static INLINE void update_mode_start_end_index(
1168
    const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
1169
    int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
1170
0
    int interintra_allowed, int eval_motion_mode) {
1171
0
  *mode_index_start = (int)SIMPLE_TRANSLATION;
1172
0
  *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1173
0
  if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1174
0
    if (!eval_motion_mode) {
1175
0
      *mode_index_end = (int)SIMPLE_TRANSLATION;
1176
0
    } else {
1177
      // Set the start index appropriately to process motion modes other than
1178
      // simple translation
1179
0
      *mode_index_start = 1;
1180
0
    }
1181
0
  }
1182
0
  if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
1183
0
    *mode_index_end = SIMPLE_TRANSLATION;
1184
0
}
1185
1186
/*!\brief AV1 motion mode search
1187
 *
1188
 * \ingroup inter_mode_search
1189
 * Function to search over and determine the motion mode. It will update
1190
 * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1191
 * WARPED_CAUSAL and determine any necessary side information for the selected
1192
 * motion mode. It will also perform the full transform search, unless the
1193
 * input parameter do_tx_search indicates to do an estimation of the RD rather
1194
 * than an RD corresponding to a full transform search. It will return the
1195
 * RD for the final motion_mode.
1196
 * Do the RD search for a given inter mode and compute all information relevant
1197
 * to the input mode. It will compute the best MV,
1198
 * compound parameters (if the mode is a compound mode) and interpolation filter
1199
 * parameters.
1200
 *
1201
 * \param[in]     cpi               Top-level encoder structure.
1202
 * \param[in]     tile_data         Pointer to struct holding adaptive
1203
 *                                  data/contexts/models for the tile during
1204
 *                                  encoding.
1205
 * \param[in]     x                 Pointer to struct holding all the data for
1206
 *                                  the current macroblock.
1207
 * \param[in]     bsize             Current block size.
1208
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
1209
 *                                  information.
1210
 * \param[in,out] rd_stats_y        Struct to keep track of the RD information
1211
 *                                  for only the Y plane.
1212
 * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
1213
 *                                  for only the UV planes.
1214
 * \param[in]     args              HandleInterModeArgs struct holding
1215
 *                                  miscellaneous arguments for inter mode
1216
 *                                  search. See the documentation for this
1217
 *                                  struct for a description of each member.
1218
 * \param[in]     ref_best_rd       Best RD found so far for this block.
1219
 *                                  It is used for early termination of this
1220
 *                                  search if the RD exceeds this value.
1221
 * \param[in,out] ref_skip_rd       A length 2 array, where skip_rd[0] is the
1222
 *                                  best total RD for a skip mode so far, and
1223
 *                                  skip_rd[1] is the best RD for a skip mode so
1224
 *                                  far in luma. This is used as a speed feature
1225
 *                                  to skip the transform search if the computed
1226
 *                                  skip RD for the current mode is not better
1227
 *                                  than the best skip_rd so far.
1228
 * \param[in,out] rate_mv           The rate associated with the motion vectors.
1229
 *                                  This will be modified if a motion search is
1230
 *                                  done in the motion mode search.
1231
 * \param[in,out] orig_dst          A prediction buffer to hold a computed
1232
 *                                  prediction. This will eventually hold the
1233
 *                                  final prediction, and the tmp_dst info will
1234
 *                                  be copied here.
1235
 * \param[in,out] best_est_rd       Estimated RD for motion mode search if
1236
 *                                  do_tx_search (see below) is 0.
1237
 * \param[in]     do_tx_search      Parameter to indicate whether or not to do
1238
 *                                  a full transform search. This will compute
1239
 *                                  an estimated RD for the modes without the
1240
 *                                  transform search and later perform the full
1241
 *                                  transform search on the best candidates.
1242
 * \param[in]     inter_modes_info  InterModesInfo struct to hold inter mode
1243
 *                                  information to perform a full transform
1244
 *                                  search only on winning candidates searched
1245
 *                                  with an estimate for transform coding RD.
1246
 * \param[in]     eval_motion_mode  Boolean whether or not to evaluate motion
1247
 *                                  modes other than SIMPLE_TRANSLATION.
1248
 * \param[out]    yrd               Stores the rdcost corresponding to encoding
1249
 *                                  the luma plane.
1250
 * \return Returns INT64_MAX if the determined motion mode is invalid and the
1251
 * current motion mode being tested should be skipped. It returns 0 if the
1252
 * motion mode search is a success.
1253
 */
1254
static int64_t motion_mode_rd(
1255
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
1256
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
1257
    RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
1258
    int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
1259
    int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
1260
0
    int eval_motion_mode, int64_t *yrd) {
1261
0
  const AV1_COMMON *const cm = &cpi->common;
1262
0
  const FeatureFlags *const features = &cm->features;
1263
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
1264
0
  const int num_planes = av1_num_planes(cm);
1265
0
  MACROBLOCKD *xd = &x->e_mbd;
1266
0
  MB_MODE_INFO *mbmi = xd->mi[0];
1267
0
  const int is_comp_pred = has_second_ref(mbmi);
1268
0
  const PREDICTION_MODE this_mode = mbmi->mode;
1269
0
  const int rate2_nocoeff = rd_stats->rate;
1270
0
  int best_xskip_txfm = 0;
1271
0
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
1272
0
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
1273
0
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
1274
0
  const int rate_mv0 = *rate_mv;
1275
0
  const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
1276
0
                                 is_interintra_allowed(mbmi) &&
1277
0
                                 mbmi->compound_idx;
1278
0
  WARP_SAMPLE_INFO *const warp_sample_info =
1279
0
      &x->warp_sample_info[mbmi->ref_frame[0]];
1280
0
  int *pts0 = warp_sample_info->pts;
1281
0
  int *pts_inref0 = warp_sample_info->pts_inref;
1282
1283
0
  assert(mbmi->ref_frame[1] != INTRA_FRAME);
1284
0
  const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
1285
0
  av1_invalid_rd_stats(&best_rd_stats);
1286
0
  mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
1287
0
  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
1288
0
  *yrd = INT64_MAX;
1289
0
  if (features->switchable_motion_mode) {
1290
    // Determine which motion modes to search if more than SIMPLE_TRANSLATION
1291
    // is allowed.
1292
0
    last_motion_mode_allowed = motion_mode_allowed(
1293
0
        xd->global_motion, xd, mbmi, features->allow_warped_motion);
1294
0
  }
1295
1296
0
  if (last_motion_mode_allowed == WARPED_CAUSAL) {
1297
    // Collect projection samples used in least squares approximation of
1298
    // the warped motion parameters if WARPED_CAUSAL is going to be searched.
1299
0
    if (warp_sample_info->num < 0) {
1300
0
      warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
1301
0
    }
1302
0
    mbmi->num_proj_ref = warp_sample_info->num;
1303
0
  }
1304
0
  const int total_samples = mbmi->num_proj_ref;
1305
0
  if (total_samples == 0) {
1306
    // Do not search WARPED_CAUSAL if there are no samples to use to determine
1307
    // warped parameters.
1308
0
    last_motion_mode_allowed = OBMC_CAUSAL;
1309
0
  }
1310
1311
0
  const MB_MODE_INFO base_mbmi = *mbmi;
1312
0
  MB_MODE_INFO best_mbmi;
1313
0
  const int interp_filter = features->interp_filter;
1314
0
  const int switchable_rate =
1315
0
      av1_is_interp_needed(xd)
1316
0
          ? av1_get_switchable_rate(x, xd, interp_filter,
1317
0
                                    cm->seq_params->enable_dual_filter)
1318
0
          : 0;
1319
0
  int64_t best_rd = INT64_MAX;
1320
0
  int best_rate_mv = rate_mv0;
1321
0
  const int mi_row = xd->mi_row;
1322
0
  const int mi_col = xd->mi_col;
1323
0
  int mode_index_start, mode_index_end;
1324
  // Modify the start and end index according to speed features. For example,
1325
  // if SIMPLE_TRANSLATION has already been searched according to
1326
  // the motion_mode_for_winner_cand speed feature, update the mode_index_start
1327
  // to avoid searching it again.
1328
0
  update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
1329
0
                              last_motion_mode_allowed, interintra_allowed,
1330
0
                              eval_motion_mode);
1331
  // Main function loop. This loops over all of the possible motion modes and
1332
  // computes RD to determine the best one. This process includes computing
1333
  // any necessary side information for the motion mode and performing the
1334
  // transform search.
1335
0
  for (int mode_index = mode_index_start; mode_index <= mode_index_end;
1336
0
       mode_index++) {
1337
0
    if (args->skip_motion_mode && mode_index) continue;
1338
0
    int tmp_rate2 = rate2_nocoeff;
1339
0
    const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
1340
0
    int tmp_rate_mv = rate_mv0;
1341
1342
0
    *mbmi = base_mbmi;
1343
0
    if (is_interintra_mode) {
1344
      // Only use SIMPLE_TRANSLATION for interintra
1345
0
      mbmi->motion_mode = SIMPLE_TRANSLATION;
1346
0
    } else {
1347
0
      mbmi->motion_mode = (MOTION_MODE)mode_index;
1348
0
      assert(mbmi->ref_frame[1] != INTRA_FRAME);
1349
0
    }
1350
1351
    // Do not search OBMC if the probability of selecting it is below a
1352
    // predetermined threshold for this update_type and block size.
1353
0
    const FRAME_UPDATE_TYPE update_type =
1354
0
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1355
0
    int use_actual_frame_probs = 1;
1356
0
    int prune_obmc;
1357
#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
1358
    use_actual_frame_probs =
1359
        (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
1360
    if (!use_actual_frame_probs) {
1361
      prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
1362
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
1363
    }
1364
#endif
1365
0
    if (use_actual_frame_probs) {
1366
0
      prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
1367
0
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
1368
0
    }
1369
0
    if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
1370
0
        mbmi->motion_mode == OBMC_CAUSAL)
1371
0
      continue;
1372
1373
0
    if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
1374
      // SIMPLE_TRANSLATION mode: no need to recalculate.
1375
      // The prediction is calculated before motion_mode_rd() is called in
1376
      // handle_inter_mode()
1377
0
    } else if (mbmi->motion_mode == OBMC_CAUSAL) {
1378
0
      const uint32_t cur_mv = mbmi->mv[0].as_int;
1379
      // OBMC_CAUSAL not allowed for compound prediction
1380
0
      assert(!is_comp_pred);
1381
0
      if (have_newmv_in_inter_mode(this_mode)) {
1382
0
        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
1383
0
                                 &mbmi->mv[0], NULL);
1384
0
        tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1385
0
      }
1386
0
      if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
1387
        // Build the predictor according to the current motion vector if it has
1388
        // not already been built
1389
0
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1390
0
                                      0, av1_num_planes(cm) - 1);
1391
0
      }
1392
      // Build the inter predictor by blending the predictor corresponding to
1393
      // this MV, and the neighboring blocks using the OBMC model
1394
0
      av1_build_obmc_inter_prediction(
1395
0
          cm, xd, args->above_pred_buf, args->above_pred_stride,
1396
0
          args->left_pred_buf, args->left_pred_stride);
1397
0
#if !CONFIG_REALTIME_ONLY
1398
0
    } else if (mbmi->motion_mode == WARPED_CAUSAL) {
1399
0
      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
1400
0
      mbmi->motion_mode = WARPED_CAUSAL;
1401
0
      mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
1402
0
      mbmi->interp_filters =
1403
0
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1404
1405
0
      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
1406
0
      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
1407
      // Select the samples according to motion vector difference
1408
0
      if (mbmi->num_proj_ref > 1) {
1409
0
        mbmi->num_proj_ref = av1_selectSamples(
1410
0
            &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
1411
0
      }
1412
1413
      // Compute the warped motion parameters with a least squares fit
1414
      //  using the collected samples
1415
0
      if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
1416
0
                               mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
1417
0
                               &mbmi->wm_params, mi_row, mi_col)) {
1418
0
        assert(!is_comp_pred);
1419
0
        if (have_newmv_in_inter_mode(this_mode)) {
1420
          // Refine MV for NEWMV mode
1421
0
          const int_mv mv0 = mbmi->mv[0];
1422
0
          const WarpedMotionParams wm_params0 = mbmi->wm_params;
1423
0
          const int num_proj_ref0 = mbmi->num_proj_ref;
1424
1425
0
          const int_mv ref_mv = av1_get_ref_mv(x, 0);
1426
0
          SUBPEL_MOTION_SEARCH_PARAMS ms_params;
1427
0
          av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
1428
0
                                            &ref_mv.as_mv, NULL);
1429
1430
          // Refine MV in a small range.
1431
0
          av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
1432
0
                               total_samples);
1433
1434
0
          if (mv0.as_int != mbmi->mv[0].as_int) {
1435
            // Keep the refined MV and WM parameters.
1436
0
            tmp_rate_mv = av1_mv_bit_cost(
1437
0
                &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
1438
0
                x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1439
0
            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1440
0
          } else {
1441
            // Restore the old MV and WM parameters.
1442
0
            mbmi->mv[0] = mv0;
1443
0
            mbmi->wm_params = wm_params0;
1444
0
            mbmi->num_proj_ref = num_proj_ref0;
1445
0
          }
1446
0
        }
1447
1448
        // Build the warped predictor
1449
0
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
1450
0
                                      av1_num_planes(cm) - 1);
1451
0
      } else {
1452
0
        continue;
1453
0
      }
1454
0
#endif  // !CONFIG_REALTIME_ONLY
1455
0
    } else if (is_interintra_mode) {
1456
0
      const int ret =
1457
0
          av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
1458
0
                                      &tmp_rate_mv, &tmp_rate2, orig_dst);
1459
0
      if (ret < 0) continue;
1460
0
    }
1461
1462
    // If we are searching newmv and the mv is the same as refmv, skip the
1463
    // current mode
1464
0
    if (!av1_check_newmv_joint_nonzero(cm, x)) continue;
1465
1466
    // Update rd_stats for the current motion mode
1467
0
    txfm_info->skip_txfm = 0;
1468
0
    rd_stats->dist = 0;
1469
0
    rd_stats->sse = 0;
1470
0
    rd_stats->skip_txfm = 1;
1471
0
    rd_stats->rate = tmp_rate2;
1472
0
    const ModeCosts *mode_costs = &x->mode_costs;
1473
0
    if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
1474
0
    if (interintra_allowed) {
1475
0
      rd_stats->rate +=
1476
0
          mode_costs->interintra_cost[size_group_lookup[bsize]]
1477
0
                                     [mbmi->ref_frame[1] == INTRA_FRAME];
1478
0
    }
1479
0
    if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
1480
0
        (mbmi->ref_frame[1] != INTRA_FRAME)) {
1481
0
      if (last_motion_mode_allowed == WARPED_CAUSAL) {
1482
0
        rd_stats->rate +=
1483
0
            mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
1484
0
      } else {
1485
0
        rd_stats->rate +=
1486
0
            mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
1487
0
      }
1488
0
    }
1489
1490
0
    int64_t this_yrd = INT64_MAX;
1491
1492
0
    if (!do_tx_search) {
1493
      // Avoid doing a transform search here to speed up the overall mode
1494
      // search. It will be done later in the mode search if the current
1495
      // motion mode seems promising.
1496
0
      int64_t curr_sse = -1;
1497
0
      int64_t sse_y = -1;
1498
0
      int est_residue_cost = 0;
1499
0
      int64_t est_dist = 0;
1500
0
      int64_t est_rd = 0;
1501
0
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1502
0
        curr_sse = get_sse(cpi, x, &sse_y);
1503
0
        const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
1504
0
                                                 &est_residue_cost, &est_dist);
1505
0
        (void)has_est_rd;
1506
0
        assert(has_est_rd);
1507
0
      } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
1508
0
                 cpi->sf.rt_sf.use_nonrd_pick_mode) {
1509
0
        model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
1510
0
            cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
1511
0
            NULL, &curr_sse, NULL, NULL, NULL);
1512
0
        sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
1513
0
      }
1514
0
      est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
1515
0
      if (est_rd * 0.80 > *best_est_rd) {
1516
0
        mbmi->ref_frame[1] = ref_frame_1;
1517
0
        continue;
1518
0
      }
1519
0
      const int mode_rate = rd_stats->rate;
1520
0
      rd_stats->rate += est_residue_cost;
1521
0
      rd_stats->dist = est_dist;
1522
0
      rd_stats->rdcost = est_rd;
1523
0
      if (rd_stats->rdcost < *best_est_rd) {
1524
0
        *best_est_rd = rd_stats->rdcost;
1525
0
        assert(sse_y >= 0);
1526
0
        ref_skip_rd[1] = cpi->sf.inter_sf.txfm_rd_gate_level
1527
0
                             ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
1528
0
                             : INT64_MAX;
1529
0
      }
1530
0
      if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
1531
0
        if (!is_comp_pred) {
1532
0
          assert(curr_sse >= 0);
1533
0
          inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1534
0
                                rd_stats->rdcost, rd_stats, rd_stats_y,
1535
0
                                rd_stats_uv, mbmi);
1536
0
        }
1537
0
      } else {
1538
0
        assert(curr_sse >= 0);
1539
0
        inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1540
0
                              rd_stats->rdcost, rd_stats, rd_stats_y,
1541
0
                              rd_stats_uv, mbmi);
1542
0
      }
1543
0
      mbmi->skip_txfm = 0;
1544
0
    } else {
1545
      // Perform full transform search
1546
0
      int64_t skip_rd = INT64_MAX;
1547
0
      int64_t skip_rdy = INT64_MAX;
1548
0
      if (cpi->sf.inter_sf.txfm_rd_gate_level) {
1549
        // Check if the mode is good enough based on skip RD
1550
0
        int64_t sse_y = INT64_MAX;
1551
0
        int64_t curr_sse = get_sse(cpi, x, &sse_y);
1552
0
        skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
1553
0
        skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
1554
0
        int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
1555
0
                                        cpi->sf.inter_sf.txfm_rd_gate_level, 0);
1556
0
        if (!eval_txfm) continue;
1557
0
      }
1558
1559
      // Do transform search
1560
0
      const int mode_rate = rd_stats->rate;
1561
0
      if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
1562
0
                           rd_stats->rate, ref_best_rd)) {
1563
0
        if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
1564
0
          return INT64_MAX;
1565
0
        }
1566
0
        continue;
1567
0
      }
1568
0
      const int skip_ctx = av1_get_skip_txfm_context(xd);
1569
0
      const int y_rate =
1570
0
          rd_stats->skip_txfm
1571
0
              ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
1572
0
              : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
1573
0
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);
1574
1575
0
      const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1576
0
      if (curr_rd < ref_best_rd) {
1577
0
        ref_best_rd = curr_rd;
1578
0
        ref_skip_rd[0] = skip_rd;
1579
0
        ref_skip_rd[1] = skip_rdy;
1580
0
      }
1581
0
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1582
0
        inter_mode_data_push(
1583
0
            tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
1584
0
            rd_stats_y->rate + rd_stats_uv->rate +
1585
0
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
1586
0
      }
1587
0
    }
1588
1589
0
    if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
1590
0
      if (is_nontrans_global_motion(xd, xd->mi[0])) {
1591
0
        mbmi->interp_filters =
1592
0
            av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1593
0
      }
1594
0
    }
1595
1596
0
    const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1597
0
    if (mode_index == 0) {
1598
0
      args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
1599
0
    }
1600
0
    if (mode_index == 0 || tmp_rd < best_rd) {
1601
      // Update best_rd data if this is the best motion mode so far
1602
0
      best_mbmi = *mbmi;
1603
0
      best_rd = tmp_rd;
1604
0
      best_rd_stats = *rd_stats;
1605
0
      best_rd_stats_y = *rd_stats_y;
1606
0
      best_rate_mv = tmp_rate_mv;
1607
0
      *yrd = this_yrd;
1608
0
      if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
1609
0
      memcpy(best_blk_skip, txfm_info->blk_skip,
1610
0
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
1611
0
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
1612
0
      best_xskip_txfm = mbmi->skip_txfm;
1613
0
    }
1614
0
  }
1615
  // Update RD and mbmi stats for selected motion mode
1616
0
  mbmi->ref_frame[1] = ref_frame_1;
1617
0
  *rate_mv = best_rate_mv;
1618
0
  if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
1619
0
    av1_invalid_rd_stats(rd_stats);
1620
0
    restore_dst_buf(xd, *orig_dst, num_planes);
1621
0
    return INT64_MAX;
1622
0
  }
1623
0
  *mbmi = best_mbmi;
1624
0
  *rd_stats = best_rd_stats;
1625
0
  *rd_stats_y = best_rd_stats_y;
1626
0
  if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
1627
0
  memcpy(txfm_info->blk_skip, best_blk_skip,
1628
0
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
1629
0
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
1630
0
  txfm_info->skip_txfm = best_xskip_txfm;
1631
1632
0
  restore_dst_buf(xd, *orig_dst, num_planes);
1633
0
  return 0;
1634
0
}
1635
1636
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
1637
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
1638
0
                            const BUFFER_SET *const orig_dst) {
1639
0
  assert(bsize < BLOCK_SIZES_ALL);
1640
0
  const AV1_COMMON *cm = &cpi->common;
1641
0
  const int num_planes = av1_num_planes(cm);
1642
0
  MACROBLOCKD *const xd = &x->e_mbd;
1643
0
  const int mi_row = xd->mi_row;
1644
0
  const int mi_col = xd->mi_col;
1645
0
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize, 0,
1646
0
                                av1_num_planes(cm) - 1);
1647
1648
0
  int64_t total_sse = 0;
1649
0
  for (int plane = 0; plane < num_planes; ++plane) {
1650
0
    const struct macroblock_plane *const p = &x->plane[plane];
1651
0
    const struct macroblockd_plane *const pd = &xd->plane[plane];
1652
0
    const BLOCK_SIZE plane_bsize =
1653
0
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
1654
0
    const int bw = block_size_wide[plane_bsize];
1655
0
    const int bh = block_size_high[plane_bsize];
1656
1657
0
    av1_subtract_plane(x, plane_bsize, plane);
1658
0
    int64_t sse = aom_sum_squares_2d_i16(p->src_diff, bw, bw, bh) << 4;
1659
0
    total_sse += sse;
1660
0
  }
1661
0
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
1662
0
  rd_stats->dist = rd_stats->sse = total_sse;
1663
0
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];
1664
0
  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1665
1666
0
  restore_dst_buf(xd, *orig_dst, num_planes);
1667
0
  return 0;
1668
0
}
1669
1670
// Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the relevant
1671
// mode
1672
static INLINE int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
1673
                                      int ref_idx,
1674
                                      const MV_REFERENCE_FRAME *ref_frame,
1675
0
                                      PREDICTION_MODE single_mode) {
1676
0
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1677
0
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1678
0
  assert(single_mode != NEWMV);
1679
0
  if (single_mode == NEARESTMV) {
1680
0
    return 0;
1681
0
  } else if (single_mode == NEARMV) {
1682
    // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
1683
    // when ref_mv_count = 1, NEARMV is same as GLOBALMV
1684
0
    if (ref_mv_count < 2) return 1;
1685
0
  } else if (single_mode == GLOBALMV) {
1686
    // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
1687
0
    if (ref_mv_count == 0) return 1;
1688
    // when ref_mv_count == 1, NEARMV is same as GLOBALMV
1689
0
    else if (ref_mv_count == 1)
1690
0
      return 0;
1691
1692
0
    int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
1693
    // Check GLOBALMV is matching with any mv in ref_mv_stack
1694
0
    for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
1695
0
      int_mv this_mv;
1696
1697
0
      if (ref_idx == 0)
1698
0
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
1699
0
      else
1700
0
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
1701
1702
0
      if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
1703
0
        return 1;
1704
0
    }
1705
0
  }
1706
0
  return 0;
1707
0
}
1708
1709
static INLINE int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
1710
                              int ref_idx, int ref_mv_idx,
1711
                              int skip_repeated_ref_mv,
1712
                              const MV_REFERENCE_FRAME *ref_frame,
1713
0
                              const MB_MODE_INFO_EXT *mbmi_ext) {
1714
0
  const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1715
0
  assert(is_inter_singleref_mode(single_mode));
1716
0
  if (single_mode == NEWMV) {
1717
0
    this_mv->as_int = INVALID_MV;
1718
0
  } else if (single_mode == GLOBALMV) {
1719
0
    if (skip_repeated_ref_mv &&
1720
0
        check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1721
0
      return 0;
1722
0
    *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1723
0
  } else {
1724
0
    assert(single_mode == NEARMV || single_mode == NEARESTMV);
1725
0
    const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1726
0
    const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
1727
0
    if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
1728
0
      assert(ref_mv_offset >= 0);
1729
0
      if (ref_idx == 0) {
1730
0
        *this_mv =
1731
0
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
1732
0
      } else {
1733
0
        *this_mv =
1734
0
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
1735
0
      }
1736
0
    } else {
1737
0
      if (skip_repeated_ref_mv &&
1738
0
          check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1739
0
        return 0;
1740
0
      *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1741
0
    }
1742
0
  }
1743
0
  return 1;
1744
0
}
1745
1746
// Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
1747
// population
1748
static INLINE int skip_nearest_near_mv_using_refmv_weight(
1749
    const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
1750
0
    const int8_t ref_frame_type) {
1751
0
  if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
1752
1753
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1754
0
  const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
1755
0
  const int ref_mv_count =
1756
0
      AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
1757
1758
0
  if (ref_mv_count == 0) return 0;
1759
  // If ref mv list has atleast one nearest candidate do not prune NEARESTMV
1760
0
  if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;
1761
1762
  // Count number of ref mvs populated from nearest candidates
1763
0
  int nearest_refmv_count = 0;
1764
0
  for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
1765
0
    if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
1766
0
  }
1767
1768
  // nearest_refmv_count indicates the closeness of block motion characteristics
1769
  // with respect to its spatial neighbor. Smaller value of nearest_refmv_count
1770
  // w.r.t to ref_mv_count means less correlation with its spatial neighbors.
1771
  // Hence less possibility for NEARESTMV and NEARMV modes becoming the best
1772
  // mode since these modes work well for blocks that shares similar motion
1773
  // characteristics with its neighbor. Thus, NEARMV mode is pruned when
1774
  // nearest_refmv_count is relatively smaller than ref_mv_count and NEARESTMV
1775
  // mode is pruned if none of the ref mvs are populated from nearest candidate.
1776
0
  const int prune_thresh = 1 + (ref_mv_count >= 2);
1777
0
  if (nearest_refmv_count < prune_thresh) return 1;
1778
0
  return 0;
1779
0
}
1780
1781
// This function updates the non-new mv for the current prediction mode
1782
static INLINE int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
1783
                               const AV1_COMMON *cm, const MACROBLOCK *x,
1784
0
                               int skip_repeated_ref_mv) {
1785
0
  const MACROBLOCKD *xd = &x->e_mbd;
1786
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
1787
0
  const int is_comp_pred = has_second_ref(mbmi);
1788
1789
0
  int ret = 1;
1790
0
  for (int i = 0; i < is_comp_pred + 1; ++i) {
1791
0
    int_mv this_mv;
1792
0
    this_mv.as_int = INVALID_MV;
1793
0
    ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
1794
0
                      skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
1795
0
    if (!ret) return 0;
1796
0
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
1797
0
    if (single_mode == NEWMV) {
1798
0
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1799
0
      cur_mv[i] =
1800
0
          (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
1801
0
                         .this_mv
1802
0
                   : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
1803
0
                         .comp_mv;
1804
0
    } else {
1805
0
      ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
1806
0
    }
1807
0
  }
1808
0
  return ret;
1809
0
}
1810
1811
static INLINE int get_drl_cost(const MB_MODE_INFO *mbmi,
1812
                               const MB_MODE_INFO_EXT *mbmi_ext,
1813
                               const int (*const drl_mode_cost0)[2],
1814
0
                               int8_t ref_frame_type) {
1815
0
  int cost = 0;
1816
0
  if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
1817
0
    for (int idx = 0; idx < 2; ++idx) {
1818
0
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1819
0
        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1820
0
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
1821
0
        if (mbmi->ref_mv_idx == idx) return cost;
1822
0
      }
1823
0
    }
1824
0
    return cost;
1825
0
  }
1826
1827
0
  if (have_nearmv_in_inter_mode(mbmi->mode)) {
1828
0
    for (int idx = 1; idx < 3; ++idx) {
1829
0
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1830
0
        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1831
0
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
1832
0
        if (mbmi->ref_mv_idx == (idx - 1)) return cost;
1833
0
      }
1834
0
    }
1835
0
    return cost;
1836
0
  }
1837
0
  return cost;
1838
0
}
1839
1840
static INLINE int is_single_newmv_valid(const HandleInterModeArgs *const args,
1841
                                        const MB_MODE_INFO *const mbmi,
1842
0
                                        PREDICTION_MODE this_mode) {
1843
0
  for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
1844
0
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1845
0
    const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
1846
0
    if (single_mode == NEWMV &&
1847
0
        args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
1848
0
      return 0;
1849
0
    }
1850
0
  }
1851
0
  return 1;
1852
0
}
1853
1854
static int get_drl_refmv_count(const MACROBLOCK *const x,
1855
                               const MV_REFERENCE_FRAME *ref_frame,
1856
0
                               PREDICTION_MODE mode) {
1857
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1858
0
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1859
0
  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
1860
0
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1861
0
  const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);
1862
0
  const int has_drl =
1863
0
      (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1);
1864
0
  const int ref_set =
1865
0
      has_drl ? AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv) : 1;
1866
1867
0
  return ref_set;
1868
0
}
1869
1870
// Checks if particular ref_mv_idx should be pruned.
1871
static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
                                         const int qindex,
                                         const int ref_mv_idx) {
  // Levels 3 and above prune unconditionally.
  if (reduce_inter_modes >= 3) return 1;

  // The q-index based rule below is only meant for reduce_inter_modes = 2.
  assert(reduce_inter_modes == 2);

  // Split the q index range into three equal bands (0, 1 or 2):
  //   band 0 (q index 0..85):    prune if ref_mv_idx >= 1
  //   band 1 (q index 86..170):  prune if ref_mv_idx == 2
  //   band 2 (q index 171..255): no pruning
  const int qindex_band = qindex * 3 / QINDEX_RANGE;
  return ref_mv_idx > qindex_band;
}
1885
1886
// Whether this reference motion vector can be skipped, based on initial
1887
// heuristics.
1888
static bool ref_mv_idx_early_breakout(
1889
    const SPEED_FEATURES *const sf,
1890
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
1891
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
1892
0
    int ref_mv_idx) {
1893
0
  MACROBLOCKD *xd = &x->e_mbd;
1894
0
  MB_MODE_INFO *mbmi = xd->mi[0];
1895
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1896
0
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1897
0
  const int is_comp_pred = has_second_ref(mbmi);
1898
0
  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
1899
0
    if (mbmi->ref_frame[0] == LAST2_FRAME ||
1900
0
        mbmi->ref_frame[0] == LAST3_FRAME ||
1901
0
        mbmi->ref_frame[1] == LAST2_FRAME ||
1902
0
        mbmi->ref_frame[1] == LAST3_FRAME) {
1903
0
      const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
1904
0
      if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
1905
0
          REF_CAT_LEVEL) {
1906
0
        return true;
1907
0
      }
1908
0
    }
1909
    // TODO(any): Experiment with reduce_inter_modes for compound prediction
1910
0
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
1911
0
        have_newmv_in_inter_mode(mbmi->mode)) {
1912
0
      if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
1913
0
          mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
1914
0
        const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
1915
0
        const int do_prune = prune_ref_mv_idx_using_qindex(
1916
0
            sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
1917
0
        if (do_prune &&
1918
0
            (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
1919
0
             REF_CAT_LEVEL)) {
1920
0
          return true;
1921
0
        }
1922
0
      }
1923
0
    }
1924
0
  }
1925
1926
0
  mbmi->ref_mv_idx = ref_mv_idx;
1927
0
  if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
1928
0
    return true;
1929
0
  }
1930
0
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
1931
0
  const int drl_cost = get_drl_cost(
1932
0
      mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
1933
0
  est_rd_rate += drl_cost;
1934
0
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
1935
0
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
1936
0
    return true;
1937
0
  }
1938
0
  return false;
1939
0
}
1940
1941
// Compute the estimated RD cost for the motion vector with simple translation.
1942
static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
1943
                                          RD_STATS *rd_stats,
1944
                                          HandleInterModeArgs *args,
1945
                                          int ref_mv_idx, int64_t ref_best_rd,
1946
0
                                          BLOCK_SIZE bsize) {
1947
0
  MACROBLOCKD *xd = &x->e_mbd;
1948
0
  MB_MODE_INFO *mbmi = xd->mi[0];
1949
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1950
0
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1951
0
  const AV1_COMMON *cm = &cpi->common;
1952
0
  const int is_comp_pred = has_second_ref(mbmi);
1953
0
  const ModeCosts *mode_costs = &x->mode_costs;
1954
1955
0
  struct macroblockd_plane *p = xd->plane;
1956
0
  const BUFFER_SET orig_dst = {
1957
0
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
1958
0
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
1959
0
  };
1960
0
  av1_init_rd_stats(rd_stats);
1961
1962
0
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
1963
0
  mbmi->comp_group_idx = 0;
1964
0
  mbmi->compound_idx = 1;
1965
0
  if (mbmi->ref_frame[1] == INTRA_FRAME) {
1966
0
    mbmi->ref_frame[1] = NONE_FRAME;
1967
0
  }
1968
0
  int16_t mode_ctx =
1969
0
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
1970
1971
0
  mbmi->num_proj_ref = 0;
1972
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
1973
0
  mbmi->ref_mv_idx = ref_mv_idx;
1974
1975
0
  rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
1976
0
  const int drl_cost =
1977
0
      get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
1978
0
  rd_stats->rate += drl_cost;
1979
1980
0
  int_mv cur_mv[2];
1981
0
  if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
1982
0
    return INT64_MAX;
1983
0
  }
1984
0
  assert(have_nearmv_in_inter_mode(mbmi->mode));
1985
0
  for (int i = 0; i < is_comp_pred + 1; ++i) {
1986
0
    mbmi->mv[i].as_int = cur_mv[i].as_int;
1987
0
  }
1988
0
  const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
1989
0
  rd_stats->rate += ref_mv_cost;
1990
1991
0
  if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
1992
0
    return INT64_MAX;
1993
0
  }
1994
1995
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
1996
0
  mbmi->num_proj_ref = 0;
1997
0
  if (is_comp_pred) {
1998
    // Only compound_average
1999
0
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2000
0
    mbmi->comp_group_idx = 0;
2001
0
    mbmi->compound_idx = 1;
2002
0
  }
2003
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
2004
2005
0
  const int mi_row = xd->mi_row;
2006
0
  const int mi_col = xd->mi_col;
2007
0
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
2008
0
                                AOM_PLANE_Y, AOM_PLANE_Y);
2009
0
  int est_rate;
2010
0
  int64_t est_dist;
2011
0
  model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
2012
0
                                  NULL, NULL, NULL, NULL, NULL);
2013
0
  return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
2014
0
}
2015
2016
// Represents a set of integers, from 0 to sizeof(int) * 8 - 1, as bits in
2017
// an integer. 0 for the i-th bit means that integer is excluded, 1 means
2018
// it is included.
2019
0
// Adds `index` to the set stored in *mask. `index` must be in
// [0, sizeof(int) * 8 - 1]. The bit is formed in unsigned arithmetic so that
// selecting the top bit does not left-shift a signed 1 into the sign bit,
// which is undefined behavior in C.
static INLINE void mask_set_bit(int *mask, int index) {
  *mask = (int)((unsigned int)*mask | (1u << index));
}
2020
2021
0
// Returns true if `index` is a member of the set stored in `mask`. `index`
// must be in [0, sizeof(int) * 8 - 1]. The mask is shifted as an unsigned
// value so the result does not depend on the implementation-defined
// behavior of right-shifting a negative int.
static INLINE bool mask_check_bit(int mask, int index) {
  return (((unsigned int)mask >> index) & 1u) != 0;
}
2024
2025
// Before performing the full MV search in handle_inter_mode, do a simple
2026
// translation search and see if we can eliminate any motion vectors.
2027
// Returns an integer where, if the i-th bit is set, it means that the i-th
2028
// motion vector should be searched. This is only set for NEAR_MV.
2029
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
2030
                                RD_STATS *rd_stats,
2031
                                HandleInterModeArgs *const args,
2032
                                int64_t ref_best_rd, BLOCK_SIZE bsize,
2033
0
                                const int ref_set) {
2034
0
  AV1_COMMON *const cm = &cpi->common;
2035
0
  const MACROBLOCKD *const xd = &x->e_mbd;
2036
0
  const MB_MODE_INFO *const mbmi = xd->mi[0];
2037
0
  const PREDICTION_MODE this_mode = mbmi->mode;
2038
2039
  // Only search indices if they have some chance of being good.
2040
0
  int good_indices = 0;
2041
0
  for (int i = 0; i < ref_set; ++i) {
2042
0
    if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
2043
0
                                  ref_best_rd, i)) {
2044
0
      continue;
2045
0
    }
2046
0
    mask_set_bit(&good_indices, i);
2047
0
  }
2048
2049
  // Only prune in NEARMV mode, if the speed feature is set, and the block size
2050
  // is large enough. If these conditions are not met, return all good indices
2051
  // found so far.
2052
0
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
2053
0
    return good_indices;
2054
0
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
2055
0
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
2056
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
2057
  // so b/2384 can be resolved.
2058
0
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
2059
0
      (mbmi->ref_frame[1] > 0 &&
2060
0
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
2061
0
    return good_indices;
2062
0
  }
2063
2064
  // Calculate the RD cost for the motion vectors using simple translation.
2065
0
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
2066
0
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
2067
    // If this index is bad, ignore it.
2068
0
    if (!mask_check_bit(good_indices, ref_mv_idx)) {
2069
0
      continue;
2070
0
    }
2071
0
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
2072
0
        cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize);
2073
0
  }
2074
  // Find the index with the best RD cost.
2075
0
  int best_idx = 0;
2076
0
  for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
2077
0
    if (idx_rdcost[i] < idx_rdcost[best_idx]) {
2078
0
      best_idx = i;
2079
0
    }
2080
0
  }
2081
  // Only include indices that are good and within a % of the best.
2082
0
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
2083
  // If the simple translation cost is not within this multiple of the
2084
  // best RD, skip it. Note that the cutoff is derived experimentally.
2085
0
  const double ref_dth = 5;
2086
0
  int result = 0;
2087
0
  for (int i = 0; i < ref_set; ++i) {
2088
0
    if (mask_check_bit(good_indices, i) &&
2089
0
        (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
2090
0
        (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
2091
0
      mask_set_bit(&result, i);
2092
0
    }
2093
0
  }
2094
0
  return result;
2095
0
}
2096
2097
/*!\brief Motion mode information for inter mode search speedup.
2098
 *
2099
 * Used in a speed feature to search motion modes other than
2100
 * SIMPLE_TRANSLATION only on winning candidates.
2101
 */
2102
typedef struct motion_mode_candidate {
2103
  /*!
2104
   * Mode info for the motion mode candidate.
2105
   */
2106
  MB_MODE_INFO mbmi;
2107
  /*!
2108
   * Rate describing the cost of the motion vectors for this candidate.
2109
   */
2110
  int rate_mv;
2111
  /*!
2112
   * Rate before motion mode search and transform coding is applied.
2113
   */
2114
  int rate2_nocoeff;
2115
  /*!
2116
   * An integer value 0 or 1 which indicates whether or not to skip the motion
2117
   * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
2118
   * candidate.
2119
   */
2120
  int skip_motion_mode;
2121
  /*!
2122
   * Total RD cost for this candidate.
2123
   */
2124
  int64_t rd_cost;
2125
} motion_mode_candidate;
2126
2127
/*!\cond */
2128
typedef struct motion_mode_best_st_candidate {
2129
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
2130
  int num_motion_mode_cand;
2131
} motion_mode_best_st_candidate;
2132
2133
// Checks if the current reference frame matches with neighbouring block's
2134
// (top/left) reference frames
2135
static AOM_INLINE int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
2136
0
                                                   MB_MODE_INFO *nb_mbmi) {
2137
0
  MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
2138
0
                                          nb_mbmi->ref_frame[1] };
2139
0
  MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
2140
0
                                           cur_mbmi->ref_frame[1] };
2141
0
  const int is_cur_comp_pred = has_second_ref(cur_mbmi);
2142
0
  int match_found = 0;
2143
2144
0
  for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
2145
0
    if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
2146
0
        (cur_ref_frames[i] == nb_ref_frames[1]))
2147
0
      match_found = 1;
2148
0
  }
2149
0
  return match_found;
2150
0
}
2151
2152
static AOM_INLINE int find_ref_match_in_above_nbs(const int total_mi_cols,
2153
0
                                                  MACROBLOCKD *xd) {
2154
0
  if (!xd->up_available) return 1;
2155
0
  const int mi_col = xd->mi_col;
2156
0
  MB_MODE_INFO **cur_mbmi = xd->mi;
2157
  // prev_row_mi points into the mi array, starting at the beginning of the
2158
  // previous row.
2159
0
  MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
2160
0
  const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
2161
0
  uint8_t mi_step;
2162
0
  for (int above_mi_col = mi_col; above_mi_col < end_col;
2163
0
       above_mi_col += mi_step) {
2164
0
    MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
2165
0
    mi_step = mi_size_wide[above_mi[0]->bsize];
2166
0
    int match_found = 0;
2167
0
    if (is_inter_block(*above_mi))
2168
0
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
2169
0
    if (match_found) return 1;
2170
0
  }
2171
0
  return 0;
2172
0
}
2173
2174
static AOM_INLINE int find_ref_match_in_left_nbs(const int total_mi_rows,
2175
0
                                                 MACROBLOCKD *xd) {
2176
0
  if (!xd->left_available) return 1;
2177
0
  const int mi_row = xd->mi_row;
2178
0
  MB_MODE_INFO **cur_mbmi = xd->mi;
2179
  // prev_col_mi points into the mi array, starting at the top of the
2180
  // previous column
2181
0
  MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
2182
0
  const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
2183
0
  uint8_t mi_step;
2184
0
  for (int left_mi_row = mi_row; left_mi_row < end_row;
2185
0
       left_mi_row += mi_step) {
2186
0
    MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
2187
0
    mi_step = mi_size_high[left_mi[0]->bsize];
2188
0
    int match_found = 0;
2189
0
    if (is_inter_block(*left_mi))
2190
0
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
2191
0
    if (match_found) return 1;
2192
0
  }
2193
0
  return 0;
2194
0
}
2195
/*!\endcond */
2196
2197
/*! \brief Struct used to hold TPL data to
2198
 * narrow down parts of the inter mode search.
2199
 */
2200
typedef struct {
2201
  /*!
2202
   * The best inter cost out of all of the reference frames.
2203
   */
2204
  int64_t best_inter_cost;
2205
  /*!
2206
   * The inter cost for each reference frame.
2207
   */
2208
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
2209
} PruneInfoFromTpl;
2210
2211
#if !CONFIG_REALTIME_ONLY
2212
// TODO(Remya): Check if get_tpl_stats_b() can be reused
2213
static AOM_INLINE void get_block_level_tpl_stats(
2214
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
2215
0
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
2216
0
  AV1_COMMON *const cm = &cpi->common;
2217
2218
0
  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
2219
0
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
2220
0
  const int tpl_idx = cpi->gf_frame_index;
2221
0
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
2222
0
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
2223
0
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
2224
0
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
2225
0
  const int mi_wide = mi_size_wide[bsize];
2226
0
  const int mi_high = mi_size_high[bsize];
2227
0
  const int tpl_stride = tpl_frame->stride;
2228
0
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
2229
0
  const int mi_col_sr =
2230
0
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
2231
0
  const int mi_col_end_sr =
2232
0
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
2233
0
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
2234
2235
0
  const int row_step = step;
2236
0
  const int col_step_sr =
2237
0
      coded_to_superres_mi(step, cm->superres_scale_denominator);
2238
0
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
2239
0
       row += row_step) {
2240
0
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
2241
0
         col += col_step_sr) {
2242
0
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
2243
0
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];
2244
2245
      // Sums up the inter cost of corresponding ref frames
2246
0
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2247
0
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
2248
0
            this_stats->pred_error[ref_idx];
2249
0
      }
2250
0
    }
2251
0
  }
2252
2253
  // Computes the best inter cost (minimum inter_cost)
2254
0
  int64_t best_inter_cost = INT64_MAX;
2255
0
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2256
0
    const int64_t cur_inter_cost =
2257
0
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
2258
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
2259
    // calculating the minimum inter_cost
2260
0
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
2261
0
        valid_refs[ref_idx])
2262
0
      best_inter_cost = cur_inter_cost;
2263
0
  }
2264
0
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
2265
0
}
2266
#endif
2267
2268
static AOM_INLINE int prune_modes_based_on_tpl_stats(
2269
    PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
2270
0
    const PREDICTION_MODE this_mode, int prune_mode_level) {
2271
0
  const int have_newmv = have_newmv_in_inter_mode(this_mode);
2272
0
  if ((prune_mode_level < 2) && have_newmv) return 0;
2273
2274
0
  const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
2275
0
  if (best_inter_cost == INT64_MAX) return 0;
2276
2277
0
  const int prune_level = prune_mode_level - 1;
2278
0
  int64_t cur_inter_cost;
2279
2280
0
  const int is_globalmv =
2281
0
      (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
2282
0
  const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
2283
2284
  // Thresholds used for pruning:
2285
  // Lower value indicates aggressive pruning and higher value indicates
2286
  // conservative pruning which is set based on ref_mv_idx and speed feature.
2287
  // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. prune_index
2288
  // 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
2289
0
  static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
2290
0
    { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
2291
0
  };
2292
2293
0
  const int is_comp_pred = (refs[1] > INTRA_FRAME);
2294
0
  if (!is_comp_pred) {
2295
0
    cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2296
0
  } else {
2297
0
    const int64_t inter_cost_ref0 =
2298
0
        inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2299
0
    const int64_t inter_cost_ref1 =
2300
0
        inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
2301
    // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
2302
    // more aggressive pruning
2303
0
    cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
2304
0
  }
2305
2306
  // Prune the mode if cur_inter_cost is greater than threshold times
2307
  // best_inter_cost
2308
0
  if (cur_inter_cost >
2309
0
      ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
2310
0
        best_inter_cost) >>
2311
0
       2))
2312
0
    return 1;
2313
0
  return 0;
2314
0
}
2315
2316
/*!\brief High level function to select parameters for compound mode.
2317
 *
2318
 * \ingroup inter_mode_search
2319
 * The main search functionality is done in the call to av1_compound_type_rd().
2320
 *
2321
 * \param[in]     cpi               Top-level encoder structure.
2322
 * \param[in]     x                 Pointer to struct holding all the data for
2323
 *                                  the current macroblock.
2324
 * \param[in]     args              HandleInterModeArgs struct holding
2325
 *                                  miscellaneous arguments for inter mode
2326
 *                                  search. See the documentation for this
2327
 *                                  struct for a description of each member.
2328
 * \param[in]     ref_best_rd       Best RD found so far for this block.
2329
 *                                  It is used for early termination of this
2330
 *                                  search if the RD exceeds this value.
2331
 * \param[in,out] cur_mv            Current motion vector.
2332
 * \param[in]     bsize             Current block size.
2333
 * \param[in,out] compmode_interinter_cost  RD of the selected interinter
2334
 *                                  compound mode.
2335
 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2336
 *                                  allocated buffers for the compound
2337
 *                                  predictors and masks in the compound type
2338
 *                                  search.
2339
 * \param[in,out] orig_dst          A prediction buffer to hold a computed
2340
 *                                  prediction. This will eventually hold the
2341
 *                                  final prediction, and the tmp_dst info will
2342
 *                                  be copied here.
2343
 * \param[in]     tmp_dst           A temporary prediction buffer to hold a
2344
 *                                  computed prediction.
2345
 * \param[in,out] rate_mv           The rate associated with the motion vectors.
2346
 *                                  This will be modified if a motion search is
2347
 *                                  done in the motion mode search.
2348
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
2349
 *                                  information.
2350
 * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is the
2351
 *                                  best total RD for a skip mode so far, and
2352
 *                                  skip_rd[1] is the best RD for a skip mode so
2353
 *                                  far in luma. This is used as a speed feature
2354
 *                                  to skip the transform search if the computed
2355
 *                                  skip RD for the current mode is not better
2356
 *                                  than the best skip_rd so far.
2357
 * \param[in,out] skip_build_pred   Indicates whether or not to build the inter
2358
 *                                  predictor. If this is 0, the inter predictor
2359
 *                                  has already been built and thus we can avoid
2360
 *                                  repeating computation.
2361
 * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2362
 * a viable candidate.
2363
 */
2364
static int process_compound_inter_mode(
2365
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
2366
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
2367
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
2368
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
2369
0
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
2370
0
  MACROBLOCKD *xd = &x->e_mbd;
2371
0
  MB_MODE_INFO *mbmi = xd->mi[0];
2372
0
  const AV1_COMMON *cm = &cpi->common;
2373
0
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
2374
0
                                   cm->seq_params->enable_masked_compound;
2375
0
  int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
2376
0
                         (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);
2377
2378
0
  const int num_planes = av1_num_planes(cm);
2379
0
  const int mi_row = xd->mi_row;
2380
0
  const int mi_col = xd->mi_col;
2381
0
  int is_luma_interp_done = 0;
2382
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
2383
2384
0
  int64_t best_rd_compound;
2385
0
  int64_t rd_thresh;
2386
0
  const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
2387
0
  const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
2388
0
  rd_thresh = get_rd_thresh_from_best_rd(ref_best_rd, (1 << comp_type_rd_shift),
2389
0
                                         comp_type_rd_scale);
2390
  // Select compound type and any parameters related to that type
2391
  // (for example, the mask parameters if it is a masked mode) and compute
2392
  // the RD
2393
0
  *compmode_interinter_cost = av1_compound_type_rd(
2394
0
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
2395
0
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
2396
0
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
2397
0
  if (ref_best_rd < INT64_MAX &&
2398
0
      (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
2399
0
          ref_best_rd) {
2400
0
    restore_dst_buf(xd, *orig_dst, num_planes);
2401
0
    return 1;
2402
0
  }
2403
2404
  // Build only uv predictor for COMPOUND_AVERAGE.
2405
  // Note there is no need to call av1_enc_build_inter_predictor
2406
  // for luma if COMPOUND_AVERAGE is selected because it is the first
2407
  // candidate in av1_compound_type_rd, which means it used the dst_buf
2408
  // rather than the tmp_buf.
2409
0
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
2410
0
    if (num_planes > 1) {
2411
0
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
2412
0
                                    AOM_PLANE_U, num_planes - 1);
2413
0
    }
2414
0
    *skip_build_pred = 1;
2415
0
  }
2416
0
  return 0;
2417
0
}
2418
2419
// Speed feature to prune out MVs that are similar to previous MVs if they
2420
// don't achieve the best RD advantage.
2421
static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2422
                                   int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2423
0
                                   MB_MODE_INFO *mbmi, int pruning_factor) {
2424
0
  int i;
2425
0
  const int is_comp_pred = has_second_ref(mbmi);
2426
0
  const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
2427
2428
  // Skip the evaluation if an MV match is found.
2429
0
  if (ref_mv_idx > 0) {
2430
0
    for (int idx = 0; idx < ref_mv_idx; ++idx) {
2431
0
      if (save_mv[idx][0].as_int == INVALID_MV) continue;
2432
2433
0
      int mv_diff = 0;
2434
0
      for (i = 0; i < 1 + is_comp_pred; ++i) {
2435
0
        mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2436
0
                   abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2437
0
      }
2438
2439
      // If this mode is not the best one, and current MV is similar to
2440
      // previous stored MV, terminate this ref_mv_idx evaluation.
2441
0
      if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2442
0
    }
2443
0
  }
2444
2445
0
  if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2446
0
    for (i = 0; i < is_comp_pred + 1; ++i)
2447
0
      save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2448
0
  }
2449
2450
0
  return 0;
2451
0
}
2452
2453
/*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2454
 *
2455
 * \ingroup inter_mode_search
2456
 *
2457
 * Compares the sse of zero mv and the best sse found in single new_mv. If the
2458
 * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
2459
 * Else returns 0.
2460
 *
2461
 * Note that the sse here comes from single_motion_search. So it is
2462
 * interpolated with the filter in motion search, not the actual interpolation
2463
 * filter used in encoding.
2464
 *
2465
 * \param[in]     fn_ptr            A table of function pointers to compute SSE.
2466
 * \param[in]     x                 Pointer to struct holding all the data for
2467
 *                                  the current macroblock.
2468
 * \param[in]     bsize             The current block_size.
2469
 * \param[in]     args              The args to handle_inter_mode, used to track
2470
 *                                  the best SSE.
2471
 * \param[in]    prune_zero_mv_with_sse  The argument holds speed feature
2472
 *                                       prune_zero_mv_with_sse value
2473
 * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2474
 */
2475
static AOM_INLINE int prune_zero_mv_with_sse(
2476
    const aom_variance_fn_ptr_t *fn_ptr, const MACROBLOCK *x, BLOCK_SIZE bsize,
2477
0
    const HandleInterModeArgs *args, int prune_zero_mv_with_sse) {
2478
0
  const MACROBLOCKD *xd = &x->e_mbd;
2479
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
2480
2481
0
  const int is_comp_pred = has_second_ref(mbmi);
2482
0
  const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
2483
2484
  // Check that the global mv is the same as ZEROMV
2485
0
  assert(mbmi->mv[0].as_int == 0);
2486
0
  assert(IMPLIES(is_comp_pred, mbmi->mv[0].as_int == 0));
2487
0
  assert(xd->global_motion[refs[0]].wmtype == TRANSLATION ||
2488
0
         xd->global_motion[refs[0]].wmtype == IDENTITY);
2489
2490
  // Don't prune if we have invalid data
2491
0
  for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2492
0
    assert(mbmi->mv[0].as_int == 0);
2493
0
    if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
2494
0
      return 0;
2495
0
    }
2496
0
  }
2497
2498
  // Sum up the sse of ZEROMV and best NEWMV
2499
0
  unsigned int this_sse_sum = 0;
2500
0
  unsigned int best_sse_sum = 0;
2501
0
  for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2502
0
    const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
2503
0
    const struct macroblockd_plane *pd = xd->plane;
2504
0
    const struct buf_2d *src_buf = &p->src;
2505
0
    const struct buf_2d *ref_buf = &pd->pre[idx];
2506
0
    const uint8_t *src = src_buf->buf;
2507
0
    const uint8_t *ref = ref_buf->buf;
2508
0
    const int src_stride = src_buf->stride;
2509
0
    const int ref_stride = ref_buf->stride;
2510
2511
0
    unsigned int this_sse;
2512
0
    fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
2513
0
    this_sse_sum += this_sse;
2514
2515
0
    const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
2516
0
    best_sse_sum += best_sse;
2517
0
  }
2518
2519
0
  const double mul = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25;
2520
0
  if ((double)this_sse_sum > (mul * (double)best_sse_sum)) {
2521
0
    return 1;
2522
0
  }
2523
2524
0
  return 0;
2525
0
}
2526
2527
/*!\brief Searches for interpolation filter in realtime mode during winner eval
2528
 *
2529
 * \ingroup inter_mode_search
2530
 *
2531
 * Does a simple interpolation filter search during winner mode evaluation. This
2532
 * is currently only used by realtime mode as \ref
2533
 * av1_interpolation_filter_search is not called during realtime encoding.
2534
 *
2535
 * This function only searches over two possible filters. EIGHTTAP_REGULAR is
2536
 * always searched. For lowres clips (<= 240p), MULTITAP_SHARP is tried too. For
2537
 * higher-res clips (>240p), EIGHTTAP_SMOOTH is also searched.
2538
 *
2539
 * \param[in]     cpi               Pointer to the compressor. Used for feature
2540
 *                                  flags.
2541
 * \param[in,out] x                 Pointer to macroblock. This is primarily
2542
 *                                  used to access the buffers.
2543
 * \param[in]     mi_row            The current row in mi unit (4X4 pixels).
2544
 * \param[in]     mi_col            The current col in mi unit (4X4 pixels).
2545
 * \param[in]     bsize             The current block_size.
2546
 * \return Returns true if a predictor is built in xd->dst, false otherwise.
2547
 */
2548
static AOM_INLINE bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
2549
                                          int mi_row, int mi_col,
2550
0
                                          BLOCK_SIZE bsize) {
2551
0
  static const InterpFilters filters_ref_set[3] = {
2552
0
    { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
2553
0
    { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
2554
0
    { MULTITAP_SHARP, MULTITAP_SHARP }
2555
0
  };
2556
2557
0
  const AV1_COMMON *const cm = &cpi->common;
2558
0
  MACROBLOCKD *const xd = &x->e_mbd;
2559
0
  MB_MODE_INFO *const mi = xd->mi[0];
2560
0
  int64_t best_cost = INT64_MAX;
2561
0
  int best_filter_index = -1;
2562
  // dst_bufs[0] sores the new predictor, and dist_bifs[1] stores the best
2563
0
  const int num_planes = av1_num_planes(cm);
2564
0
  const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
2565
0
  assert(is_inter_mode(mi->mode));
2566
0
  assert(mi->motion_mode == SIMPLE_TRANSLATION);
2567
0
  assert(!is_inter_compound_mode(mi->mode));
2568
2569
0
  if (!av1_is_interp_needed(xd)) {
2570
0
    return false;
2571
0
  }
2572
2573
0
  struct macroblockd_plane *pd = xd->plane;
2574
0
  const BUFFER_SET orig_dst = {
2575
0
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
2576
0
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
2577
0
  };
2578
0
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
2579
0
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
2580
0
                                 tmp_buf + 2 * MAX_SB_SQUARE },
2581
0
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
2582
0
  const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };
2583
2584
0
  for (int i = 0; i < 3; ++i) {
2585
0
    if (is_240p_or_lesser) {
2586
0
      if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
2587
0
        continue;
2588
0
      }
2589
0
    } else {
2590
0
      if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
2591
0
        continue;
2592
0
      }
2593
0
    }
2594
0
    int64_t cost;
2595
0
    RD_STATS tmp_rd = { 0 };
2596
2597
0
    mi->interp_filters.as_filters = filters_ref_set[i];
2598
0
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
2599
2600
0
    model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
2601
0
                       ? MODELRD_LEGACY
2602
0
                       : MODELRD_TYPE_INTERP_FILTER](
2603
0
        cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
2604
0
        &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);
2605
2606
0
    tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
2607
0
                                           cm->seq_params->enable_dual_filter);
2608
0
    cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
2609
0
    if (cost < best_cost) {
2610
0
      best_filter_index = i;
2611
0
      best_cost = cost;
2612
0
      swap_dst_buf(xd, dst_bufs, num_planes);
2613
0
    }
2614
0
  }
2615
0
  assert(best_filter_index >= 0);
2616
2617
0
  mi->interp_filters.as_filters = filters_ref_set[best_filter_index];
2618
2619
0
  const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];
2620
2621
0
  if (is_best_pred_in_orig) {
2622
0
    swap_dst_buf(xd, dst_bufs, num_planes);
2623
0
  } else {
2624
    // Note that xd->pd's bufers are kept in sync with dst_bufs[0]. So if
2625
    // is_best_pred_in_orig is false, that means the current buffer is the
2626
    // original one.
2627
0
    assert(&orig_dst == dst_bufs[0]);
2628
0
    assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
2629
0
    const int width = block_size_wide[bsize];
2630
0
    const int height = block_size_high[bsize];
2631
0
#if CONFIG_AV1_HIGHBITDEPTH
2632
0
    const bool is_hbd = is_cur_buf_hbd(xd);
2633
0
    if (is_hbd) {
2634
0
      aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
2635
0
                               tmp_dst.stride[AOM_PLANE_Y],
2636
0
                               CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
2637
0
                               orig_dst.stride[AOM_PLANE_Y], width, height);
2638
0
    } else {
2639
0
      aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
2640
0
                        orig_dst.plane[AOM_PLANE_Y],
2641
0
                        orig_dst.stride[AOM_PLANE_Y], width, height);
2642
0
    }
2643
#else
2644
    aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
2645
                      orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
2646
                      width, height);
2647
#endif
2648
0
  }
2649
2650
  // Build the YUV predictor.
2651
0
  if (num_planes > 1) {
2652
0
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
2653
0
                                  AOM_PLANE_U, AOM_PLANE_V);
2654
0
  }
2655
2656
0
  return true;
2657
0
}
2658
2659
/*!\brief AV1 inter mode RD computation
2660
 *
2661
 * \ingroup inter_mode_search
2662
 * Do the RD search for a given inter mode and compute all information relevant
2663
 * to the input mode. It will compute the best MV,
2664
 * compound parameters (if the mode is a compound mode) and interpolation filter
2665
 * parameters.
2666
 *
2667
 * \param[in]     cpi               Top-level encoder structure.
2668
 * \param[in]     tile_data         Pointer to struct holding adaptive
2669
 *                                  data/contexts/models for the tile during
2670
 *                                  encoding.
2671
 * \param[in]     x                 Pointer to structure holding all the data
2672
 *                                  for the current macroblock.
2673
 * \param[in]     bsize             Current block size.
2674
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
2675
 *                                  information.
2676
 * \param[in,out] rd_stats_y        Struct to keep track of the RD information
2677
 *                                  for only the Y plane.
2678
 * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
2679
 *                                  for only the UV planes.
2680
 * \param[in]     args              HandleInterModeArgs struct holding
2681
 *                                  miscellaneous arguments for inter mode
2682
 *                                  search. See the documentation for this
2683
 *                                  struct for a description of each member.
2684
 * \param[in]     ref_best_rd       Best RD found so far for this block.
2685
 *                                  It is used for early termination of this
2686
 *                                  search if the RD exceeds this value.
2687
 * \param[in]     tmp_buf           Temporary buffer used to hold predictors
2688
 *                                  built in this search.
2689
 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2690
 *                                  allocated buffers for the compound
2691
 *                                  predictors and masks in the compound type
2692
 *                                  search.
2693
 * \param[in,out] best_est_rd       Estimated RD for motion mode search if
2694
 *                                  do_tx_search (see below) is 0.
2695
 * \param[in]     do_tx_search      Parameter to indicate whether or not to do
2696
 *                                  a full transform search. This will compute
2697
 *                                  an estimated RD for the modes without the
2698
 *                                  transform search and later perform the full
2699
 *                                  transform search on the best candidates.
2700
 * \param[in,out] inter_modes_info  InterModesInfo struct to hold inter mode
2701
 *                                  information to perform a full transform
2702
 *                                  search only on winning candidates searched
2703
 *                                  with an estimate for transform coding RD.
2704
 * \param[in,out] motion_mode_cand  A motion_mode_candidate struct to store
2705
 *                                  motion mode information used in a speed
2706
 *                                  feature to search motion modes other than
2707
 *                                  SIMPLE_TRANSLATION only on winning
2708
 *                                  candidates.
2709
 * \param[in,out] skip_rd           A length 2 array, where skip_rd[0] is the
2710
 *                                  best total RD for a skip mode so far, and
2711
 *                                  skip_rd[1] is the best RD for a skip mode so
2712
 *                                  far in luma. This is used as a speed feature
2713
 *                                  to skip the transform search if the computed
2714
 *                                  skip RD for the current mode is not better
2715
 *                                  than the best skip_rd so far.
2716
 * \param[in]     inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
2717
 *                                         narrow down the search based on data
2718
 *                                         collected in the TPL model.
2719
 * \param[out]    yrd               Stores the rdcost corresponding to encoding
2720
 *                                  the luma plane.
2721
 *
2722
 * \return The RD cost for the mode being searched.
2723
 */
2724
static int64_t handle_inter_mode(
2725
    AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
2726
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
2727
    RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
2728
    uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
2729
    int64_t *best_est_rd, const int do_tx_search,
2730
    InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
2731
    int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
2732
0
    int64_t *yrd) {
2733
0
  const AV1_COMMON *cm = &cpi->common;
2734
0
  const int num_planes = av1_num_planes(cm);
2735
0
  MACROBLOCKD *xd = &x->e_mbd;
2736
0
  MB_MODE_INFO *mbmi = xd->mi[0];
2737
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2738
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
2739
0
  const int is_comp_pred = has_second_ref(mbmi);
2740
0
  const PREDICTION_MODE this_mode = mbmi->mode;
2741
2742
#if CONFIG_REALTIME_ONLY
2743
  const int prune_modes_based_on_tpl = 0;
2744
#else   // CONFIG_REALTIME_ONLY
2745
0
  const TplParams *const tpl_data = &cpi->ppi->tpl_data;
2746
0
  const int prune_modes_based_on_tpl =
2747
0
      cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
2748
0
      av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
2749
0
#endif  // CONFIG_REALTIME_ONLY
2750
0
  int i;
2751
  // Reference frames for this mode
2752
0
  const int refs[2] = { mbmi->ref_frame[0],
2753
0
                        (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2754
0
  int rate_mv = 0;
2755
0
  int64_t rd = INT64_MAX;
2756
  // Do first prediction into the destination buffer. Do the next
2757
  // prediction into a temporary buffer. Then keep track of which one
2758
  // of these currently holds the best predictor, and use the other
2759
  // one for future predictions. In the end, copy from tmp_buf to
2760
  // dst if necessary.
2761
0
  struct macroblockd_plane *pd = xd->plane;
2762
0
  const BUFFER_SET orig_dst = {
2763
0
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
2764
0
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
2765
0
  };
2766
0
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
2767
0
                                 tmp_buf + 2 * MAX_SB_SQUARE },
2768
0
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
2769
2770
0
  int64_t ret_val = INT64_MAX;
2771
0
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2772
0
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
2773
0
  int64_t best_rd = INT64_MAX;
2774
0
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
2775
0
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
2776
0
  int64_t best_yrd = INT64_MAX;
2777
0
  MB_MODE_INFO best_mbmi = *mbmi;
2778
0
  int best_xskip_txfm = 0;
2779
0
  int64_t newmv_ret_val = INT64_MAX;
2780
0
  inter_mode_info mode_info[MAX_REF_MV_SEARCH];
2781
2782
  // Do not prune the mode based on inter cost from tpl if the current ref frame
2783
  // is the winner ref in neighbouring blocks.
2784
0
  int ref_match_found_in_above_nb = 0;
2785
0
  int ref_match_found_in_left_nb = 0;
2786
0
  if (prune_modes_based_on_tpl) {
2787
0
    ref_match_found_in_above_nb =
2788
0
        find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
2789
0
    ref_match_found_in_left_nb =
2790
0
        find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
2791
0
  }
2792
2793
  // First, perform a simple translation search for each of the indices. If
2794
  // an index performs well, it will be fully searched in the main loop
2795
  // of this function.
2796
0
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
2797
  // Save MV results from first 2 ref_mv_idx.
2798
0
  int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
2799
0
  int best_ref_mv_idx = -1;
2800
0
  const int idx_mask =
2801
0
      ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set);
2802
0
  const int16_t mode_ctx =
2803
0
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
2804
0
  const ModeCosts *mode_costs = &x->mode_costs;
2805
0
  const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
2806
0
  const int base_rate =
2807
0
      args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;
2808
2809
  // As per the experiments, in real-time preset impact of model rd based
2810
  // breakouts is less on encoding time if the following conditions are true.
2811
  //    (1) compound mode is disabled
2812
  //    (2) interpolation filter search is disabled
2813
  // TODO(any): Check the impact of model rd based breakouts in other presets
2814
0
  const int skip_interp_search_modelrd_calc =
2815
0
      cpi->oxcf.mode == REALTIME &&
2816
0
      cm->current_frame.reference_mode == SINGLE_REFERENCE &&
2817
0
      (cpi->sf.rt_sf.skip_interp_filter_search ||
2818
0
       cpi->sf.winner_mode_sf.winner_mode_ifs);
2819
2820
0
  for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
2821
0
    save_mv[i][0].as_int = INVALID_MV;
2822
0
    save_mv[i][1].as_int = INVALID_MV;
2823
0
  }
2824
2825
  // Main loop of this function. This will  iterate over all of the ref mvs
2826
  // in the dynamic reference list and do the following:
2827
  //    1.) Get the current MV. Create newmv MV if necessary
2828
  //    2.) Search compound type and parameters if applicable
2829
  //    3.) Do interpolation filter search
2830
  //    4.) Build the inter predictor
2831
  //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
2832
  //        WARPED_CAUSAL)
2833
  //    6.) Update stats if best so far
2834
0
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
2835
0
    mbmi->ref_mv_idx = ref_mv_idx;
2836
2837
0
    mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
2838
0
    mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
2839
0
    const int drl_cost = get_drl_cost(
2840
0
        mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
2841
0
    mode_info[ref_mv_idx].drl_cost = drl_cost;
2842
0
    mode_info[ref_mv_idx].skip = 0;
2843
2844
0
    if (!mask_check_bit(idx_mask, ref_mv_idx)) {
2845
      // MV did not perform well in simple translation search. Skip it.
2846
0
      continue;
2847
0
    }
2848
0
    if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
2849
0
        !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
2850
      // Skip mode if TPL model indicates it will not be beneficial.
2851
0
      if (prune_modes_based_on_tpl_stats(
2852
0
              inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
2853
0
              cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
2854
0
        continue;
2855
0
    }
2856
0
    av1_init_rd_stats(rd_stats);
2857
2858
    // Initialize compound mode data
2859
0
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2860
0
    mbmi->comp_group_idx = 0;
2861
0
    mbmi->compound_idx = 1;
2862
0
    if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
2863
2864
0
    mbmi->num_proj_ref = 0;
2865
0
    mbmi->motion_mode = SIMPLE_TRANSLATION;
2866
2867
    // Compute cost for signalling this DRL index
2868
0
    rd_stats->rate = base_rate;
2869
0
    rd_stats->rate += drl_cost;
2870
2871
0
    int rs = 0;
2872
0
    int compmode_interinter_cost = 0;
2873
2874
0
    int_mv cur_mv[2];
2875
2876
    // TODO(Cherma): Extend this speed feature to support compound mode
2877
0
    int skip_repeated_ref_mv =
2878
0
        is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
2879
    // Generate the current mv according to the prediction mode
2880
0
    if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
2881
0
      continue;
2882
0
    }
2883
2884
    // The above call to build_cur_mv does not handle NEWMV modes. Build
2885
    // the mv here if we have NEWMV for any predictors.
2886
0
    if (have_newmv_in_inter_mode(this_mode)) {
2887
#if CONFIG_COLLECT_COMPONENT_TIMING
2888
      start_timing(cpi, handle_newmv_time);
2889
#endif
2890
0
      newmv_ret_val =
2891
0
          handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
2892
#if CONFIG_COLLECT_COMPONENT_TIMING
2893
      end_timing(cpi, handle_newmv_time);
2894
#endif
2895
2896
0
      if (newmv_ret_val != 0) continue;
2897
2898
0
      if (is_inter_singleref_mode(this_mode) &&
2899
0
          cur_mv[0].as_int != INVALID_MV) {
2900
0
        const MV_REFERENCE_FRAME ref = refs[0];
2901
0
        const unsigned int this_sse = x->pred_sse[ref];
2902
0
        if (this_sse < args->best_single_sse_in_refs[ref]) {
2903
0
          args->best_single_sse_in_refs[ref] = this_sse;
2904
0
        }
2905
2906
0
        if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
2907
0
          const double scale_factor[11] = { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8,
2908
0
                                            0.8, 0.9, 0.9, 0.9, 0.9 };
2909
0
          assert(num_pels_log2_lookup[bsize] >= 4);
2910
0
          if (args->best_pred_sse <
2911
0
              scale_factor[num_pels_log2_lookup[bsize] - 4] * this_sse)
2912
0
            continue;
2913
0
        }
2914
0
      }
2915
2916
0
      rd_stats->rate += rate_mv;
2917
0
    }
2918
    // Copy the motion vector for this mode into mbmi struct
2919
0
    for (i = 0; i < is_comp_pred + 1; ++i) {
2920
0
      mbmi->mv[i].as_int = cur_mv[i].as_int;
2921
0
    }
2922
2923
0
    if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
2924
0
        mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
2925
0
      continue;
2926
0
    }
2927
2928
    // Skip the rest of the search if prune_ref_mv_idx_search speed feature
2929
    // is enabled, and the current MV is similar to a previous one.
2930
0
    if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
2931
0
        prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
2932
0
                                cpi->sf.inter_sf.prune_ref_mv_idx_search))
2933
0
      continue;
2934
2935
0
    if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
2936
0
        cpi->sf.gm_sf.gm_search_type == GM_DISABLE_SEARCH &&
2937
0
        (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
2938
0
      if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args,
2939
0
                                 cpi->sf.gm_sf.prune_zero_mv_with_sse)) {
2940
0
        continue;
2941
0
      }
2942
0
    }
2943
2944
0
    int skip_build_pred = 0;
2945
0
    const int mi_row = xd->mi_row;
2946
0
    const int mi_col = xd->mi_col;
2947
2948
    // Handle a compound predictor, continue if it is determined this
2949
    // cannot be the best compound mode
2950
0
    if (is_comp_pred) {
2951
#if CONFIG_COLLECT_COMPONENT_TIMING
2952
      start_timing(cpi, compound_type_rd_time);
2953
#endif
2954
0
      const int not_best_mode = process_compound_inter_mode(
2955
0
          cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
2956
0
          rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
2957
0
          &skip_build_pred);
2958
#if CONFIG_COLLECT_COMPONENT_TIMING
2959
      end_timing(cpi, compound_type_rd_time);
2960
#endif
2961
0
      if (not_best_mode) continue;
2962
0
    }
2963
2964
0
    if (!skip_interp_search_modelrd_calc) {
2965
#if CONFIG_COLLECT_COMPONENT_TIMING
2966
      start_timing(cpi, interpolation_filter_search_time);
2967
#endif
2968
      // Determine the interpolation filter for this mode
2969
0
      ret_val = av1_interpolation_filter_search(
2970
0
          x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
2971
0
          &skip_build_pred, args, ref_best_rd);
2972
#if CONFIG_COLLECT_COMPONENT_TIMING
2973
      end_timing(cpi, interpolation_filter_search_time);
2974
#endif
2975
0
      if (args->modelled_rd != NULL && !is_comp_pred) {
2976
0
        args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
2977
0
      }
2978
0
      if (ret_val != 0) {
2979
0
        restore_dst_buf(xd, orig_dst, num_planes);
2980
0
        continue;
2981
0
      } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
2982
0
                 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
2983
0
        restore_dst_buf(xd, orig_dst, num_planes);
2984
0
        continue;
2985
0
      }
2986
2987
      // Compute modelled RD if enabled
2988
0
      if (args->modelled_rd != NULL) {
2989
0
        if (is_comp_pred) {
2990
0
          const int mode0 = compound_ref0_mode(this_mode);
2991
0
          const int mode1 = compound_ref1_mode(this_mode);
2992
0
          const int64_t mrd =
2993
0
              AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
2994
0
                     args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
2995
0
          if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
2996
0
            restore_dst_buf(xd, orig_dst, num_planes);
2997
0
            continue;
2998
0
          }
2999
0
        }
3000
0
      }
3001
0
    }
3002
3003
0
    rd_stats->rate += compmode_interinter_cost;
3004
0
    if (skip_build_pred != 1) {
3005
      // Build this inter predictor if it has not been previously built
3006
0
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
3007
0
                                    av1_num_planes(cm) - 1);
3008
0
    }
3009
3010
#if CONFIG_COLLECT_COMPONENT_TIMING
3011
    start_timing(cpi, motion_mode_rd_time);
3012
#endif
3013
0
    int rate2_nocoeff = rd_stats->rate;
3014
    // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
3015
    // OBMC_CAUSAL or WARPED_CAUSAL
3016
0
    int64_t this_yrd;
3017
0
    ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
3018
0
                             rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
3019
0
                             &orig_dst, best_est_rd, do_tx_search,
3020
0
                             inter_modes_info, 0, &this_yrd);
3021
#if CONFIG_COLLECT_COMPONENT_TIMING
3022
    end_timing(cpi, motion_mode_rd_time);
3023
#endif
3024
0
    assert(
3025
0
        IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));
3026
3027
0
    if (ret_val != INT64_MAX) {
3028
0
      int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
3029
0
      const THR_MODES mode_enum = get_prediction_mode_idx(
3030
0
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3031
      // Collect mode stats for multiwinner mode processing
3032
0
      store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
3033
0
                              rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
3034
0
                              cpi->sf.winner_mode_sf.multi_winner_mode_type,
3035
0
                              do_tx_search);
3036
0
      if (tmp_rd < best_rd) {
3037
0
        best_yrd = this_yrd;
3038
        // Update the best rd stats if we found the best mode so far
3039
0
        best_rd_stats = *rd_stats;
3040
0
        best_rd_stats_y = *rd_stats_y;
3041
0
        best_rd_stats_uv = *rd_stats_uv;
3042
0
        best_rd = tmp_rd;
3043
0
        best_mbmi = *mbmi;
3044
0
        best_xskip_txfm = txfm_info->skip_txfm;
3045
0
        memcpy(best_blk_skip, txfm_info->blk_skip,
3046
0
               sizeof(best_blk_skip[0]) * xd->height * xd->width);
3047
0
        av1_copy_array(best_tx_type_map, xd->tx_type_map,
3048
0
                       xd->height * xd->width);
3049
0
        motion_mode_cand->rate_mv = rate_mv;
3050
0
        motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
3051
0
      }
3052
3053
0
      if (tmp_rd < ref_best_rd) {
3054
0
        ref_best_rd = tmp_rd;
3055
0
        best_ref_mv_idx = ref_mv_idx;
3056
0
      }
3057
0
    }
3058
0
    restore_dst_buf(xd, orig_dst, num_planes);
3059
0
  }
3060
3061
0
  if (best_rd == INT64_MAX) return INT64_MAX;
3062
3063
  // re-instate status of the best choice
3064
0
  *rd_stats = best_rd_stats;
3065
0
  *rd_stats_y = best_rd_stats_y;
3066
0
  *rd_stats_uv = best_rd_stats_uv;
3067
0
  *yrd = best_yrd;
3068
0
  *mbmi = best_mbmi;
3069
0
  txfm_info->skip_txfm = best_xskip_txfm;
3070
0
  assert(IMPLIES(mbmi->comp_group_idx == 1,
3071
0
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
3072
0
  memcpy(txfm_info->blk_skip, best_blk_skip,
3073
0
         sizeof(best_blk_skip[0]) * xd->height * xd->width);
3074
0
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
3075
3076
0
  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
3077
3078
0
  return rd_stats->rdcost;
3079
0
}
3080
3081
/*!\brief Search for the best intrabc predictor
3082
 *
3083
 * \ingroup intra_mode_search
3084
 * \callergraph
3085
 * This function performs a motion search to find the best intrabc predictor.
3086
 *
3087
 * \returns Returns the best overall rdcost (including the non-intrabc modes
3088
 * search before this function).
3089
 */
3090
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
3091
                                       PICK_MODE_CONTEXT *ctx,
3092
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
3093
0
                                       int64_t best_rd) {
3094
0
  const AV1_COMMON *const cm = &cpi->common;
3095
0
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
3096
0
      cpi->sf.rt_sf.use_nonrd_pick_mode)
3097
0
    return INT64_MAX;
3098
0
  const int num_planes = av1_num_planes(cm);
3099
3100
0
  MACROBLOCKD *const xd = &x->e_mbd;
3101
0
  const TileInfo *tile = &xd->tile;
3102
0
  MB_MODE_INFO *mbmi = xd->mi[0];
3103
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3104
3105
0
  const int mi_row = xd->mi_row;
3106
0
  const int mi_col = xd->mi_col;
3107
0
  const int w = block_size_wide[bsize];
3108
0
  const int h = block_size_high[bsize];
3109
0
  const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
3110
0
  const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
3111
3112
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3113
0
  const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
3114
0
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
3115
0
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3116
0
                   mbmi_ext->mode_context);
3117
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3118
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3119
0
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
3120
0
  int_mv nearestmv, nearmv;
3121
0
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
3122
0
                                   0);
3123
3124
0
  if (nearestmv.as_int == INVALID_MV) {
3125
0
    nearestmv.as_int = 0;
3126
0
  }
3127
0
  if (nearmv.as_int == INVALID_MV) {
3128
0
    nearmv.as_int = 0;
3129
0
  }
3130
3131
0
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
3132
0
  if (dv_ref.as_int == 0) {
3133
0
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
3134
0
  }
3135
  // Ref DV should not have sub-pel.
3136
0
  assert((dv_ref.as_mv.col & 7) == 0);
3137
0
  assert((dv_ref.as_mv.row & 7) == 0);
3138
0
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
3139
3140
0
  struct buf_2d yv12_mb[MAX_MB_PLANE];
3141
0
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
3142
0
  for (int i = 0; i < num_planes; ++i) {
3143
0
    xd->plane[i].pre[0] = yv12_mb[i];
3144
0
  }
3145
3146
0
  enum IntrabcMotionDirection {
3147
0
    IBC_MOTION_ABOVE,
3148
0
    IBC_MOTION_LEFT,
3149
0
    IBC_MOTION_DIRECTIONS
3150
0
  };
3151
3152
0
  MB_MODE_INFO best_mbmi = *mbmi;
3153
0
  RD_STATS best_rdstats = *rd_stats;
3154
0
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
3155
0
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
3156
0
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3157
3158
0
  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
3159
0
  const search_site_config *lookahead_search_sites =
3160
0
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
3161
0
  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
3162
0
                                     &dv_ref.as_mv, lookahead_search_sites,
3163
0
                                     /*fine_search_interval=*/0);
3164
0
  const IntraBCMVCosts *const dv_costs = x->dv_costs;
3165
0
  av1_set_ms_to_intra_mode(&fullms_params, dv_costs);
3166
3167
0
  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
3168
0
       dir < IBC_MOTION_DIRECTIONS; ++dir) {
3169
0
    switch (dir) {
3170
0
      case IBC_MOTION_ABOVE:
3171
0
        fullms_params.mv_limits.col_min =
3172
0
            (tile->mi_col_start - mi_col) * MI_SIZE;
3173
0
        fullms_params.mv_limits.col_max =
3174
0
            (tile->mi_col_end - mi_col) * MI_SIZE - w;
3175
0
        fullms_params.mv_limits.row_min =
3176
0
            (tile->mi_row_start - mi_row) * MI_SIZE;
3177
0
        fullms_params.mv_limits.row_max =
3178
0
            (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
3179
0
        break;
3180
0
      case IBC_MOTION_LEFT:
3181
0
        fullms_params.mv_limits.col_min =
3182
0
            (tile->mi_col_start - mi_col) * MI_SIZE;
3183
0
        fullms_params.mv_limits.col_max =
3184
0
            (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
3185
        // TODO(aconverse@google.com): Minimize the overlap between above and
3186
        // left areas.
3187
0
        fullms_params.mv_limits.row_min =
3188
0
            (tile->mi_row_start - mi_row) * MI_SIZE;
3189
0
        int bottom_coded_mi_edge =
3190
0
            AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
3191
0
        fullms_params.mv_limits.row_max =
3192
0
            (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
3193
0
        break;
3194
0
      default: assert(0);
3195
0
    }
3196
0
    assert(fullms_params.mv_limits.col_min >= fullms_params.mv_limits.col_min);
3197
0
    assert(fullms_params.mv_limits.col_max <= fullms_params.mv_limits.col_max);
3198
0
    assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min);
3199
0
    assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max);
3200
3201
0
    av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);
3202
3203
0
    if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
3204
0
        fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
3205
0
      continue;
3206
0
    }
3207
3208
0
    const int step_param = cpi->mv_search_params.mv_step_param;
3209
0
    const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
3210
0
    IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
3211
0
    int_mv best_mv, best_hash_mv;
3212
3213
0
    int bestsme = av1_full_pixel_search(start_mv, &fullms_params, step_param,
3214
0
                                        NULL, &best_mv.as_fullmv, NULL);
3215
0
    const int hashsme = av1_intrabc_hash_search(
3216
0
        cpi, xd, &fullms_params, intrabc_hash_info, &best_hash_mv.as_fullmv);
3217
0
    if (hashsme < bestsme) {
3218
0
      best_mv = best_hash_mv;
3219
0
      bestsme = hashsme;
3220
0
    }
3221
3222
0
    if (bestsme == INT_MAX) continue;
3223
0
    const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
3224
0
    if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
3225
0
                                get_fullmv_from_mv(&dv)))
3226
0
      continue;
3227
0
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
3228
0
                         cm->seq_params->mib_size_log2))
3229
0
      continue;
3230
3231
    // DV should not have sub-pel.
3232
0
    assert((dv.col & 7) == 0);
3233
0
    assert((dv.row & 7) == 0);
3234
0
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
3235
0
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
3236
0
    mbmi->use_intrabc = 1;
3237
0
    mbmi->mode = DC_PRED;
3238
0
    mbmi->uv_mode = UV_DC_PRED;
3239
0
    mbmi->motion_mode = SIMPLE_TRANSLATION;
3240
0
    mbmi->mv[0].as_mv = dv;
3241
0
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
3242
0
    mbmi->skip_txfm = 0;
3243
0
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3244
0
                                  av1_num_planes(cm) - 1);
3245
3246
    // TODO(aconverse@google.com): The full motion field defining discount
3247
    // in MV_COST_WEIGHT is too large. Explore other values.
3248
0
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
3249
0
                                        dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
3250
0
    const int rate_mode = x->mode_costs.intrabc_cost[1];
3251
0
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
3252
0
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
3253
0
                         &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
3254
0
      continue;
3255
0
    rd_stats_yuv.rdcost =
3256
0
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
3257
0
    if (rd_stats_yuv.rdcost < best_rd) {
3258
0
      best_rd = rd_stats_yuv.rdcost;
3259
0
      best_mbmi = *mbmi;
3260
0
      best_rdstats = rd_stats_yuv;
3261
0
      memcpy(best_blk_skip, txfm_info->blk_skip,
3262
0
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3263
0
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
3264
0
    }
3265
0
  }
3266
0
  *mbmi = best_mbmi;
3267
0
  *rd_stats = best_rdstats;
3268
0
  memcpy(txfm_info->blk_skip, best_blk_skip,
3269
0
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3270
0
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
3271
#if CONFIG_RD_DEBUG
3272
  mbmi->rd_stats = *rd_stats;
3273
#endif
3274
0
  return best_rd;
3275
0
}
3276
3277
// TODO(chiyotsai@google.com): We are using struct $struct_name instead of their
3278
// typedef here because Doxygen doesn't know about the typedefs yet. So using
3279
// the typedef will prevent doxygen from finding this function and generating
3280
// the callgraph. Once documents for AV1_COMP and MACROBLOCK are added to
3281
// doxygen, we can revert back to using the typedefs.
3282
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
3283
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
3284
0
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3285
0
  const AV1_COMMON *const cm = &cpi->common;
3286
0
  MACROBLOCKD *const xd = &x->e_mbd;
3287
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
3288
0
  const int num_planes = av1_num_planes(cm);
3289
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3290
0
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3291
0
  int y_skip_txfm = 0, uv_skip_txfm = 0;
3292
0
  int64_t dist_y = 0, dist_uv = 0;
3293
3294
0
  ctx->rd_stats.skip_txfm = 0;
3295
0
  mbmi->ref_frame[0] = INTRA_FRAME;
3296
0
  mbmi->ref_frame[1] = NONE_FRAME;
3297
0
  mbmi->use_intrabc = 0;
3298
0
  mbmi->mv[0].as_int = 0;
3299
0
  mbmi->skip_mode = 0;
3300
3301
0
  const int64_t intra_yrd =
3302
0
      av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
3303
0
                                 &y_skip_txfm, bsize, best_rd, ctx);
3304
3305
  // Initialize default mode evaluation params
3306
0
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);
3307
3308
0
  if (intra_yrd < best_rd) {
3309
    // Search intra modes for uv planes if needed
3310
0
    if (num_planes > 1) {
3311
      // Set up the tx variables for reproducing the y predictions in case we
3312
      // need it for chroma-from-luma.
3313
0
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
3314
0
        memcpy(txfm_info->blk_skip, ctx->blk_skip,
3315
0
               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
3316
0
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
3317
0
      }
3318
0
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
3319
0
      av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3320
0
                                  &dist_uv, &uv_skip_txfm, bsize,
3321
0
                                  max_uv_tx_size);
3322
0
    }
3323
3324
    // Intra block is always coded as non-skip
3325
0
    rd_cost->rate =
3326
0
        rate_y + rate_uv +
3327
0
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
3328
0
    rd_cost->dist = dist_y + dist_uv;
3329
0
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
3330
0
    rd_cost->skip_txfm = 0;
3331
0
  } else {
3332
0
    rd_cost->rate = INT_MAX;
3333
0
  }
3334
3335
0
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
3336
0
    best_rd = rd_cost->rdcost;
3337
0
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
3338
0
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
3339
0
    memcpy(ctx->blk_skip, txfm_info->blk_skip,
3340
0
           sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
3341
0
    assert(rd_cost->rate != INT_MAX);
3342
0
  }
3343
0
  if (rd_cost->rate == INT_MAX) return;
3344
3345
0
  ctx->mic = *xd->mi[0];
3346
0
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
3347
0
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
3348
0
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3349
0
}
3350
3351
static AOM_INLINE void calc_target_weighted_pred(
3352
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
3353
    const uint8_t *above, int above_stride, const uint8_t *left,
3354
    int left_stride);
3355
3356
static AOM_INLINE void rd_pick_skip_mode(
3357
    RD_STATS *rd_cost, InterModeSearchState *search_state,
3358
    const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3359
0
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3360
0
  const AV1_COMMON *const cm = &cpi->common;
3361
0
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3362
0
  const int num_planes = av1_num_planes(cm);
3363
0
  MACROBLOCKD *const xd = &x->e_mbd;
3364
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
3365
3366
0
  x->compound_idx = 1;  // COMPOUND_AVERAGE
3367
0
  RD_STATS skip_mode_rd_stats;
3368
0
  av1_invalid_rd_stats(&skip_mode_rd_stats);
3369
3370
0
  if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3371
0
      skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3372
0
    return;
3373
0
  }
3374
3375
0
  const MV_REFERENCE_FRAME ref_frame =
3376
0
      LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3377
0
  const MV_REFERENCE_FRAME second_ref_frame =
3378
0
      LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3379
0
  const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3380
0
  const THR_MODES mode_index =
3381
0
      get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3382
3383
0
  if (mode_index == THR_INVALID) {
3384
0
    return;
3385
0
  }
3386
3387
0
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3388
0
       cpi->sf.inter_sf.disable_onesided_comp) &&
3389
0
      cpi->all_one_sided_refs) {
3390
0
    return;
3391
0
  }
3392
3393
0
  mbmi->mode = this_mode;
3394
0
  mbmi->uv_mode = UV_DC_PRED;
3395
0
  mbmi->ref_frame[0] = ref_frame;
3396
0
  mbmi->ref_frame[1] = second_ref_frame;
3397
0
  const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3398
0
  if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3399
0
    MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3400
0
    if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3401
0
        mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3402
0
      return;
3403
0
    }
3404
0
    av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3405
0
                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3406
0
                     mbmi_ext->mode_context);
3407
    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3408
    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3409
0
    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3410
0
  }
3411
3412
0
  assert(this_mode == NEAREST_NEARESTMV);
3413
0
  if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3414
0
    return;
3415
0
  }
3416
3417
0
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
3418
0
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3419
0
  mbmi->comp_group_idx = 0;
3420
0
  mbmi->compound_idx = x->compound_idx;
3421
0
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3422
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
3423
0
  mbmi->ref_mv_idx = 0;
3424
0
  mbmi->skip_mode = mbmi->skip_txfm = 1;
3425
0
  mbmi->palette_mode_info.palette_size[0] = 0;
3426
0
  mbmi->palette_mode_info.palette_size[1] = 0;
3427
3428
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
3429
3430
0
  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3431
0
  for (int i = 0; i < num_planes; i++) {
3432
0
    xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3433
0
    xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3434
0
  }
3435
3436
0
  BUFFER_SET orig_dst;
3437
0
  for (int i = 0; i < num_planes; i++) {
3438
0
    orig_dst.plane[i] = xd->plane[i].dst.buf;
3439
0
    orig_dst.stride[i] = xd->plane[i].dst.stride;
3440
0
  }
3441
3442
  // Obtain the rdcost for skip_mode.
3443
0
  skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst);
3444
3445
  // Compare the use of skip_mode with the best intra/inter mode obtained.
3446
0
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3447
0
  int64_t best_intra_inter_mode_cost = INT64_MAX;
3448
0
  if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3449
0
    const ModeCosts *mode_costs = &x->mode_costs;
3450
0
    best_intra_inter_mode_cost = RDCOST(
3451
0
        x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3452
0
        rd_cost->dist);
3453
    // Account for non-skip mode rate in total rd stats
3454
0
    rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3455
0
    av1_rd_cost_update(x->rdmult, rd_cost);
3456
0
  }
3457
3458
0
  if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3459
0
      (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3460
0
    assert(mode_index != THR_INVALID);
3461
0
    search_state->best_mbmode.skip_mode = 1;
3462
0
    search_state->best_mbmode = *mbmi;
3463
0
    memset(search_state->best_mbmode.inter_tx_size,
3464
0
           search_state->best_mbmode.tx_size,
3465
0
           sizeof(search_state->best_mbmode.inter_tx_size));
3466
0
    set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3467
0
                  search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3468
0
                  xd);
3469
0
    search_state->best_mode_index = mode_index;
3470
3471
    // Update rd_cost
3472
0
    rd_cost->rate = skip_mode_rd_stats.rate;
3473
0
    rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3474
0
    rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3475
3476
0
    search_state->best_rd = rd_cost->rdcost;
3477
0
    search_state->best_skip2 = 1;
3478
0
    search_state->best_mode_skippable = 1;
3479
3480
0
    x->txfm_search_info.skip_txfm = 1;
3481
0
  }
3482
0
}
3483
3484
// Get winner mode stats of given mode index
3485
static AOM_INLINE MB_MODE_INFO *get_winner_mode_stats(
3486
    MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3487
    int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3488
    RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3489
    THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3490
0
    int mode_idx) {
3491
0
  MB_MODE_INFO *winner_mbmi;
3492
0
  if (multi_winner_mode_type) {
3493
0
    assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3494
0
    WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3495
0
    winner_mbmi = &winner_mode_stat->mbmi;
3496
3497
0
    *winner_rd_cost = &winner_mode_stat->rd_cost;
3498
0
    *winner_rate_y = winner_mode_stat->rate_y;
3499
0
    *winner_rate_uv = winner_mode_stat->rate_uv;
3500
0
    *winner_mode_index = winner_mode_stat->mode_index;
3501
0
  } else {
3502
0
    winner_mbmi = best_mbmode;
3503
0
    *winner_rd_cost = best_rd_cost;
3504
0
    *winner_rate_y = best_rate_y;
3505
0
    *winner_rate_uv = best_rate_uv;
3506
0
    *winner_mode_index = *best_mode_index;
3507
0
  }
3508
0
  return winner_mbmi;
3509
0
}
3510
3511
// speed feature: fast intra/inter transform type search
3512
// Used for speed >= 2
3513
// When this speed feature is on, in rd mode search, only DCT is used.
3514
// After the mode is determined, this function is called, to select
3515
// transform types and get accurate rdcost.
3516
static AOM_INLINE void refine_winner_mode_tx(
3517
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
3518
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
3519
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
3520
0
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
3521
0
  const AV1_COMMON *const cm = &cpi->common;
3522
0
  MACROBLOCKD *const xd = &x->e_mbd;
3523
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
3524
0
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
3525
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3526
0
  int64_t best_rd;
3527
0
  const int num_planes = av1_num_planes(cm);
3528
3529
0
  if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
3530
0
                                         best_mbmode->mode))
3531
0
    return;
3532
3533
  // Set params for winner mode evaluation
3534
0
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
3535
3536
  // No best mode identified so far
3537
0
  if (*best_mode_index == THR_INVALID) return;
3538
3539
0
  int skip_winner_mode_eval =
3540
0
      cpi->sf.winner_mode_sf.disable_winner_mode_eval_for_txskip;
3541
  // Do not skip winner mode evaluation at low quantizers if normal mode's
3542
  // transform search was too aggressive.
3543
0
  if (cpi->sf.rd_sf.perform_coeff_opt >= 5 && x->qindex <= 70)
3544
0
    skip_winner_mode_eval = 0;
3545
3546
0
  if (skip_winner_mode_eval && (best_mbmode->skip_txfm || rd_cost->skip_txfm))
3547
0
    return;
3548
3549
0
  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
3550
0
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
3551
0
    RD_STATS *winner_rd_stats = NULL;
3552
0
    int winner_rate_y = 0, winner_rate_uv = 0;
3553
0
    THR_MODES winner_mode_index = 0;
3554
3555
    // TODO(any): Combine best mode and multi-winner mode processing paths
3556
    // Get winner mode stats for current mode index
3557
0
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
3558
0
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
3559
0
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
3560
0
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);
3561
3562
0
    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
3563
0
        winner_mode_index != THR_INVALID &&
3564
0
        is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
3565
0
                                          winner_mbmi->mode)) {
3566
0
      RD_STATS rd_stats = *winner_rd_stats;
3567
0
      int skip_blk = 0;
3568
0
      RD_STATS rd_stats_y, rd_stats_uv;
3569
0
      const int skip_ctx = av1_get_skip_txfm_context(xd);
3570
3571
0
      *mbmi = *winner_mbmi;
3572
3573
0
      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3574
3575
      // Select prediction reference frames.
3576
0
      for (int i = 0; i < num_planes; i++) {
3577
0
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3578
0
        if (has_second_ref(mbmi))
3579
0
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3580
0
      }
3581
3582
0
      if (is_inter_mode(mbmi->mode)) {
3583
0
        const int mi_row = xd->mi_row;
3584
0
        const int mi_col = xd->mi_col;
3585
0
        bool is_predictor_built = false;
3586
0
        const PREDICTION_MODE prediction_mode = mbmi->mode;
3587
        // Do interpolation filter search for realtime mode if applicable.
3588
0
        if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
3589
0
            cpi->oxcf.mode == REALTIME &&
3590
0
            cm->current_frame.reference_mode == SINGLE_REFERENCE &&
3591
0
            is_inter_mode(prediction_mode) &&
3592
0
            mbmi->motion_mode == SIMPLE_TRANSLATION &&
3593
0
            !is_inter_compound_mode(prediction_mode)) {
3594
0
          is_predictor_built =
3595
0
              fast_interp_search(cpi, x, mi_row, mi_col, bsize);
3596
0
        }
3597
0
        if (!is_predictor_built) {
3598
0
          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3599
0
                                        av1_num_planes(cm) - 1);
3600
0
        }
3601
0
        if (mbmi->motion_mode == OBMC_CAUSAL)
3602
0
          av1_build_obmc_inter_predictors_sb(cm, xd);
3603
3604
0
        av1_subtract_plane(x, bsize, 0);
3605
0
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
3606
0
            !xd->lossless[mbmi->segment_id]) {
3607
0
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3608
0
                                              INT64_MAX);
3609
0
          assert(rd_stats_y.rate != INT_MAX);
3610
0
        } else {
3611
0
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3612
0
                                            INT64_MAX);
3613
0
          memset(mbmi->inter_tx_size, mbmi->tx_size,
3614
0
                 sizeof(mbmi->inter_tx_size));
3615
0
          for (int i = 0; i < xd->height * xd->width; ++i)
3616
0
            set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
3617
0
        }
3618
0
      } else {
3619
0
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3620
0
                                          INT64_MAX);
3621
0
      }
3622
3623
0
      if (num_planes > 1) {
3624
0
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
3625
0
      } else {
3626
0
        av1_init_rd_stats(&rd_stats_uv);
3627
0
      }
3628
3629
0
      const ModeCosts *mode_costs = &x->mode_costs;
3630
0
      if (is_inter_mode(mbmi->mode) &&
3631
0
          RDCOST(x->rdmult,
3632
0
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
3633
0
                     rd_stats_uv.rate,
3634
0
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
3635
0
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
3636
0
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
3637
0
        skip_blk = 1;
3638
0
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
3639
0
        rd_stats_uv.rate = 0;
3640
0
        rd_stats_y.dist = rd_stats_y.sse;
3641
0
        rd_stats_uv.dist = rd_stats_uv.sse;
3642
0
      } else {
3643
0
        skip_blk = 0;
3644
0
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
3645
0
      }
3646
0
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
3647
0
                      winner_rate_y - winner_rate_uv;
3648
0
      int64_t this_rd =
3649
0
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
3650
0
      if (best_rd > this_rd) {
3651
0
        *best_mbmode = *mbmi;
3652
0
        *best_mode_index = winner_mode_index;
3653
0
        av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
3654
0
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3655
0
        rd_cost->rate = this_rate;
3656
0
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
3657
0
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
3658
0
        rd_cost->rdcost = this_rd;
3659
0
        best_rd = this_rd;
3660
0
        *best_skip2 = skip_blk;
3661
0
      }
3662
0
    }
3663
0
  }
3664
0
}
3665
3666
/*!\cond */
3667
typedef struct {
3668
  // Mask for each reference frame, specifying which prediction modes to NOT try
3669
  // during search.
3670
  uint32_t pred_modes[REF_FRAMES];
3671
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
3672
  // reference frames (i, j).
3673
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
3674
  // (NONE_FRAME).
3675
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
3676
} mode_skip_mask_t;
3677
/*!\endcond */
3678
3679
// Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
3680
static AOM_INLINE void disable_reference(
3681
0
    MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3682
0
  for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3683
0
    ref_combo[ref][ref2 + 1] = true;
3684
0
  }
3685
0
}
3686
3687
// Update 'ref_combo' mask to disable all inter references except ALTREF.
3688
static AOM_INLINE void disable_inter_references_except_altref(
3689
0
    bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3690
0
  disable_reference(LAST_FRAME, ref_combo);
3691
0
  disable_reference(LAST2_FRAME, ref_combo);
3692
0
  disable_reference(LAST3_FRAME, ref_combo);
3693
0
  disable_reference(GOLDEN_FRAME, ref_combo);
3694
0
  disable_reference(BWDREF_FRAME, ref_combo);
3695
0
  disable_reference(ALTREF2_FRAME, ref_combo);
3696
0
}
3697
3698
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
3699
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
3700
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
3701
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
3702
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
3703
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
3704
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
3705
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
3706
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
3707
};
3708
3709
static const MV_REFERENCE_FRAME real_time_ref_combos[][2] = {
3710
  { LAST_FRAME, NONE_FRAME },
3711
  { ALTREF_FRAME, NONE_FRAME },
3712
  { GOLDEN_FRAME, NONE_FRAME },
3713
  { INTRA_FRAME, NONE_FRAME }
3714
};
3715
3716
// Selects which set of reference frame combinations is searched.
typedef enum {
  REF_SET_FULL,     // No reference combination is disabled by default.
  REF_SET_REDUCED,  // Only the combinations in reduced_ref_combos.
  REF_SET_REALTIME  // Only the combinations in real_time_ref_combos.
} REF_SET;
3717
3718
static AOM_INLINE void default_skip_mask(mode_skip_mask_t *mask,
3719
0
                                         REF_SET ref_set) {
3720
0
  if (ref_set == REF_SET_FULL) {
3721
    // Everything available by default.
3722
0
    memset(mask, 0, sizeof(*mask));
3723
0
  } else {
3724
    // All modes available by default.
3725
0
    memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
3726
    // All references disabled first.
3727
0
    for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
3728
0
      for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3729
0
        mask->ref_combo[ref1][ref2 + 1] = true;
3730
0
      }
3731
0
    }
3732
0
    const MV_REFERENCE_FRAME(*ref_set_combos)[2];
3733
0
    int num_ref_combos;
3734
3735
    // Then enable reduced set of references explicitly.
3736
0
    switch (ref_set) {
3737
0
      case REF_SET_REDUCED:
3738
0
        ref_set_combos = reduced_ref_combos;
3739
0
        num_ref_combos =
3740
0
            (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
3741
0
        break;
3742
0
      case REF_SET_REALTIME:
3743
0
        ref_set_combos = real_time_ref_combos;
3744
0
        num_ref_combos =
3745
0
            (int)sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]);
3746
0
        break;
3747
0
      default: assert(0); num_ref_combos = 0;
3748
0
    }
3749
3750
0
    for (int i = 0; i < num_ref_combos; ++i) {
3751
0
      const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
3752
0
      mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
3753
0
    }
3754
0
  }
3755
0
}
3756
3757
static AOM_INLINE void init_mode_skip_mask(mode_skip_mask_t *mask,
3758
                                           const AV1_COMP *cpi, MACROBLOCK *x,
3759
0
                                           BLOCK_SIZE bsize) {
3760
0
  const AV1_COMMON *const cm = &cpi->common;
3761
0
  const struct segmentation *const seg = &cm->seg;
3762
0
  MACROBLOCKD *const xd = &x->e_mbd;
3763
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
3764
0
  unsigned char segment_id = mbmi->segment_id;
3765
0
  const SPEED_FEATURES *const sf = &cpi->sf;
3766
0
  REF_SET ref_set = REF_SET_FULL;
3767
3768
0
  if (sf->rt_sf.use_real_time_ref_set)
3769
0
    ref_set = REF_SET_REALTIME;
3770
0
  else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
3771
0
    ref_set = REF_SET_REDUCED;
3772
3773
0
  default_skip_mask(mask, ref_set);
3774
3775
0
  int min_pred_mv_sad = INT_MAX;
3776
0
  MV_REFERENCE_FRAME ref_frame;
3777
0
  if (ref_set == REF_SET_REALTIME) {
3778
    // For real-time encoding, we only look at a subset of ref frames. So the
3779
    // threshold for pruning should be computed from this subset as well.
3780
0
    const int num_rt_refs =
3781
0
        sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
3782
0
    for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
3783
0
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
3784
0
      if (ref != INTRA_FRAME) {
3785
0
        min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
3786
0
      }
3787
0
    }
3788
0
  } else {
3789
0
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
3790
0
      min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
3791
0
  }
3792
3793
0
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3794
0
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
3795
      // Skip checking missing reference in both single and compound reference
3796
      // modes.
3797
0
      disable_reference(ref_frame, mask->ref_combo);
3798
0
    } else {
3799
      // Skip fixed mv modes for poor references
3800
0
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
3801
0
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3802
0
      }
3803
0
    }
3804
0
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3805
0
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3806
      // Reference not used for the segment.
3807
0
      disable_reference(ref_frame, mask->ref_combo);
3808
0
    }
3809
0
  }
3810
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
3811
  // is disabled for this segment. This is to prevent the possibility that we
3812
  // end up unable to pick any mode.
3813
0
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3814
    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
3815
    // unless ARNR filtering is enabled in which case we want
3816
    // an unfiltered alternative. We allow near/nearest as well
3817
    // because they may result in zero-zero MVs but be cheaper.
3818
0
    if (cpi->rc.is_src_frame_alt_ref &&
3819
0
        (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
3820
0
      disable_inter_references_except_altref(mask->ref_combo);
3821
3822
0
      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
3823
0
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
3824
0
      int_mv near_mv, nearest_mv, global_mv;
3825
0
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
3826
0
                  &x->mbmi_ext);
3827
0
      get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
3828
0
      get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
3829
3830
0
      if (near_mv.as_int != global_mv.as_int)
3831
0
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
3832
0
      if (nearest_mv.as_int != global_mv.as_int)
3833
0
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
3834
0
    }
3835
0
  }
3836
3837
0
  if (cpi->rc.is_src_frame_alt_ref) {
3838
0
    if (sf->inter_sf.alt_ref_search_fp &&
3839
0
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
3840
0
      mask->pred_modes[ALTREF_FRAME] = 0;
3841
0
      disable_inter_references_except_altref(mask->ref_combo);
3842
0
      disable_reference(INTRA_FRAME, mask->ref_combo);
3843
0
    }
3844
0
  }
3845
3846
0
  if (sf->inter_sf.alt_ref_search_fp) {
3847
0
    if (!cm->show_frame && x->best_pred_mv_sad < INT_MAX) {
3848
0
      int sad_thresh = x->best_pred_mv_sad + (x->best_pred_mv_sad >> 3);
3849
      // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
3850
      // those are past frames
3851
0
      MV_REFERENCE_FRAME start_frame =
3852
0
          sf->inter_sf.alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
3853
0
      for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
3854
0
        if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
3855
0
            0) {
3856
          // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
3857
          // to the relative dist of LAST_FRAME.
3858
0
          if (sf->inter_sf.alt_ref_search_fp == 1 &&
3859
0
              (abs(cpi->ref_frame_dist_info
3860
0
                       .ref_relative_dist[ref_frame - LAST_FRAME]) >
3861
0
               1.5 * abs(cpi->ref_frame_dist_info
3862
0
                             .ref_relative_dist[LAST_FRAME - LAST_FRAME]))) {
3863
0
            continue;
3864
0
          }
3865
0
          if (x->pred_mv_sad[ref_frame] > sad_thresh)
3866
0
            mask->pred_modes[ref_frame] |= INTER_ALL;
3867
0
        }
3868
0
      }
3869
0
    }
3870
0
  }
3871
3872
0
  if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
3873
0
    if (x->best_pred_mv_sad < INT_MAX) {
3874
0
      int sad_thresh = x->best_pred_mv_sad + (x->best_pred_mv_sad >> 1);
3875
0
      const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };
3876
3877
      // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
3878
0
      for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
3879
0
        ref_frame = prune_ref_list[ref_idx];
3880
0
        if (x->pred_mv_sad[ref_frame] > sad_thresh)
3881
0
          mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3882
0
      }
3883
0
    }
3884
0
  }
3885
3886
0
  if (bsize > sf->part_sf.max_intra_bsize) {
3887
0
    disable_reference(INTRA_FRAME, mask->ref_combo);
3888
0
  }
3889
3890
0
  if (!cpi->oxcf.tool_cfg.enable_global_motion) {
3891
0
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3892
0
      mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
3893
0
      mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
3894
0
    }
3895
0
  }
3896
3897
0
  mask->pred_modes[INTRA_FRAME] |=
3898
0
      ~(sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]]);
3899
0
}
3900
3901
static AOM_INLINE void init_neighbor_pred_buf(
3902
    const OBMCBuffer *const obmc_buffer, HandleInterModeArgs *const args,
3903
0
    int is_hbd) {
3904
0
  if (is_hbd) {
3905
0
    const int len = sizeof(uint16_t);
3906
0
    args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
3907
0
    args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
3908
0
                                                 (MAX_SB_SQUARE >> 1) * len);
3909
0
    args->above_pred_buf[2] =
3910
0
        CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
3911
0
    args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
3912
0
    args->left_pred_buf[1] =
3913
0
        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
3914
0
    args->left_pred_buf[2] =
3915
0
        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
3916
0
  } else {
3917
0
    args->above_pred_buf[0] = obmc_buffer->above_pred;
3918
0
    args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
3919
0
    args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
3920
0
    args->left_pred_buf[0] = obmc_buffer->left_pred;
3921
0
    args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
3922
0
    args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
3923
0
  }
3924
0
}
3925
3926
static AOM_INLINE int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
3927
0
                                      MV_REFERENCE_FRAME ref_frame) {
3928
0
  const AV1_COMMON *const cm = &cpi->common;
3929
0
  MV_REFERENCE_FRAME rf[2];
3930
0
  av1_set_ref_frame(rf, ref_frame);
3931
3932
0
  if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
3933
3934
0
  if (prune_ref_by_selective_ref_frame(cpi, x, rf,
3935
0
                                       cm->cur_frame->ref_display_order_hint)) {
3936
0
    return 1;
3937
0
  }
3938
3939
0
  return 0;
3940
0
}
3941
3942
static AOM_INLINE int is_ref_frame_used_by_compound_ref(
3943
0
    int ref_frame, int skip_ref_frame_mask) {
3944
0
  for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
3945
0
    if (!(skip_ref_frame_mask & (1 << r))) {
3946
0
      const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
3947
0
      if (rf[0] == ref_frame || rf[1] == ref_frame) {
3948
0
        return 1;
3949
0
      }
3950
0
    }
3951
0
  }
3952
0
  return 0;
3953
0
}
3954
3955
static AOM_INLINE int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
3956
0
                                                 const MB_MODE_INFO *mi_cache) {
3957
0
  if (!mi_cache) {
3958
0
    return 0;
3959
0
  }
3960
3961
0
  if (ref_frame < REF_FRAMES) {
3962
0
    return (ref_frame == mi_cache->ref_frame[0] ||
3963
0
            ref_frame == mi_cache->ref_frame[1]);
3964
0
  }
3965
3966
  // if we are here, then the current mode is compound.
3967
0
  MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
3968
0
  return ref_frame == cached_ref_type;
3969
0
}
3970
3971
// Please add/modify parameter setting in this function, making it consistent
3972
// and easy to read and maintain.
3973
static AOM_INLINE void set_params_rd_pick_inter_mode(
3974
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
3975
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
3976
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
3977
0
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
3978
0
  const AV1_COMMON *const cm = &cpi->common;
3979
0
  MACROBLOCKD *const xd = &x->e_mbd;
3980
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
3981
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3982
0
  unsigned char segment_id = mbmi->segment_id;
3983
3984
0
  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
3985
0
  av1_collect_neighbors_ref_counts(xd);
3986
0
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
3987
0
                           ref_costs_comp);
3988
3989
0
  const int mi_row = xd->mi_row;
3990
0
  const int mi_col = xd->mi_col;
3991
0
  x->best_pred_mv_sad = INT_MAX;
3992
3993
0
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
3994
0
       ++ref_frame) {
3995
0
    x->pred_mv_sad[ref_frame] = INT_MAX;
3996
0
    mbmi_ext->mode_context[ref_frame] = 0;
3997
0
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
3998
0
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
3999
      // Skip the ref frame if the mask says skip and the ref is not used by
4000
      // compound ref.
4001
0
      if (skip_ref_frame_mask & (1 << ref_frame) &&
4002
0
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
4003
0
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4004
0
        continue;
4005
0
      }
4006
0
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
4007
0
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
4008
0
    }
4009
    // Store the best pred_mv_sad across all past frames
4010
0
    if ((cpi->sf.inter_sf.alt_ref_search_fp ||
4011
0
         cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) &&
4012
0
        cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] < 0)
4013
0
      x->best_pred_mv_sad =
4014
0
          AOMMIN(x->best_pred_mv_sad, x->pred_mv_sad[ref_frame]);
4015
0
  }
4016
4017
0
  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
4018
    // No second reference on RT ref set, so no need to initialize
4019
0
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
4020
0
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
4021
0
      mbmi_ext->mode_context[ref_frame] = 0;
4022
0
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
4023
0
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
4024
0
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
4025
0
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
4026
0
        continue;
4027
0
      }
4028
4029
0
      if (skip_ref_frame_mask & (1 << ref_frame) &&
4030
0
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4031
0
        continue;
4032
0
      }
4033
      // Ref mv list population is not required, when compound references are
4034
      // pruned.
4035
0
      if (prune_ref_frame(cpi, x, ref_frame)) continue;
4036
4037
0
      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
4038
0
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
4039
0
                       mbmi_ext->mode_context);
4040
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
4041
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
4042
0
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
4043
0
    }
4044
0
  }
4045
4046
0
  av1_count_overlappable_neighbors(cm, xd);
4047
0
  const FRAME_UPDATE_TYPE update_type =
4048
0
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
4049
0
  int use_actual_frame_probs = 1;
4050
0
  int prune_obmc;
4051
#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
4052
  use_actual_frame_probs =
4053
      (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
4054
  if (!use_actual_frame_probs) {
4055
    prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
4056
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
4057
  }
4058
#endif
4059
0
  if (use_actual_frame_probs) {
4060
0
    prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
4061
0
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
4062
0
  }
4063
0
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
4064
0
    if (check_num_overlappable_neighbors(mbmi) &&
4065
0
        is_motion_variation_allowed_bsize(bsize)) {
4066
0
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4067
0
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4068
0
                                       MAX_SB_SIZE >> 1 };
4069
0
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4070
0
                                        MAX_SB_SIZE >> 1 };
4071
0
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4072
0
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
4073
0
                                          dst_width1, dst_height1,
4074
0
                                          args->above_pred_stride);
4075
0
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
4076
0
                                         dst_width2, dst_height2,
4077
0
                                         args->left_pred_stride);
4078
0
      const int num_planes = av1_num_planes(cm);
4079
0
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
4080
0
                           mi_col, 0, num_planes);
4081
0
      calc_target_weighted_pred(
4082
0
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
4083
0
          args->left_pred_buf[0], args->left_pred_stride[0]);
4084
0
    }
4085
0
  }
4086
4087
0
  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);
4088
4089
  // Set params for mode evaluation
4090
0
  set_mode_eval_params(cpi, x, MODE_EVAL);
4091
4092
0
  x->comp_rd_stats_idx = 0;
4093
4094
0
  for (int idx = 0; idx < REF_FRAMES; idx++) {
4095
0
    args->best_single_sse_in_refs[idx] = INT32_MAX;
4096
0
  }
4097
0
}
4098
4099
static AOM_INLINE void init_inter_mode_search_state(
4100
    InterModeSearchState *search_state, const AV1_COMP *cpi,
4101
0
    const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
4102
0
  init_intra_mode_search_state(&search_state->intra_search_state);
4103
0
  av1_invalid_rd_stats(&search_state->best_y_rdcost);
4104
4105
0
  search_state->best_rd = best_rd_so_far;
4106
0
  search_state->best_skip_rd[0] = INT64_MAX;
4107
0
  search_state->best_skip_rd[1] = INT64_MAX;
4108
4109
0
  av1_zero(search_state->best_mbmode);
4110
4111
0
  search_state->best_rate_y = INT_MAX;
4112
4113
0
  search_state->best_rate_uv = INT_MAX;
4114
4115
0
  search_state->best_mode_skippable = 0;
4116
4117
0
  search_state->best_skip2 = 0;
4118
4119
0
  search_state->best_mode_index = THR_INVALID;
4120
4121
0
  const MACROBLOCKD *const xd = &x->e_mbd;
4122
0
  const MB_MODE_INFO *const mbmi = xd->mi[0];
4123
0
  const unsigned char segment_id = mbmi->segment_id;
4124
4125
0
  search_state->num_available_refs = 0;
4126
0
  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
4127
0
  memset(search_state->dist_order_refs, -1,
4128
0
         sizeof(search_state->dist_order_refs));
4129
4130
0
  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
4131
0
    search_state->mode_threshold[i] = 0;
4132
0
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
4133
0
  for (int i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
4134
0
    search_state->mode_threshold[i] =
4135
0
        ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4136
0
        RD_THRESH_FAC_FRAC_BITS;
4137
4138
0
  search_state->best_intra_rd = INT64_MAX;
4139
4140
0
  search_state->best_pred_sse = UINT_MAX;
4141
4142
0
  av1_zero(search_state->single_newmv);
4143
0
  av1_zero(search_state->single_newmv_rate);
4144
0
  av1_zero(search_state->single_newmv_valid);
4145
0
  for (int i = 0; i < MB_MODE_COUNT; ++i) {
4146
0
    for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4147
0
      for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4148
0
        search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4149
0
        search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4150
0
      }
4151
0
    }
4152
0
  }
4153
4154
0
  for (int dir = 0; dir < 2; ++dir) {
4155
0
    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4156
0
      for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
4157
0
        SingleInterModeState *state;
4158
4159
0
        state = &search_state->single_state[dir][mode][ref_frame];
4160
0
        state->ref_frame = NONE_FRAME;
4161
0
        state->rd = INT64_MAX;
4162
4163
0
        state = &search_state->single_state_modelled[dir][mode][ref_frame];
4164
0
        state->ref_frame = NONE_FRAME;
4165
0
        state->rd = INT64_MAX;
4166
0
      }
4167
0
    }
4168
0
  }
4169
0
  for (int dir = 0; dir < 2; ++dir) {
4170
0
    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4171
0
      for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
4172
0
        search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
4173
0
      }
4174
0
    }
4175
0
  }
4176
0
  for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4177
0
    search_state->best_single_rd[ref_frame] = INT64_MAX;
4178
0
    search_state->best_single_mode[ref_frame] = MB_MODE_COUNT;
4179
0
  }
4180
0
  av1_zero(search_state->single_state_cnt);
4181
0
  av1_zero(search_state->single_state_modelled_cnt);
4182
4183
0
  for (int i = 0; i < REFERENCE_MODES; ++i) {
4184
0
    search_state->best_pred_rd[i] = INT64_MAX;
4185
0
  }
4186
0
}
4187
4188
static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
4189
                           const MV_REFERENCE_FRAME *ref_frame,
4190
0
                           const PREDICTION_MODE this_mode) {
4191
0
  if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
4192
0
    return true;
4193
0
  }
4194
4195
0
  return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
4196
0
}
4197
4198
static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
4199
                                      BLOCK_SIZE bsize,
4200
                                      PREDICTION_MODE curr_mode,
4201
0
                                      const MV_REFERENCE_FRAME *ref_frames) {
4202
0
  const int comp_pred = ref_frames[1] > INTRA_FRAME;
4203
0
  if (comp_pred) {
4204
0
    if (!is_comp_ref_allowed(bsize)) return 1;
4205
0
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]])) {
4206
0
      return 1;
4207
0
    }
4208
4209
0
    const AV1_COMMON *const cm = &cpi->common;
4210
0
    if (frame_is_intra_only(cm)) return 1;
4211
4212
0
    const CurrentFrame *const current_frame = &cm->current_frame;
4213
0
    if (current_frame->reference_mode == SINGLE_REFERENCE) return 1;
4214
4215
0
    const struct segmentation *const seg = &cm->seg;
4216
0
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
4217
    // Do not allow compound prediction if the segment level reference frame
4218
    // feature is in use as in this case there can only be one reference.
4219
0
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
4220
0
  }
4221
4222
0
  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
4223
    // Mode must be compatible
4224
0
    if (!is_interintra_allowed_bsize(bsize)) return 1;
4225
0
    if (!is_interintra_allowed_mode(curr_mode)) return 1;
4226
0
  }
4227
4228
0
  return 0;
4229
0
}
4230
4231
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
4232
0
                                        BLOCK_SIZE bsize, int mib_size) {
4233
0
  const int sb_size_mask = mib_size - 1;
4234
0
  const MACROBLOCKD *const xd = &x->e_mbd;
4235
0
  const int mi_row = xd->mi_row;
4236
0
  const int mi_col = xd->mi_col;
4237
0
  const int mi_row_in_sb = mi_row & sb_size_mask;
4238
0
  const int mi_col_in_sb = mi_col & sb_size_mask;
4239
0
  const int mi_w = mi_size_wide[bsize];
4240
0
  const int mi_h = mi_size_high[bsize];
4241
0
  int picked_ref_frames_mask = 0;
4242
0
  for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_h; ++i) {
4243
0
    for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_w; ++j) {
4244
0
      picked_ref_frames_mask |= x->picked_ref_frames_mask[i * 32 + j];
4245
0
    }
4246
0
  }
4247
0
  return picked_ref_frames_mask;
4248
0
}
4249
4250
// Check if reference frame pair of the current block matches with the given
4251
// block.
4252
static INLINE int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4253
0
                                       const MV_REFERENCE_FRAME *ref_frames) {
4254
0
  return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4255
0
          (ref_frames[1] == mbmi->ref_frame[1]));
4256
0
}
4257
4258
// Case 1: return 0, means don't skip this mode
4259
// Case 2: return 1, means skip this mode completely
4260
// Case 3: return 2, means skip compound only, but still try single motion modes
4261
static int inter_mode_search_order_independent_skip(
4262
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
4263
    InterModeSearchState *search_state, int skip_ref_frame_mask,
4264
0
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
4265
0
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
4266
0
    return 1;
4267
0
  }
4268
4269
0
  const int ref_type = av1_ref_frame_type(ref_frame);
4270
0
  if (prune_ref_frame(cpi, x, ref_type)) return 1;
4271
4272
  // This is only used in motion vector unit test.
4273
0
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
4274
0
      ref_frame[0] == INTRA_FRAME)
4275
0
    return 1;
4276
4277
0
  const AV1_COMMON *const cm = &cpi->common;
4278
0
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
4279
0
    return 1;
4280
0
  }
4281
4282
  // Reuse the prediction mode in cache
4283
0
  if (x->use_mb_mode_cache) {
4284
0
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
4285
0
    const PREDICTION_MODE cached_mode = cached_mi->mode;
4286
0
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
4287
0
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;
4288
4289
    // If the cached mode is intra, then we just need to match the mode.
4290
0
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
4291
0
      return 1;
4292
0
    }
4293
4294
    // If the cached mode is single inter mode, then we match the mode and
4295
    // reference frame.
4296
0
    if (cached_mode_is_single) {
4297
0
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
4298
0
        return 1;
4299
0
      }
4300
0
    } else {
4301
      // If the cached mode is compound, then we need to consider several cases.
4302
0
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
4303
0
      if (mode_is_single) {
4304
        // If the mode is single, we know the modes can't match. But we might
4305
        // still want to search it if compound mode depends on the current mode.
4306
0
        int skip_motion_mode_only = 0;
4307
0
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
4308
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
4309
0
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
4310
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
4311
0
        } else if (cached_mode == NEW_NEWMV) {
4312
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
4313
0
                                   ref_frame[0] == cached_frame[1]);
4314
0
        }
4315
4316
0
        return 1 + skip_motion_mode_only;
4317
0
      } else {
4318
        // If both modes are compound, then everything must match.
4319
0
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
4320
0
            ref_frame[1] != cached_frame[1]) {
4321
0
          return 1;
4322
0
        }
4323
0
      }
4324
0
    }
4325
0
  }
4326
4327
0
  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
4328
  // If no valid mode has been found so far in PARTITION_NONE when finding a
4329
  // valid partition is required, do not skip mode.
4330
0
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
4331
0
      x->must_find_valid_partition)
4332
0
    return 0;
4333
4334
0
  const SPEED_FEATURES *const sf = &cpi->sf;
4335
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
4336
  // frames
4337
0
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
4338
0
      (mode == NEAR_NEARMV || mode == NEARMV)) {
4339
0
    const MACROBLOCKD *const xd = &x->e_mbd;
4340
0
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
4341
0
        xd->up_available) {
4342
0
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
4343
0
                                                    { 1, 1, 0 },
4344
0
                                                    { 2, 1, 0 } };
4345
0
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;
4346
4347
0
      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
4348
0
             qindex_sub_range < 3);
4349
0
      const int num_ref_frame_pair_match_thresh =
4350
0
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
4351
0
                    [qindex_sub_range];
4352
4353
0
      assert(num_ref_frame_pair_match_thresh <= 2 &&
4354
0
             num_ref_frame_pair_match_thresh >= 0);
4355
0
      int num_ref_frame_pair_match = 0;
4356
4357
0
      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
4358
0
      num_ref_frame_pair_match +=
4359
0
          match_ref_frame_pair(xd->above_mbmi, ref_frame);
4360
4361
      // Pruning based on ref frame pair match with neighbors.
4362
0
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
4363
0
    }
4364
0
  }
4365
4366
0
  int skip_motion_mode = 0;
4367
0
  if (mbmi->partition != PARTITION_NONE) {
4368
0
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
4369
0
    if (ref_type <= ALTREF_FRAME && skip_ref) {
4370
      // Since the compound ref modes depends on the motion estimation result of
4371
      // two single ref modes (best mv of single ref modes as the start point),
4372
      // if current single ref mode is marked skip, we need to check if it will
4373
      // be used in compound ref modes.
4374
0
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
4375
        // Found a not skipped compound ref mode which contains current
4376
        // single ref. So this single ref can't be skipped completely
4377
        // Just skip its motion mode search, still try its simple
4378
        // transition mode.
4379
0
        skip_motion_mode = 1;
4380
0
        skip_ref = 0;
4381
0
      }
4382
0
    }
4383
    // If we are reusing the prediction from cache, and the current frame is
4384
    // required by the cache, then we cannot prune it.
4385
0
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
4386
0
      skip_ref = 0;
4387
      // If the cache only needs the current reference type for compound
4388
      // prediction, then we can skip motion mode search.
4389
0
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
4390
0
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
4391
0
    }
4392
0
    if (skip_ref) return 1;
4393
0
  }
4394
4395
0
  if (ref_frame[0] == INTRA_FRAME) {
4396
0
    if (mode != DC_PRED) {
4397
      // Disable intra modes other than DC_PRED for blocks with low variance
4398
      // Threshold for intra skipping based on source variance
4399
      // TODO(debargha): Specialize the threshold for super block sizes
4400
0
      const unsigned int skip_intra_var_thresh = 64;
4401
0
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
4402
0
          x->source_variance < skip_intra_var_thresh)
4403
0
        return 1;
4404
0
    }
4405
0
  }
4406
4407
0
  if (skip_motion_mode) return 2;
4408
4409
0
  return 0;
4410
0
}
4411
4412
static INLINE void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4413
                             const MV_REFERENCE_FRAME *ref_frames,
4414
0
                             const AV1_COMMON *cm) {
4415
0
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4416
0
  mbmi->ref_mv_idx = 0;
4417
0
  mbmi->mode = curr_mode;
4418
0
  mbmi->uv_mode = UV_DC_PRED;
4419
0
  mbmi->ref_frame[0] = ref_frames[0];
4420
0
  mbmi->ref_frame[1] = ref_frames[1];
4421
0
  pmi->palette_size[0] = 0;
4422
0
  pmi->palette_size[1] = 0;
4423
0
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
4424
0
  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4425
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
4426
0
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4427
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
4428
0
}
4429
4430
static AOM_INLINE void collect_single_states(MACROBLOCK *x,
4431
                                             InterModeSearchState *search_state,
4432
0
                                             const MB_MODE_INFO *const mbmi) {
4433
0
  int i, j;
4434
0
  const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
4435
0
  const PREDICTION_MODE this_mode = mbmi->mode;
4436
0
  const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
4437
0
  const int mode_offset = INTER_OFFSET(this_mode);
4438
0
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
4439
4440
  // Simple rd
4441
0
  int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
4442
0
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4443
0
    const int64_t rd =
4444
0
        search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
4445
0
    if (rd < simple_rd) simple_rd = rd;
4446
0
  }
4447
4448
  // Insertion sort of single_state
4449
0
  const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
4450
0
  SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
4451
0
  i = search_state->single_state_cnt[dir][mode_offset];
4452
0
  for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
4453
0
    state_s[j] = state_s[j - 1];
4454
0
  state_s[j] = this_state_s;
4455
0
  search_state->single_state_cnt[dir][mode_offset]++;
4456
4457
  // Modelled rd
4458
0
  int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
4459
0
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4460
0
    const int64_t rd =
4461
0
        search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
4462
0
    if (rd < modelled_rd) modelled_rd = rd;
4463
0
  }
4464
4465
  // Insertion sort of single_state_modelled
4466
0
  const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
4467
0
  SingleInterModeState *state_m =
4468
0
      search_state->single_state_modelled[dir][mode_offset];
4469
0
  i = search_state->single_state_modelled_cnt[dir][mode_offset];
4470
0
  for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
4471
0
    state_m[j] = state_m[j - 1];
4472
0
  state_m[j] = this_state_m;
4473
0
  search_state->single_state_modelled_cnt[dir][mode_offset]++;
4474
0
}
4475
4476
static AOM_INLINE void analyze_single_states(
4477
0
    const AV1_COMP *cpi, InterModeSearchState *search_state) {
4478
0
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
4479
0
  assert(prune_level >= 1);
4480
0
  int i, j, dir, mode;
4481
4482
0
  for (dir = 0; dir < 2; ++dir) {
4483
0
    int64_t best_rd;
4484
0
    SingleInterModeState(*state)[FWD_REFS];
4485
0
    const int prune_factor = prune_level >= 2 ? 6 : 5;
4486
4487
    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
4488
    // reference frames for all the modes (NEARESTMV and NEARMV may not
4489
    // have same motion vectors). Always keep the best of each mode
4490
    // because it might form the best possible combination with other mode.
4491
0
    state = search_state->single_state[dir];
4492
0
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
4493
0
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
4494
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4495
0
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
4496
0
        if (state[mode][i].rd != INT64_MAX &&
4497
0
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
4498
0
          state[mode][i].valid = 0;
4499
0
        }
4500
0
      }
4501
0
    }
4502
4503
0
    state = search_state->single_state_modelled[dir];
4504
0
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
4505
0
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
4506
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4507
0
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
4508
0
        if (state[mode][i].rd != INT64_MAX &&
4509
0
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
4510
0
          state[mode][i].valid = 0;
4511
0
        }
4512
0
      }
4513
0
    }
4514
0
  }
4515
4516
  // Ordering by simple rd first, then by modelled rd
4517
0
  for (dir = 0; dir < 2; ++dir) {
4518
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4519
0
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
4520
0
      const int state_cnt_m =
4521
0
          search_state->single_state_modelled_cnt[dir][mode];
4522
0
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
4523
0
      SingleInterModeState *state_m =
4524
0
          search_state->single_state_modelled[dir][mode];
4525
0
      int count = 0;
4526
0
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
4527
0
      for (i = 0; i < state_cnt_s; ++i) {
4528
0
        if (state_s[i].rd == INT64_MAX) break;
4529
0
        if (state_s[i].valid) {
4530
0
          search_state->single_rd_order[dir][mode][count++] =
4531
0
              state_s[i].ref_frame;
4532
0
        }
4533
0
      }
4534
0
      if (count >= max_candidates) continue;
4535
4536
0
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
4537
0
        if (state_m[i].rd == INT64_MAX) break;
4538
0
        if (!state_m[i].valid) continue;
4539
0
        const int ref_frame = state_m[i].ref_frame;
4540
0
        int match = 0;
4541
        // Check if existing already
4542
0
        for (j = 0; j < count; ++j) {
4543
0
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
4544
0
            match = 1;
4545
0
            break;
4546
0
          }
4547
0
        }
4548
0
        if (match) continue;
4549
        // Check if this ref_frame is removed in simple rd
4550
0
        int valid = 1;
4551
0
        for (j = 0; j < state_cnt_s; ++j) {
4552
0
          if (ref_frame == state_s[j].ref_frame) {
4553
0
            valid = state_s[j].valid;
4554
0
            break;
4555
0
          }
4556
0
        }
4557
0
        if (valid) {
4558
0
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
4559
0
        }
4560
0
      }
4561
0
    }
4562
0
  }
4563
0
}
4564
4565
static int compound_skip_get_candidates(
4566
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
4567
0
    const int dir, const PREDICTION_MODE mode) {
4568
0
  const int mode_offset = INTER_OFFSET(mode);
4569
0
  const SingleInterModeState *state =
4570
0
      search_state->single_state[dir][mode_offset];
4571
0
  const SingleInterModeState *state_modelled =
4572
0
      search_state->single_state_modelled[dir][mode_offset];
4573
4574
0
  int max_candidates = 0;
4575
0
  for (int i = 0; i < FWD_REFS; ++i) {
4576
0
    if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4577
0
    max_candidates++;
4578
0
  }
4579
4580
0
  int candidates = max_candidates;
4581
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4582
0
    candidates = AOMMIN(2, max_candidates);
4583
0
  }
4584
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4585
0
    if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4586
0
        state[0].ref_frame == state_modelled[0].ref_frame)
4587
0
      candidates = 1;
4588
0
    if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4589
0
  }
4590
4591
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4592
    // Limit the number of candidates to 1 in each direction for compound
4593
    // prediction
4594
0
    candidates = AOMMIN(1, candidates);
4595
0
  }
4596
0
  return candidates;
4597
0
}
4598
4599
static int compound_skip_by_single_states(
4600
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
4601
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
4602
0
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
4603
0
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
4604
0
  const int mode[2] = { compound_ref0_mode(this_mode),
4605
0
                        compound_ref1_mode(this_mode) };
4606
0
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
4607
0
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
4608
0
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
4609
0
  int ref_searched[2] = { 0, 0 };
4610
0
  int ref_mv_match[2] = { 1, 1 };
4611
0
  int i, j;
4612
4613
0
  for (i = 0; i < 2; ++i) {
4614
0
    const SingleInterModeState *state =
4615
0
        search_state->single_state[mode_dir[i]][mode_offset[i]];
4616
0
    const int state_cnt =
4617
0
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
4618
0
    for (j = 0; j < state_cnt; ++j) {
4619
0
      if (state[j].ref_frame == refs[i]) {
4620
0
        ref_searched[i] = 1;
4621
0
        break;
4622
0
      }
4623
0
    }
4624
0
  }
4625
4626
0
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
4627
0
  for (i = 0; i < 2; ++i) {
4628
0
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
4629
0
      continue;
4630
0
    }
4631
0
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
4632
0
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
4633
0
      int_mv single_mv;
4634
0
      int_mv comp_mv;
4635
0
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
4636
0
                  &x->mbmi_ext);
4637
0
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
4638
0
      if (single_mv.as_int != comp_mv.as_int) {
4639
0
        ref_mv_match[i] = 0;
4640
0
        break;
4641
0
      }
4642
0
    }
4643
0
  }
4644
4645
0
  for (i = 0; i < 2; ++i) {
4646
0
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
4647
0
    const int candidates =
4648
0
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
4649
0
    const MV_REFERENCE_FRAME *ref_order =
4650
0
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
4651
0
    int match = 0;
4652
0
    for (j = 0; j < candidates; ++j) {
4653
0
      if (refs[i] == ref_order[j]) {
4654
0
        match = 1;
4655
0
        break;
4656
0
      }
4657
0
    }
4658
0
    if (!match) return 1;
4659
0
  }
4660
4661
0
  return 0;
4662
0
}
4663
4664
// Check if ref frames of current block matches with given block.
4665
static INLINE void match_ref_frame(const MB_MODE_INFO *const mbmi,
4666
                                   const MV_REFERENCE_FRAME *ref_frames,
4667
0
                                   int *const is_ref_match) {
4668
0
  if (is_inter_block(mbmi)) {
4669
0
    is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
4670
0
    is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
4671
0
    if (has_second_ref(mbmi)) {
4672
0
      is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
4673
0
      is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
4674
0
    }
4675
0
  }
4676
0
}
4677
4678
// Prune compound mode using ref frames of neighbor blocks.
4679
static INLINE int compound_skip_using_neighbor_refs(
4680
    MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
4681
0
    const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
4682
  // Exclude non-extended compound modes from pruning
4683
0
  if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4684
0
      this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4685
0
    return 0;
4686
4687
0
  if (prune_ext_comp_using_neighbors >= 3) return 1;
4688
4689
0
  int is_ref_match[2] = { 0 };  // 0 - match for forward refs
4690
                                // 1 - match for backward refs
4691
  // Check if ref frames of this block matches with left neighbor.
4692
0
  if (xd->left_available)
4693
0
    match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
4694
4695
  // Check if ref frames of this block matches with above neighbor.
4696
0
  if (xd->up_available)
4697
0
    match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
4698
4699
  // Combine ref frame match with neighbors in forward and backward refs.
4700
0
  const int track_ref_match = is_ref_match[0] + is_ref_match[1];
4701
4702
  // Pruning based on ref frame match with neighbors.
4703
0
  if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
4704
0
  return 1;
4705
0
}
4706
4707
// Update best single mode for the given reference frame based on simple rd.
4708
static INLINE void update_best_single_mode(InterModeSearchState *search_state,
4709
                                           const PREDICTION_MODE this_mode,
4710
                                           const MV_REFERENCE_FRAME ref_frame,
4711
0
                                           int64_t this_rd) {
4712
0
  if (this_rd < search_state->best_single_rd[ref_frame]) {
4713
0
    search_state->best_single_rd[ref_frame] = this_rd;
4714
0
    search_state->best_single_mode[ref_frame] = this_mode;
4715
0
  }
4716
0
}
4717
4718
// Prune compound mode using best single mode for the same reference.
4719
static INLINE int skip_compound_using_best_single_mode_ref(
4720
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
4721
    const PREDICTION_MODE *best_single_mode,
4722
0
    int prune_comp_using_best_single_mode_ref) {
4723
  // Exclude non-extended compound modes from pruning
4724
0
  if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4725
0
      this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4726
0
    return 0;
4727
4728
0
  assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
4729
0
  const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
4730
  // Get ref frame direction corresponding to NEWMV
4731
  // 0 - NEWMV corresponding to forward direction
4732
  // 1 - NEWMV corresponding to backward direction
4733
0
  const int newmv_dir = comp_mode_ref0 != NEWMV;
4734
4735
  // Avoid pruning the compound mode when ref frame corresponding to NEWMV
4736
  // have NEWMV as single mode winner.
4737
  // Example: For an extended-compound mode,
4738
  // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
4739
  // - Ref frame corresponding to NEWMV is ALTREF_FRAME
4740
  // - Avoid pruning this mode, if best single mode corresponding to ref frame
4741
  //   ALTREF_FRAME is NEWMV
4742
0
  const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
4743
0
  if (single_mode == NEWMV) return 0;
4744
4745
  // Avoid pruning the compound mode when best single mode is not available
4746
0
  if (prune_comp_using_best_single_mode_ref == 1)
4747
0
    if (single_mode == MB_MODE_COUNT) return 0;
4748
0
  return 1;
4749
0
}
4750
4751
0
// qsort()-style comparator for int64_t elements.
// Returns -1 when *a < *b, 0 when they are equal and 1 when *a > *b, which
// sorts in ascending order. The values are compared directly rather than
// subtracted, so there is no overflow for operands of opposite sign.
static int compare_int64(const void *a, const void *b) {
  // Keep the const qualifier of the incoming pointers: a comparator only
  // reads the elements.
  const int64_t lhs = *(const int64_t *)a;
  const int64_t rhs = *(const int64_t *)b;
  if (lhs < rhs) return -1;
  if (lhs > rhs) return 1;
  return 0;
}
4762
4763
static INLINE void update_search_state(
4764
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
4765
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
4766
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
4767
0
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
4768
0
  const MACROBLOCKD *xd = &x->e_mbd;
4769
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
4770
0
  const int skip_ctx = av1_get_skip_txfm_context(xd);
4771
0
  const int skip_txfm =
4772
0
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
4773
0
  const TxfmSearchInfo *txfm_info = &x->txfm_search_info;
4774
4775
0
  search_state->best_rd = new_best_rd_stats->rdcost;
4776
0
  search_state->best_mode_index = new_best_mode;
4777
0
  *best_rd_stats_dst = *new_best_rd_stats;
4778
0
  search_state->best_mbmode = *mbmi;
4779
0
  search_state->best_skip2 = skip_txfm;
4780
0
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
4781
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
4782
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
4783
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
4784
  // These two values will be updated when av1_txfm_search is called.
4785
0
  if (txfm_search_done) {
4786
0
    search_state->best_rate_y =
4787
0
        new_best_rd_stats_y->rate +
4788
0
        x->mode_costs.skip_txfm_cost[skip_ctx]
4789
0
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
4790
0
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
4791
0
  }
4792
0
  search_state->best_y_rdcost = *new_best_rd_stats_y;
4793
0
  memcpy(ctx->blk_skip, txfm_info->blk_skip,
4794
0
         sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
4795
0
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
4796
0
}
4797
4798
// Find the best RD for a reference frame (among single reference modes)
4799
// and store +10% of it in the 0-th element in ref_frame_rd.
4800
0
static AOM_INLINE void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
4801
0
  assert(ref_frame_rd[0] == INT64_MAX);
4802
0
  int64_t ref_copy[REF_FRAMES - 1];
4803
0
  memcpy(ref_copy, ref_frame_rd + 1,
4804
0
         sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
4805
0
  qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
4806
4807
0
  int64_t cutoff = ref_copy[0];
4808
  // The cut-off is within 10% of the best.
4809
0
  if (cutoff != INT64_MAX) {
4810
0
    assert(cutoff < INT64_MAX / 200);
4811
0
    cutoff = (110 * cutoff) / 100;
4812
0
  }
4813
0
  ref_frame_rd[0] = cutoff;
4814
0
}
4815
4816
// Check if either frame is within the cutoff.
4817
static INLINE bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
4818
                                        MV_REFERENCE_FRAME frame1,
4819
0
                                        MV_REFERENCE_FRAME frame2) {
4820
0
  assert(frame2 > 0);
4821
0
  return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
4822
0
         ref_frame_rd[frame2] <= ref_frame_rd[0];
4823
0
}
4824
4825
static AOM_INLINE void evaluate_motion_mode_for_winner_candidates(
4826
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
4827
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
4828
    PICK_MODE_CONTEXT *const ctx,
4829
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
4830
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
4831
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
4832
0
    InterModeSearchState *const search_state, int64_t *yrd) {
4833
0
  const AV1_COMMON *const cm = &cpi->common;
4834
0
  const int num_planes = av1_num_planes(cm);
4835
0
  MACROBLOCKD *const xd = &x->e_mbd;
4836
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
4837
0
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
4838
0
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;
4839
4840
0
  for (int cand = 0; cand < num_best_cand; cand++) {
4841
0
    RD_STATS rd_stats;
4842
0
    RD_STATS rd_stats_y;
4843
0
    RD_STATS rd_stats_uv;
4844
0
    av1_init_rd_stats(&rd_stats);
4845
0
    av1_init_rd_stats(&rd_stats_y);
4846
0
    av1_init_rd_stats(&rd_stats_uv);
4847
0
    int rate_mv;
4848
4849
0
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
4850
0
    args->skip_motion_mode =
4851
0
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
4852
0
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
4853
0
    rd_stats.rate =
4854
0
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;
4855
4856
    // Continue if the best candidate is compound.
4857
0
    if (!is_inter_singleref_mode(mbmi->mode)) continue;
4858
4859
0
    x->txfm_search_info.skip_txfm = 0;
4860
0
    struct macroblockd_plane *pd = xd->plane;
4861
0
    const BUFFER_SET orig_dst = {
4862
0
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
4863
0
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
4864
0
    };
4865
4866
0
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
4867
    // Initialize motion mode to simple translation
4868
    // Calculation of switchable rate depends on it.
4869
0
    mbmi->motion_mode = 0;
4870
0
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
4871
0
    for (int i = 0; i < num_planes; i++) {
4872
0
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
4873
0
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
4874
0
    }
4875
4876
0
    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
4877
0
                           search_state->best_skip_rd[1] };
4878
0
    int64_t this_yrd = INT64_MAX;
4879
0
    int64_t ret_value = motion_mode_rd(
4880
0
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
4881
0
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
4882
0
        do_tx_search, inter_modes_info, 1, &this_yrd);
4883
4884
0
    if (ret_value != INT64_MAX) {
4885
0
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
4886
0
      const THR_MODES mode_enum = get_prediction_mode_idx(
4887
0
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
4888
      // Collect mode stats for multiwinner mode processing
4889
0
      store_winner_mode_stats(
4890
0
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
4891
0
          mode_enum, NULL, bsize, rd_stats.rdcost,
4892
0
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
4893
0
      if (rd_stats.rdcost < search_state->best_rd) {
4894
0
        *yrd = this_yrd;
4895
0
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
4896
0
                            &rd_stats_uv, mode_enum, x, do_tx_search);
4897
0
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
4898
0
      }
4899
0
    }
4900
0
  }
4901
0
}
4902
4903
/*!\cond */
4904
// Arguments for speed feature pruning of inter mode search
// Passed by pointer to skip_inter_mode() as its `args` parameter.
4905
typedef struct {
4906
  // NOTE(review): presumably an output location for the skip-motion-mode
  // decision of the current mode - confirm in skip_inter_mode().
  int *skip_motion_mode;
4907
  // NOTE(review): name and type match the mode_skip_mask parameter of
  // inter_mode_search_order_independent_skip(); presumably forwarded there.
  mode_skip_mask_t *mode_skip_mask;
4908
  // NOTE(review): name and type match the search_state parameter of
  // inter_mode_search_order_independent_skip(); presumably forwarded there.
  InterModeSearchState *search_state;
4909
  // NOTE(review): name and type match the skip_ref_frame_mask parameter of
  // inter_mode_search_order_independent_skip(); presumably forwarded there.
  int skip_ref_frame_mask;
4910
  // NOTE(review): looks like a flag recording that the first compound mode
  // has been reached in the mode loop - confirm against the user.
  int reach_first_comp_mode;
4911
  // NOTE(review): looks like a multiplier applied to mode thresholds; units
  // and scaling are not visible here - confirm against the user.
  int mode_thresh_mul_fact;
4912
  // NOTE(review): looks like a running count of single reference modes
  // handled so far - confirm against the user.
  int num_single_modes_processed;
4913
  // NOTE(review): looks like a flag telling that the single reference stats
  // used for compound pruning are ready - confirm against the user.
  int prune_cpd_using_sr_stats_ready;
4914
} InterModeSFArgs;
4915
/*!\endcond */
4916
4917
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
4918
                           int64_t *ref_frame_rd, int midx,
4919
0
                           InterModeSFArgs *args, int is_low_temp_var) {
4920
0
  const SPEED_FEATURES *const sf = &cpi->sf;
4921
0
  MACROBLOCKD *const xd = &x->e_mbd;
4922
  // Get the actual prediction mode we are trying in this iteration
4923
0
  const THR_MODES mode_enum = av1_default_mode_order[midx];
4924
0
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
4925
0
  const PREDICTION_MODE this_mode = mode_def->mode;
4926
0
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
4927
0
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
4928
0
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
4929
0
  const int comp_pred = second_ref_frame > INTRA_FRAME;
4930
4931
0
  if (ref_frame == INTRA_FRAME) return 1;
4932
4933
0
  const FRAME_UPDATE_TYPE update_type =
4934
0
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
4935
0
  if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
4936
0
      comp_pred) {
4937
0
    return 1;
4938
0
  }
4939
4940
  // This is for real time encoding.
4941
0
  if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
4942
0
      this_mode != NEARESTMV)
4943
0
    return 1;
4944
4945
  // Check if this mode should be skipped because it is incompatible with the
4946
  // current frame
4947
0
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
4948
0
    return 1;
4949
0
  const int ret = inter_mode_search_order_independent_skip(
4950
0
      cpi, x, args->mode_skip_mask, args->search_state,
4951
0
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
4952
0
  if (ret == 1) return 1;
4953
0
  *(args->skip_motion_mode) = (ret == 2);
4954
4955
  // We've reached the first compound prediction mode, get stats from the
4956
  // single reference predictors to help with pruning
4957
0
  if (sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
4958
0
      args->reach_first_comp_mode == 0) {
4959
0
    analyze_single_states(cpi, args->search_state);
4960
0
    args->reach_first_comp_mode = 1;
4961
0
  }
4962
4963
  // Prune aggressively when best mode is skippable.
4964
0
  int mul_fact = args->search_state->best_mode_skippable
4965
0
                     ? args->mode_thresh_mul_fact
4966
0
                     : (1 << MODE_THRESH_QBITS);
4967
0
  int64_t mode_threshold =
4968
0
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
4969
0
      MODE_THRESH_QBITS;
4970
4971
0
  if (args->search_state->best_rd < mode_threshold) return 1;
4972
4973
  // Skip this compound mode based on the RD results from the single prediction
4974
  // modes
4975
0
  if (sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
4976
0
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
4977
0
                                       ref_frame, second_ref_frame, x))
4978
0
      return 1;
4979
0
  }
4980
4981
0
  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
4982
    // After we done with single reference modes, find the 2nd best RD
4983
    // for a reference frame. Only search compound modes that have a reference
4984
    // frame at least as good as the 2nd best.
4985
0
    if (!args->prune_cpd_using_sr_stats_ready &&
4986
0
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
4987
0
      find_top_ref(ref_frame_rd);
4988
0
      args->prune_cpd_using_sr_stats_ready = 1;
4989
0
    }
4990
0
    if (args->prune_cpd_using_sr_stats_ready &&
4991
0
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
4992
0
      return 1;
4993
0
  }
4994
4995
  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
4996
0
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
4997
0
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
4998
0
    return 1;
4999
0
  }
5000
5001
0
  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
5002
0
    if (compound_skip_using_neighbor_refs(
5003
0
            xd, this_mode, ref_frames,
5004
0
            sf->inter_sf.prune_ext_comp_using_neighbors))
5005
0
      return 1;
5006
0
  }
5007
5008
0
  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
5009
0
    if (skip_compound_using_best_single_mode_ref(
5010
0
            this_mode, ref_frames, args->search_state->best_single_mode,
5011
0
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
5012
0
      return 1;
5013
0
  }
5014
5015
0
  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
5016
0
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
5017
0
    if (skip_nearest_near_mv_using_refmv_weight(x, this_mode, ref_frame_type))
5018
0
      return 1;
5019
0
  }
5020
5021
0
  if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
5022
0
      ref_frame == GOLDEN_FRAME && !comp_pred) {
5023
0
    const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
5024
0
    if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
5025
0
        args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
5026
0
      if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
5027
0
        return 1;
5028
0
    }
5029
0
  }
5030
5031
0
  return 0;
5032
0
}
5033
5034
static void record_best_compound(REFERENCE_MODE reference_mode,
5035
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
5036
                                 InterModeSearchState *search_state,
5037
0
                                 int compmode_cost) {
5038
0
  int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
5039
5040
0
  if (reference_mode == REFERENCE_MODE_SELECT) {
5041
0
    single_rate = rd_stats->rate - compmode_cost;
5042
0
    hybrid_rate = rd_stats->rate;
5043
0
  } else {
5044
0
    single_rate = rd_stats->rate;
5045
0
    hybrid_rate = rd_stats->rate + compmode_cost;
5046
0
  }
5047
5048
0
  single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
5049
0
  hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);
5050
5051
0
  if (!comp_pred) {
5052
0
    if (single_rd < search_state->best_pred_rd[SINGLE_REFERENCE])
5053
0
      search_state->best_pred_rd[SINGLE_REFERENCE] = single_rd;
5054
0
  } else {
5055
0
    if (single_rd < search_state->best_pred_rd[COMPOUND_REFERENCE])
5056
0
      search_state->best_pred_rd[COMPOUND_REFERENCE] = single_rd;
5057
0
  }
5058
0
  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
5059
0
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
5060
0
}
5061
5062
// Does a transform search over a list of the best inter mode candidates.
5063
// This is called if the original mode search computed an RD estimate
5064
// for the transform search rather than doing a full search.
5065
static void tx_search_best_inter_candidates(
5066
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
5067
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
5068
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
5069
    InterModeSearchState *search_state, RD_STATS *rd_cost,
5070
0
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
5071
0
  AV1_COMMON *const cm = &cpi->common;
5072
0
  MACROBLOCKD *const xd = &x->e_mbd;
5073
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5074
0
  const ModeCosts *mode_costs = &x->mode_costs;
5075
0
  const int num_planes = av1_num_planes(cm);
5076
0
  const int skip_ctx = av1_get_skip_txfm_context(xd);
5077
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
5078
0
  InterModesInfo *inter_modes_info = x->inter_modes_info;
5079
0
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
5080
0
  search_state->best_rd = best_rd_so_far;
5081
0
  search_state->best_mode_index = THR_INVALID;
5082
  // Initialize best mode stats for winner mode processing
5083
0
  x->winner_mode_count = 0;
5084
0
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5085
0
                          NULL, bsize, best_rd_so_far,
5086
0
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
5087
0
  inter_modes_info->num =
5088
0
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
5089
0
          ? inter_modes_info->num
5090
0
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
5091
0
  const int64_t top_est_rd =
5092
0
      inter_modes_info->num > 0
5093
0
          ? inter_modes_info
5094
0
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
5095
0
          : INT64_MAX;
5096
0
  *yrd = INT64_MAX;
5097
0
  int64_t best_rd_in_this_partition = INT64_MAX;
5098
0
  int num_inter_mode_cands = inter_modes_info->num;
5099
0
  int newmv_mode_evaled = 0;
5100
0
  int max_allowed_cands = INT_MAX;
5101
0
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
5102
    // The bound on the no. of inter mode candidates, beyond which the
5103
    // candidates are limited if a newmv mode got evaluated, is set as
5104
    // max_allowed_cands + 1.
5105
0
    const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
5106
0
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
5107
0
    max_allowed_cands =
5108
0
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
5109
0
  }
5110
5111
0
  int num_mode_thresh = INT_MAX;
5112
0
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
5113
    // Bound the no. of transform searches per prediction mode beyond a
5114
    // threshold.
5115
0
    const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
5116
0
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
5117
0
    num_mode_thresh =
5118
0
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
5119
0
  }
5120
5121
0
  int num_tx_cands = 0;
5122
0
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
5123
  // Iterate over best inter mode candidates and perform tx search
5124
0
  for (int j = 0; j < num_inter_mode_cands; ++j) {
5125
0
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
5126
0
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
5127
0
    const PREDICTION_MODE prediction_mode = mbmi->mode;
5128
0
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
5129
0
    if (curr_est_rd * 0.80 > top_est_rd) break;
5130
5131
0
    if (num_tx_cands > num_mode_thresh) {
5132
0
      if ((prediction_mode != NEARESTMV &&
5133
0
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
5134
0
          (prediction_mode == NEARESTMV &&
5135
0
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
5136
0
        continue;
5137
0
    }
5138
5139
0
    txfm_info->skip_txfm = 0;
5140
0
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5141
5142
    // Select prediction reference frames.
5143
0
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
5144
0
    for (int i = 0; i < num_planes; i++) {
5145
0
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
5146
0
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
5147
0
    }
5148
5149
0
    bool is_predictor_built = false;
5150
5151
    // Initialize RD stats
5152
0
    RD_STATS rd_stats;
5153
0
    RD_STATS rd_stats_y;
5154
0
    RD_STATS rd_stats_uv;
5155
0
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
5156
0
    int64_t skip_rd = INT64_MAX;
5157
0
    if (cpi->sf.inter_sf.txfm_rd_gate_level) {
5158
      // Check if the mode is good enough based on skip RD
5159
0
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
5160
0
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
5161
0
      int eval_txfm =
5162
0
          check_txfm_eval(x, bsize, search_state->best_skip_rd[0], skip_rd,
5163
0
                          cpi->sf.inter_sf.txfm_rd_gate_level, 0);
5164
0
      if (!eval_txfm) continue;
5165
0
    }
5166
5167
    // Build the prediction for this mode
5168
0
    if (!is_predictor_built) {
5169
0
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
5170
0
                                    av1_num_planes(cm) - 1);
5171
0
    }
5172
0
    if (mbmi->motion_mode == OBMC_CAUSAL) {
5173
0
      av1_build_obmc_inter_predictors_sb(cm, xd);
5174
0
    }
5175
5176
0
    num_tx_cands++;
5177
0
    if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
5178
0
    num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
5179
0
    int64_t this_yrd = INT64_MAX;
5180
    // Do the transform search
5181
0
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
5182
0
                         mode_rate, search_state->best_rd)) {
5183
0
      continue;
5184
0
    } else {
5185
0
      const int y_rate =
5186
0
          rd_stats.skip_txfm
5187
0
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
5188
0
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
5189
0
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);
5190
5191
0
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
5192
0
        inter_mode_data_push(
5193
0
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
5194
0
            rd_stats_y.rate + rd_stats_uv.rate +
5195
0
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
5196
0
      }
5197
0
    }
5198
0
    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
5199
0
    if (rd_stats.rdcost < best_rd_in_this_partition) {
5200
0
      best_rd_in_this_partition = rd_stats.rdcost;
5201
0
      *yrd = this_yrd;
5202
0
    }
5203
5204
0
    const THR_MODES mode_enum = get_prediction_mode_idx(
5205
0
        prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5206
5207
    // Collect mode stats for multiwinner mode processing
5208
0
    const int txfm_search_done = 1;
5209
0
    store_winner_mode_stats(
5210
0
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
5211
0
        NULL, bsize, rd_stats.rdcost,
5212
0
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
5213
5214
0
    if (rd_stats.rdcost < search_state->best_rd) {
5215
0
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
5216
0
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
5217
0
      search_state->best_skip_rd[0] = skip_rd;
5218
      // Limit the total number of modes to be evaluated if the first is valid
5219
      // and transform skip or compound
5220
0
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
5221
0
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
5222
          // Evaluate more candidates at high quantizers where occurrence of
5223
          // transform skip is high.
5224
0
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
5225
0
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
5226
0
          num_inter_mode_cands =
5227
0
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
5228
0
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
5229
0
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
5230
          // Evaluate more candidates at low quantizers where occurrence of
5231
          // single reference mode is high.
5232
0
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
5233
0
                                                { 10, 7, 5, 3 } };
5234
0
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
5235
0
          num_inter_mode_cands = AOMMIN(
5236
0
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
5237
0
        }
5238
0
      }
5239
0
    }
5240
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
5241
    // a newmv mode was evaluated already.
5242
0
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
5243
0
  }
5244
0
}
5245
5246
// Number of winner simple translation mode candidates to be used.
// NOTE(review): presumably indexed by the level of the
// motion_mode_for_winner_cand speed feature, with 0 meaning the feature is
// off - confirm against the lookup site.
5247
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
5248
5249
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
5250
// speed feature. This list consists of modes that have only searched
5251
// SIMPLE_TRANSLATION. The final list will be used to search other motion
5252
// modes after the initial RD search.
5253
static void handle_winner_cand(
5254
    MB_MODE_INFO *const mbmi,
5255
    motion_mode_best_st_candidate *best_motion_mode_cands,
5256
    int max_winner_motion_mode_cand, int64_t this_rd,
5257
0
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
5258
  // Number of current motion mode candidates in list
5259
0
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
5260
0
  int valid_motion_mode_cand_loc = num_motion_mode_cand;
5261
5262
  // find the best location to insert new motion mode candidate
5263
0
  for (int j = 0; j < num_motion_mode_cand; j++) {
5264
0
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
5265
0
      valid_motion_mode_cand_loc = j;
5266
0
      break;
5267
0
    }
5268
0
  }
5269
5270
  // Insert motion mode if location is found
5271
0
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
5272
0
    if (num_motion_mode_cand > 0 &&
5273
0
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
5274
0
      memmove(
5275
0
          &best_motion_mode_cands
5276
0
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
5277
0
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
5278
0
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
5279
0
           valid_motion_mode_cand_loc) *
5280
0
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
5281
0
    motion_mode_cand->mbmi = *mbmi;
5282
0
    motion_mode_cand->rd_cost = this_rd;
5283
0
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
5284
0
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
5285
0
        *motion_mode_cand;
5286
0
    best_motion_mode_cands->num_motion_mode_cand =
5287
0
        AOMMIN(max_winner_motion_mode_cand,
5288
0
               best_motion_mode_cands->num_motion_mode_cand + 1);
5289
0
  }
5290
0
}
5291
5292
/*!\brief Search intra modes in interframes
5293
 *
5294
 * \ingroup intra_mode_search
5295
 *
5296
 * This function searches for the best intra mode when the current frame is an
5297
 * interframe. This function however does *not* handle luma palette mode.
5298
 * Palette mode is currently handled by \ref av1_search_palette_mode.
5299
 *
5300
 * This function will first iterate through the luma mode candidates to find the
5301
 * best luma intra mode. Once the best luma mode it's found, it will then search
5302
 * for the best chroma mode. Because palette mode is currently not handled by
5303
 * here, a cache of uv mode is stored in
5304
 * InterModeSearchState::intra_search_state so it can be reused later by \ref
5305
 * av1_search_palette_mode.
5306
 *
5307
 * \return Returns the rdcost of the current intra-mode if it's available,
5308
 * otherwise returns INT64_MAX. The corresponding values in x->e_mbd.mi[0],
5309
 * rd_stats, rd_stats_y/uv, and best_intra_rd are also updated. Moreover, in the
5310
 * first evocation of the function, the chroma intra mode result is cached in
5311
 * intra_search_state to be used in subsequent calls. In the first evaluation
5312
 * with directional mode, a prune_mask computed with histogram of gradient is
5313
 * also stored in intra_search_state.
5314
 *
5315
 * \param[in,out] search_state      Struct keep track of the prediction mode
5316
 *                                  search state in interframe.
5317
 *
5318
 * \param[in]     cpi               Top-level encoder structure.
5319
 * \param[in]     x                 Pointer to struct holding all the data for
5320
 *                                  the current prediction block.
5321
 * \param[out]    rd_cost           Stores the best rd_cost among all the
5322
 *                                  prediction modes searched.
5323
 * \param[in]     bsize             Current block size.
5324
 * \param[in,out] ctx               Structure to hold the number of 4x4 blks to
5325
 *                                  copy the tx_type and txfm_skip arrays.
5326
 *                                  for only the Y plane.
5327
 * \param[in,out] sf_args           Stores the list of intra mode candidates
5328
 *                                  to be searched.
5329
 * \param[in]     intra_ref_frame_cost  The entropy cost for signaling that the
5330
 *                                      current ref frame is an intra frame.
5331
 * \param[in]     yrd_threshold     The rdcost threshold for luma intra mode to
5332
 *                                  terminate chroma intra mode search.
5333
 *
5334
 * \return Returns INT64_MAX if the determined motion mode is invalid and the
5335
 * current motion mode being tested should be skipped. It returns 0 if the
5336
 * motion mode search is a success.
5337
 */
5338
static AOM_INLINE void search_intra_modes_in_interframe(
5339
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
5340
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5341
    InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
5342
0
    int64_t yrd_threshold) {
5343
0
  const AV1_COMMON *const cm = &cpi->common;
5344
0
  const SPEED_FEATURES *const sf = &cpi->sf;
5345
0
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
5346
0
  MACROBLOCKD *const xd = &x->e_mbd;
5347
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
5348
0
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;
5349
5350
0
  int is_best_y_mode_intra = 0;
5351
0
  RD_STATS best_intra_rd_stats_y;
5352
0
  int64_t best_rd_y = INT64_MAX;
5353
0
  int best_mode_cost_y = -1;
5354
0
  MB_MODE_INFO best_mbmi = *xd->mi[0];
5355
0
  THR_MODES best_mode_enum = THR_INVALID;
5356
0
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
5357
0
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
5358
0
  const int num_4x4 = bsize_to_num_blk(bsize);
5359
5360
  // Performs luma search
5361
0
  int64_t best_model_rd = INT64_MAX;
5362
0
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
5363
0
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
5364
0
    top_intra_model_rd[i] = INT64_MAX;
5365
0
  }
5366
0
  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
5367
0
    if (sf->intra_sf.skip_intra_in_interframe &&
5368
0
        search_state->intra_search_state.skip_intra_modes)
5369
0
      break;
5370
0
    set_y_mode_and_delta_angle(mode_idx, mbmi);
5371
0
    assert(mbmi->mode < INTRA_MODE_END);
5372
5373
    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
5374
0
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
5375
0
      continue;
5376
5377
0
    const THR_MODES mode_enum =
5378
0
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
5379
0
    if ((!intra_mode_cfg->enable_smooth_intra ||
5380
0
         cpi->sf.intra_sf.disable_smooth_intra) &&
5381
0
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
5382
0
         mbmi->mode == SMOOTH_V_PRED))
5383
0
      continue;
5384
0
    if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
5385
0
      continue;
5386
0
    if (av1_is_directional_mode(mbmi->mode) &&
5387
0
        !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
5388
0
        mbmi->angle_delta[PLANE_TYPE_Y] != 0)
5389
0
      continue;
5390
0
    const PREDICTION_MODE this_mode = mbmi->mode;
5391
5392
0
    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
5393
0
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
5394
0
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
5395
0
    x->txfm_search_info.skip_txfm = 0;
5396
5397
0
    if (this_mode != DC_PRED) {
5398
      // Only search the oblique modes if the best so far is
5399
      // one of the neighboring directional modes
5400
0
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
5401
0
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
5402
0
        if (search_state->best_mode_index != THR_INVALID &&
5403
0
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
5404
0
          continue;
5405
0
      }
5406
0
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
5407
0
        if (conditional_skipintra(
5408
0
                this_mode, search_state->intra_search_state.best_intra_mode))
5409
0
          continue;
5410
0
      }
5411
0
    }
5412
5413
0
    RD_STATS intra_rd_stats_y;
5414
0
    int mode_cost_y;
5415
0
    int64_t intra_rd_y = INT64_MAX;
5416
0
    const int is_luma_result_valid = av1_handle_intra_y_mode(
5417
0
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
5418
0
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
5419
0
        &best_model_rd, top_intra_model_rd);
5420
0
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
5421
0
      is_best_y_mode_intra = 1;
5422
0
      if (intra_rd_y < best_rd_y) {
5423
0
        best_intra_rd_stats_y = intra_rd_stats_y;
5424
0
        best_mode_cost_y = mode_cost_y;
5425
0
        best_rd_y = intra_rd_y;
5426
0
        best_mbmi = *mbmi;
5427
0
        best_mode_enum = mode_enum;
5428
0
        memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
5429
0
               sizeof(best_blk_skip[0]) * num_4x4);
5430
0
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
5431
0
      }
5432
0
    }
5433
0
  }
5434
5435
0
  if (!is_best_y_mode_intra) {
5436
0
    return;
5437
0
  }
5438
5439
0
  assert(best_rd_y < INT64_MAX);
5440
5441
  // Restores the best luma mode
5442
0
  *mbmi = best_mbmi;
5443
0
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
5444
0
         sizeof(best_blk_skip[0]) * num_4x4);
5445
0
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);
5446
5447
  // Performs chroma search
5448
0
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
5449
0
  av1_init_rd_stats(&intra_rd_stats);
5450
0
  av1_init_rd_stats(&intra_rd_stats_uv);
5451
0
  const int num_planes = av1_num_planes(cm);
5452
0
  if (num_planes > 1) {
5453
0
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
5454
0
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
5455
0
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);
5456
5457
0
    if (!intra_uv_mode_valid) {
5458
0
      return;
5459
0
    }
5460
0
  }
5461
5462
  // Merge the luma and chroma rd stats
5463
0
  assert(best_mode_cost_y >= 0);
5464
0
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
5465
0
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
5466
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
5467
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
5468
    // (prediction granularity), so we account for it in the full rate,
5469
    // not the tokenonly rate.
5470
0
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
5471
0
  }
5472
5473
0
  const ModeCosts *mode_costs = &x->mode_costs;
5474
0
  const PREDICTION_MODE mode = mbmi->mode;
5475
0
  if (num_planes > 1 && xd->is_chroma_ref) {
5476
0
    const int uv_mode_cost =
5477
0
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
5478
0
    intra_rd_stats.rate +=
5479
0
        intra_rd_stats_uv.rate +
5480
0
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
5481
0
  }
5482
5483
  // Intra block is always coded as non-skip
5484
0
  intra_rd_stats.skip_txfm = 0;
5485
0
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
5486
  // Add in the cost of the no skip flag.
5487
0
  const int skip_ctx = av1_get_skip_txfm_context(xd);
5488
0
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
5489
  // Calculate the final RD estimate for this mode.
5490
0
  const int64_t this_rd =
5491
0
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
5492
  // Keep record of best intra rd
5493
0
  if (this_rd < search_state->best_intra_rd) {
5494
0
    search_state->best_intra_rd = this_rd;
5495
0
    intra_search_state->best_intra_mode = mode;
5496
0
  }
5497
5498
0
  for (int i = 0; i < REFERENCE_MODES; ++i) {
5499
0
    search_state->best_pred_rd[i] =
5500
0
        AOMMIN(search_state->best_pred_rd[i], this_rd);
5501
0
  }
5502
5503
0
  intra_rd_stats.rdcost = this_rd;
5504
5505
  // Collect mode stats for multiwinner mode processing
5506
0
  const int txfm_search_done = 1;
5507
0
  store_winner_mode_stats(
5508
0
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
5509
0
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
5510
0
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
5511
0
  if (intra_rd_stats.rdcost < search_state->best_rd) {
5512
0
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
5513
0
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
5514
0
                        best_mode_enum, x, txfm_search_done);
5515
0
  }
5516
0
}
5517
5518
#if !CONFIG_REALTIME_ONLY
5519
// Prepare inter_cost and intra_cost from TPL stats, which are used as ML
5520
// features in intra mode pruning.
5521
static AOM_INLINE void calculate_cost_from_tpl_data(
5522
    const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
5523
0
    int mi_col, int64_t *inter_cost, int64_t *intra_cost) {
5524
0
  const AV1_COMMON *const cm = &cpi->common;
5525
  // Only consider full SB.
5526
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
5527
0
  const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
5528
0
  const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5529
0
                  (block_size_high[sb_size] / tpl_bsize_1d);
5530
0
  SuperBlockEnc *sb_enc = &x->sb_enc;
5531
0
  if (sb_enc->tpl_data_count == len) {
5532
0
    const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5533
0
    const int tpl_stride = sb_enc->tpl_stride;
5534
0
    const int tplw = mi_size_wide[tpl_bsize];
5535
0
    const int tplh = mi_size_high[tpl_bsize];
5536
0
    const int nw = mi_size_wide[bsize] / tplw;
5537
0
    const int nh = mi_size_high[bsize] / tplh;
5538
0
    if (nw >= 1 && nh >= 1) {
5539
0
      const int of_h = mi_row % mi_size_high[sb_size];
5540
0
      const int of_w = mi_col % mi_size_wide[sb_size];
5541
0
      const int start = of_h / tplh * tpl_stride + of_w / tplw;
5542
5543
0
      for (int k = 0; k < nh; k++) {
5544
0
        for (int l = 0; l < nw; l++) {
5545
0
          *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5546
0
          *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5547
0
        }
5548
0
      }
5549
0
      *inter_cost /= nw * nh;
5550
0
      *intra_cost /= nw * nh;
5551
0
    }
5552
0
  }
5553
0
}
5554
#endif  // !CONFIG_REALTIME_ONLY
5555
5556
// When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
5557
// intra mode search.
5558
static AOM_INLINE void skip_intra_modes_in_interframe(
5559
    AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
5560
    InterModeSearchState *search_state, const SPEED_FEATURES *const sf,
5561
0
    int64_t inter_cost, int64_t intra_cost) {
5562
0
  MACROBLOCKD *const xd = &x->e_mbd;
5563
0
  const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME;
5564
0
  if (sf->rt_sf.prune_intra_mode_based_on_mv_range &&
5565
0
      bsize > sf->part_sf.max_intra_bsize && !comp_pred) {
5566
0
    const MV best_mv = search_state->best_mbmode.mv[0].as_mv;
5567
0
    const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range;
5568
0
    if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh &&
5569
0
        x->source_variance > 128) {
5570
0
      search_state->intra_search_state.skip_intra_modes = 1;
5571
0
      return;
5572
0
    }
5573
0
  }
5574
5575
0
  const unsigned int src_var_thresh_intra_skip = 1;
5576
0
  const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
5577
0
  if (!(skip_intra_in_interframe &&
5578
0
        (x->source_variance > src_var_thresh_intra_skip)))
5579
0
    return;
5580
5581
  // Prune intra search based on best inter mode being transfrom skip.
5582
0
  if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
5583
0
    const int qindex_thresh[2] = { 200, MAXQ };
5584
0
    const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
5585
0
    if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
5586
0
        (x->qindex <= qindex_thresh[ind])) {
5587
0
      search_state->intra_search_state.skip_intra_modes = 1;
5588
0
      return;
5589
0
    } else if ((skip_intra_in_interframe >= 4) &&
5590
0
               (inter_cost < 0 || intra_cost < 0)) {
5591
0
      search_state->intra_search_state.skip_intra_modes = 1;
5592
0
      return;
5593
0
    }
5594
0
  }
5595
  // Use ML model to prune intra search.
5596
0
  if (inter_cost >= 0 && intra_cost >= 0) {
5597
0
    const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
5598
0
                                     ? &av1_intrap_nn_config
5599
0
                                     : &av1_intrap_hd_nn_config;
5600
0
    float nn_features[6];
5601
0
    float scores[2] = { 0.0f };
5602
5603
0
    nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
5604
0
    nn_features[1] = (float)mi_size_wide_log2[bsize];
5605
0
    nn_features[2] = (float)mi_size_high_log2[bsize];
5606
0
    nn_features[3] = (float)intra_cost;
5607
0
    nn_features[4] = (float)inter_cost;
5608
0
    const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
5609
0
    const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
5610
0
    nn_features[5] = (float)(ac_q_max / ac_q);
5611
5612
0
    av1_nn_predict(nn_features, nn_config, 1, scores);
5613
5614
    // For two parameters, the max prob returned from av1_nn_softmax equals
5615
    // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
5616
    // calling of av1_nn_softmax.
5617
0
    const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
5618
0
    assert(skip_intra_in_interframe <= 5);
5619
0
    if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
5620
0
      search_state->intra_search_state.skip_intra_modes = 1;
5621
0
    }
5622
0
  }
5623
0
}
5624
5625
static AOM_INLINE int get_block_temp_var(const AV1_COMP *cpi,
5626
                                         const MACROBLOCK *x,
5627
0
                                         BLOCK_SIZE bsize) {
5628
0
  const AV1_COMMON *const cm = &cpi->common;
5629
0
  const SPEED_FEATURES *const sf = &cpi->sf;
5630
5631
0
  if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION ||
5632
0
      !sf->rt_sf.short_circuit_low_temp_var ||
5633
0
      !sf->rt_sf.prune_inter_modes_using_temp_var) {
5634
0
    return 0;
5635
0
  }
5636
5637
0
  const int mi_row = x->e_mbd.mi_row;
5638
0
  const int mi_col = x->e_mbd.mi_col;
5639
0
  int is_low_temp_var = 0;
5640
5641
0
  if (cm->seq_params->sb_size == BLOCK_64X64)
5642
0
    is_low_temp_var = av1_get_force_skip_low_temp_var_small_sb(
5643
0
        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
5644
0
  else
5645
0
    is_low_temp_var = av1_get_force_skip_low_temp_var(
5646
0
        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
5647
5648
0
  return is_low_temp_var;
5649
0
}
5650
5651
// Rate-distortion mode decision for one block of an inter frame.
//
// Stages, in order:
//  1. Set up search state, reference costs and (non-realtime builds) TPL
//     derived statistics.
//  2. Loop over the inter modes of av1_default_mode_order, evaluating each
//     with handle_inter_mode() unless skip_inter_mode() rejects it.
//  3. Optionally evaluate motion modes for the winner candidates and run the
//     deferred transform search when do_tx_search is 0.
//  4. Evaluate intra modes (unless pruned), refine the winner's transform,
//     then try palette mode and skip mode.
//  5. Copy the best mode into xd->mi[0] and store the coding context in ctx.
//
// If no mode was selected, or the best RD cost is not below best_rd_so_far,
// rd_cost->rate is set to INT_MAX and rd_cost->rdcost to INT64_MAX and the
// function returns without updating the block's mode info.
// TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
5652
void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
5653
                            struct macroblock *x, struct RD_STATS *rd_cost,
5654
                            BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5655
0
                            int64_t best_rd_so_far) {
5656
0
  AV1_COMMON *const cm = &cpi->common;
5657
0
  const FeatureFlags *const features = &cm->features;
5658
0
  const int num_planes = av1_num_planes(cm);
5659
0
  const SPEED_FEATURES *const sf = &cpi->sf;
5660
0
  MACROBLOCKD *const xd = &x->e_mbd;
5661
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
5662
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5663
0
  int i;
5664
0
  const ModeCosts *mode_costs = &x->mode_costs;
5665
0
  const int *comp_inter_cost =
5666
0
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
5667
5668
0
  InterModeSearchState search_state;
5669
0
  init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
5670
0
  INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
5671
0
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
5672
0
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
5673
0
  };
5674
0
  HandleInterModeArgs args = { { NULL },
5675
0
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
5676
0
                               { NULL },
5677
0
                               { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
5678
0
                                 MAX_SB_SIZE >> 1 },
5679
0
                               NULL,
5680
0
                               NULL,
5681
0
                               NULL,
5682
0
                               search_state.modelled_rd,
5683
0
                               INT_MAX,
5684
0
                               INT_MAX,
5685
0
                               search_state.simple_rd,
5686
0
                               0,
5687
0
                               interintra_modes,
5688
0
                               { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
5689
0
                               0,
5690
0
                               -1,
5691
0
                               -1,
5692
0
                               -1,
5693
0
                               { 0 },
5694
0
                               { 0 },
5695
0
                               UINT_MAX };
5696
  // Currently, is_low_temp_var is used in real time encoding.
5697
0
  const int is_low_temp_var = get_block_temp_var(cpi, x, bsize);
5698
5699
0
  for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
5700
  // Indicates the appropriate number of simple translation winner modes for
5701
  // exhaustive motion mode evaluation
5702
0
  const int max_winner_motion_mode_cand =
5703
0
      num_winner_motion_modes[cpi->sf.winner_mode_sf
5704
0
                                  .motion_mode_for_winner_cand];
5705
0
  assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
5706
0
  motion_mode_candidate motion_mode_cand;
5707
0
  motion_mode_best_st_candidate best_motion_mode_cands;
5708
  // Initializing the number of motion mode candidates to zero.
5709
0
  best_motion_mode_cands.num_motion_mode_cand = 0;
5710
0
  for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
5711
0
    best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
5712
5713
0
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
5714
5715
0
  av1_invalid_rd_stats(rd_cost);
5716
5717
0
  for (i = 0; i < REF_FRAMES; ++i) {
5718
0
    x->warp_sample_info[i].num = -1;
5719
0
  }
5720
5721
  // Ref frames that are selected by square partition blocks.
5722
0
  int picked_ref_frames_mask = 0;
5723
0
  if (cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions &&
5724
0
      mbmi->partition != PARTITION_NONE) {
5725
    // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
5726
    // partition blocks. prune_ref_frame_for_rect_partitions >=2
5727
    // implies prune for vert, horiz and extended partition blocks.
5728
0
    if ((mbmi->partition != PARTITION_VERT &&
5729
0
         mbmi->partition != PARTITION_HORZ) ||
5730
0
        cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
5731
0
      picked_ref_frames_mask =
5732
0
          fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
5733
0
    }
5734
0
  }
5735
5736
#if CONFIG_COLLECT_COMPONENT_TIMING
5737
  start_timing(cpi, set_params_rd_pick_inter_mode_time);
5738
#endif
5739
  // Skip ref frames that were never selected by square blocks. A zero mask
  // (nothing recorded) disables this skipping entirely.
5740
0
  const int skip_ref_frame_mask =
5741
0
      picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
5742
0
  mode_skip_mask_t mode_skip_mask;
5743
0
  unsigned int ref_costs_single[REF_FRAMES];
5744
0
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
5745
0
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
5746
  // init params, set frame modes, speed features
5747
0
  set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
5748
0
                                skip_ref_frame_mask, ref_costs_single,
5749
0
                                ref_costs_comp, yv12_mb);
5750
#if CONFIG_COLLECT_COMPONENT_TIMING
5751
  end_timing(cpi, set_params_rd_pick_inter_mode_time);
5752
#endif
5753
5754
0
  int64_t best_est_rd = INT64_MAX;
5755
0
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
5756
  // If do_tx_search is 0, only estimated RD should be computed.
5757
  // If do_tx_search is 1, all modes have TX search performed.
5758
0
  const int do_tx_search =
5759
0
      !((cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
5760
0
        (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 &&
5761
0
         num_pels_log2_lookup[bsize] > 8));
5762
0
  InterModesInfo *inter_modes_info = x->inter_modes_info;
5763
0
  inter_modes_info->num = 0;
5764
5765
  // Temporary buffers used by handle_inter_mode().
5766
0
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
5767
5768
  // The best RD found for the reference frame, among single reference modes.
5769
  // Note that the 0-th element will contain a cut-off that is later used
5770
  // to determine if we should skip a compound mode.
5771
0
  int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
5772
0
                                       INT64_MAX, INT64_MAX, INT64_MAX,
5773
0
                                       INT64_MAX, INT64_MAX };
5774
5775
  // Prepared stats used later to check if we could skip intra mode eval.
  // -1 marks a cost as unavailable: skip_intra_modes_in_interframe() only runs
  // its ML model when both costs are >= 0.
5776
0
  int64_t inter_cost = -1;
5777
0
  int64_t intra_cost = -1;
5778
  // Need to tweak the threshold for hdres speed 0 & 1.
5779
0
  const int mi_row = xd->mi_row;
5780
0
  const int mi_col = xd->mi_col;
5781
5782
  // Obtain the relevant tpl stats for pruning inter modes
  // NOTE(review): this struct is only zeroed and filled inside the
  // prune_inter_modes_based_on_tpl branch below, yet its address is always
  // passed to handle_inter_mode() - presumably it is only read there when the
  // same speed feature is on; confirm against handle_inter_mode().
5783
0
  PruneInfoFromTpl inter_cost_info_from_tpl;
5784
0
#if !CONFIG_REALTIME_ONLY
5785
0
  if (cpi->sf.inter_sf.prune_inter_modes_based_on_tpl) {
5786
    // x->tpl_keep_ref_frame[id] = 1 => no pruning in
5787
    // prune_ref_by_selective_ref_frame()
5788
    // x->tpl_keep_ref_frame[id] = 0  => ref frame can be pruned in
5789
    // prune_ref_by_selective_ref_frame()
5790
    // Populating valid_refs[idx] = 1 ensures that
5791
    // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
5792
    // pruned ref frame.
5793
0
    int valid_refs[INTER_REFS_PER_FRAME];
5794
0
    for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
5795
0
      const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
5796
0
      valid_refs[frame - 1] =
5797
0
          x->tpl_keep_ref_frame[frame] ||
5798
0
          !prune_ref_by_selective_ref_frame(
5799
0
              cpi, x, refs, cm->cur_frame->ref_display_order_hint);
5800
0
    }
5801
0
    av1_zero(inter_cost_info_from_tpl);
5802
0
    get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
5803
0
                              &inter_cost_info_from_tpl);
5804
0
  }
5805
5806
  // The TPL costs for intra pruning are not gathered when the smaller frame
  // dimension exceeds 480 and the speed setting is 0 or 1.
0
  const int do_pruning =
5807
0
      (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
5808
0
  if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
5809
0
      cpi->oxcf.algo_cfg.enable_tpl_model)
5810
0
    calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
5811
0
                                 &intra_cost);
5812
0
#endif  // !CONFIG_REALTIME_ONLY
5813
5814
  // Initialize best mode stats for winner mode processing
5815
0
  zero_winner_mode_stats(bsize, MAX_WINNER_MODE_COUNT_INTER,
5816
0
                         x->winner_mode_stats);
5817
0
  x->winner_mode_count = 0;
5818
0
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5819
0
                          NULL, bsize, best_rd_so_far,
5820
0
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
5821
5822
0
  int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
5823
0
  if (sf->inter_sf.prune_inter_modes_if_skippable) {
5824
    // Higher multiplication factor values for lower quantizers.
5825
0
    mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
5826
0
  }
5827
5828
  // Initialize arguments for mode loop speed features
5829
0
  InterModeSFArgs sf_args = { &args.skip_motion_mode,
5830
0
                              &mode_skip_mask,
5831
0
                              &search_state,
5832
0
                              skip_ref_frame_mask,
5833
0
                              0,
5834
0
                              mode_thresh_mul_fact,
5835
0
                              0,
5836
0
                              0 };
5837
0
  int64_t best_inter_yrd = INT64_MAX;
5838
5839
  // This is the main loop of this function. It loops over all possible inter
5840
  // modes and calls handle_inter_mode() to compute the RD for each.
5841
  // Here midx is just an iterator index that should not be used by itself
5842
  // except to keep track of the number of modes searched. It should be used
5843
  // with av1_default_mode_order to get the enum that defines the mode, which
5844
  // can be used with av1_mode_defs to get the prediction mode and the ref
5845
  // frames.
5846
  // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
5847
  // good speedup for real time case. If we decide to use compound mode in real
5848
  // time, maybe we can modify av1_default_mode_order table.
5849
0
  THR_MODES mode_start = THR_INTER_MODE_START;
5850
0
  THR_MODES mode_end = THR_INTER_MODE_END;
5851
0
  const CurrentFrame *const current_frame = &cm->current_frame;
5852
0
  if (current_frame->reference_mode == SINGLE_REFERENCE) {
5853
0
    mode_start = SINGLE_REF_MODE_START;
5854
0
    mode_end = SINGLE_REF_MODE_END;
5855
0
  }
5856
5857
0
  for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
5858
    // Get the actual prediction mode we are trying in this iteration
5859
0
    const THR_MODES mode_enum = av1_default_mode_order[midx];
5860
0
    const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
5861
0
    const PREDICTION_MODE this_mode = mode_def->mode;
5862
0
    const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
5863
5864
0
    const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
5865
0
    const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
5866
0
    const int is_single_pred =
5867
0
        ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
5868
0
    const int comp_pred = second_ref_frame > INTRA_FRAME;
5869
5870
0
    init_mbmi(mbmi, this_mode, ref_frames, cm);
5871
5872
0
    txfm_info->skip_txfm = 0;
5873
0
    sf_args.num_single_modes_processed += is_single_pred;
5874
0
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
5875
#if CONFIG_COLLECT_COMPONENT_TIMING
5876
    start_timing(cpi, skip_inter_mode_time);
5877
#endif
5878
    // Apply speed features to decide if this inter mode can be skipped
5879
0
    const int is_skip_inter_mode = skip_inter_mode(
5880
0
        cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var);
5881
#if CONFIG_COLLECT_COMPONENT_TIMING
5882
    end_timing(cpi, skip_inter_mode_time);
5883
#endif
5884
0
    if (is_skip_inter_mode) continue;
5885
5886
    // Select prediction reference frames.
5887
0
    for (i = 0; i < num_planes; i++) {
5888
0
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
5889
0
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
5890
0
    }
5891
5892
0
    mbmi->angle_delta[PLANE_TYPE_Y] = 0;
5893
0
    mbmi->angle_delta[PLANE_TYPE_UV] = 0;
5894
0
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
5895
0
    mbmi->ref_mv_idx = 0;
5896
5897
0
    const int64_t ref_best_rd = search_state.best_rd;
5898
0
    RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
5899
0
    av1_init_rd_stats(&rd_stats);
5900
5901
0
    const int ref_frame_cost = comp_pred
5902
0
                                   ? ref_costs_comp[ref_frame][second_ref_frame]
5903
0
                                   : ref_costs_single[ref_frame];
5904
0
    const int compmode_cost =
5905
0
        is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
5906
0
    const int real_compmode_cost =
5907
0
        cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
5908
0
            ? compmode_cost
5909
0
            : 0;
5910
    // Point to variables that are maintained between loop iterations
5911
0
    args.single_newmv = search_state.single_newmv;
5912
0
    args.single_newmv_rate = search_state.single_newmv_rate;
5913
0
    args.single_newmv_valid = search_state.single_newmv_valid;
5914
0
    args.single_comp_cost = real_compmode_cost;
5915
0
    args.ref_frame_cost = ref_frame_cost;
5916
0
    args.best_pred_sse = search_state.best_pred_sse;
5917
5918
0
    int64_t skip_rd[2] = { search_state.best_skip_rd[0],
5919
0
                           search_state.best_skip_rd[1] };
5920
0
    int64_t this_yrd = INT64_MAX;
5921
#if CONFIG_COLLECT_COMPONENT_TIMING
5922
    start_timing(cpi, handle_inter_mode_time);
5923
#endif
5924
0
    int64_t this_rd = handle_inter_mode(
5925
0
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
5926
0
        ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
5927
0
        inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
5928
0
        &this_yrd);
5929
#if CONFIG_COLLECT_COMPONENT_TIMING
5930
    end_timing(cpi, handle_inter_mode_time);
5931
#endif
    // The two single-reference bookkeeping updates below run before the
    // INT64_MAX check, i.e. also for modes that are then discarded.
5932
0
    if (sf->inter_sf.prune_comp_search_by_single_result > 0 &&
5933
0
        is_inter_singleref_mode(this_mode)) {
5934
0
      collect_single_states(x, &search_state, mbmi);
5935
0
    }
5936
5937
0
    if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
5938
0
        is_inter_singleref_mode(this_mode))
5939
0
      update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
5940
5941
    // An RD of INT64_MAX is treated as "no usable result for this mode".
0
    if (this_rd == INT64_MAX) continue;
5942
5943
    // Zero the luma/chroma rates recorded for a transform-skip mode.
0
    if (mbmi->skip_txfm) {
5944
0
      rd_stats_y.rate = 0;
5945
0
      rd_stats_uv.rate = 0;
5946
0
    }
5947
5948
0
    if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
5949
0
        this_rd < ref_frame_rd[ref_frame]) {
5950
0
      ref_frame_rd[ref_frame] = this_rd;
5951
0
    }
5952
5953
    // Did this mode help, i.e., is it the new best mode
5954
0
    if (this_rd < search_state.best_rd) {
5955
0
      assert(IMPLIES(comp_pred,
5956
0
                     cm->current_frame.reference_mode != SINGLE_REFERENCE));
5957
0
      search_state.best_pred_sse = x->pred_sse[ref_frame];
5958
0
      best_inter_yrd = this_yrd;
5959
0
      update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
5960
0
                          &rd_stats_uv, mode_enum, x, do_tx_search);
5961
0
      if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
5962
0
      search_state.best_skip_rd[1] = skip_rd[1];
5963
0
    }
5964
0
    if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
5965
      // Add this mode to motion mode candidate list for motion mode search
5966
      // if using motion_mode_for_winner_cand speed feature
5967
0
      handle_winner_cand(mbmi, &best_motion_mode_cands,
5968
0
                         max_winner_motion_mode_cand, this_rd,
5969
0
                         &motion_mode_cand, args.skip_motion_mode);
5970
0
    }
5971
5972
    /* keep record of best compound/single-only prediction */
5973
0
    record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
5974
0
                         x->rdmult, &search_state, compmode_cost);
5975
0
  }
5976
5977
#if CONFIG_COLLECT_COMPONENT_TIMING
5978
  start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
5979
#endif
5980
0
  if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
5981
    // For the single ref winner candidates, evaluate other motion modes (non
5982
    // simple translation).
5983
0
    evaluate_motion_mode_for_winner_candidates(
5984
0
        cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
5985
0
        &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
5986
0
        &search_state, &best_inter_yrd);
5987
0
  }
5988
#if CONFIG_COLLECT_COMPONENT_TIMING
5989
  end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
5990
#endif
5991
5992
#if CONFIG_COLLECT_COMPONENT_TIMING
5993
  start_timing(cpi, do_tx_search_time);
5994
#endif
5995
0
  if (do_tx_search != 1) {
5996
    // A full tx search has not yet been done, do tx search for
5997
    // top mode candidates
5998
0
    tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
5999
0
                                    yv12_mb, mi_row, mi_col, &search_state,
6000
0
                                    rd_cost, ctx, &best_inter_yrd);
6001
0
  }
6002
#if CONFIG_COLLECT_COMPONENT_TIMING
6003
  end_timing(cpi, do_tx_search_time);
6004
#endif
6005
6006
#if CONFIG_COLLECT_COMPONENT_TIMING
6007
  start_timing(cpi, handle_intra_mode_time);
6008
#endif
6009
  // Gate intra mode evaluation if best of inter is skip except when source
6010
  // variance is extremely low and also based on max intra bsize.
6011
0
  skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost,
6012
0
                                 intra_cost);
6013
6014
0
  const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
6015
0
  search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
6016
0
                                   &sf_args, intra_ref_frame_cost,
6017
0
                                   best_inter_yrd);
6018
#if CONFIG_COLLECT_COMPONENT_TIMING
6019
  end_timing(cpi, handle_intra_mode_time);
6020
#endif
6021
6022
#if CONFIG_COLLECT_COMPONENT_TIMING
6023
  start_timing(cpi, refine_winner_mode_tx_time);
6024
#endif
6025
0
  int winner_mode_count =
6026
0
      cpi->sf.winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
6027
  // In effect only when fast tx search speed features are enabled.
6028
0
  refine_winner_mode_tx(
6029
0
      cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
6030
0
      &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
6031
0
      search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
6032
#if CONFIG_COLLECT_COMPONENT_TIMING
6033
  end_timing(cpi, refine_winner_mode_tx_time);
6034
#endif
6035
6036
  // Initialize default mode evaluation params
6037
0
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);
6038
6039
  // Only try palette mode when the best mode so far is an intra mode.
6040
0
  const int try_palette =
6041
0
      cpi->oxcf.tool_cfg.enable_palette &&
6042
0
      av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
6043
0
      !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
6044
0
  RD_STATS this_rd_cost;
6045
0
  int this_skippable = 0;
6046
0
  if (try_palette) {
6047
#if CONFIG_COLLECT_COMPONENT_TIMING
6048
    start_timing(cpi, av1_search_palette_mode_time);
6049
#endif
6050
0
    this_skippable = av1_search_palette_mode(
6051
0
        &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
6052
0
        ctx, &this_rd_cost, search_state.best_rd);
6053
#if CONFIG_COLLECT_COMPONENT_TIMING
6054
    end_timing(cpi, av1_search_palette_mode_time);
6055
#endif
    // Palette beat the current best: record it under the THR_DC mode index
    // and snapshot the block's mode info, blk_skip and tx_type_map into the
    // search state / coding context.
6056
0
    if (this_rd_cost.rdcost < search_state.best_rd) {
6057
0
      search_state.best_mode_index = THR_DC;
6058
0
      mbmi->mv[0].as_int = 0;
6059
0
      rd_cost->rate = this_rd_cost.rate;
6060
0
      rd_cost->dist = this_rd_cost.dist;
6061
0
      rd_cost->rdcost = this_rd_cost.rdcost;
6062
0
      search_state.best_rd = rd_cost->rdcost;
6063
0
      search_state.best_mbmode = *mbmi;
6064
0
      search_state.best_skip2 = 0;
6065
0
      search_state.best_mode_skippable = this_skippable;
6066
0
      memcpy(ctx->blk_skip, txfm_info->blk_skip,
6067
0
             sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
6068
0
      av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
6069
0
    }
6070
0
  }
6071
6072
  // Skip mode is only evaluated when the frame enables it, the block size
  // allows compound references, and the segment does not fix the reference
  // frame.
0
  search_state.best_mbmode.skip_mode = 0;
6073
0
  if (cm->current_frame.skip_mode_info.skip_mode_flag &&
6074
0
      is_comp_ref_allowed(bsize)) {
6075
0
    const struct segmentation *const seg = &cm->seg;
6076
0
    unsigned char segment_id = mbmi->segment_id;
6077
0
    if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
6078
0
      rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
6079
0
    }
6080
0
  }
6081
6082
  // Make sure that the ref_mv_idx is only nonzero when we're
6083
  // using a mode which can support ref_mv_idx
6084
0
  if (search_state.best_mbmode.ref_mv_idx != 0 &&
6085
0
      !(search_state.best_mbmode.mode == NEWMV ||
6086
0
        search_state.best_mbmode.mode == NEW_NEWMV ||
6087
0
        have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
6088
0
    search_state.best_mbmode.ref_mv_idx = 0;
6089
0
  }
6090
6091
  // Report failure if nothing was selected or the best RD cost does not beat
  // the caller's best_rd_so_far.
0
  if (search_state.best_mode_index == THR_INVALID ||
6092
0
      search_state.best_rd >= best_rd_so_far) {
6093
0
    rd_cost->rate = INT_MAX;
6094
0
    rd_cost->rdcost = INT64_MAX;
6095
0
    return;
6096
0
  }
6097
6098
0
  const InterpFilter interp_filter = features->interp_filter;
6099
0
  assert((interp_filter == SWITCHABLE) ||
6100
0
         (interp_filter ==
6101
0
          search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
6102
0
         !is_inter_block(&search_state.best_mbmode));
6103
0
  assert((interp_filter == SWITCHABLE) ||
6104
0
         (interp_filter ==
6105
0
          search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
6106
0
         !is_inter_block(&search_state.best_mbmode));
6107
6108
0
  if (!cpi->rc.is_src_frame_alt_ref && cpi->sf.inter_sf.adaptive_rd_thresh) {
6109
0
    av1_update_rd_thresh_fact(
6110
0
        cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
6111
0
        search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
6112
0
  }
6113
6114
  // macroblock modes
6115
0
  *mbmi = search_state.best_mbmode;
6116
0
  txfm_info->skip_txfm |= search_state.best_skip2;
6117
6118
  // Note: this section is needed since the mode may have been forced to
6119
  // GLOBALMV by the all-zero mode handling of ref-mv.
6120
0
  if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
6121
    // Correct the interp filters for GLOBALMV
6122
0
    if (is_nontrans_global_motion(xd, xd->mi[0])) {
6123
0
      int_interpfilters filters =
6124
0
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
6125
0
      assert(mbmi->interp_filters.as_int == filters.as_int);
      // filters is only read by the assert above; the cast keeps builds
      // without asserts free of unused-variable warnings.
6126
0
      (void)filters;
6127
0
    }
6128
0
  }
6129
6130
0
  txfm_info->skip_txfm |= search_state.best_mode_skippable;
6131
6132
0
  assert(search_state.best_mode_index != THR_INVALID);
6133
6134
#if CONFIG_INTERNAL_STATS
6135
  store_coding_context(x, ctx, search_state.best_mode_index,
6136
                       search_state.best_mode_skippable);
6137
#else
6138
0
  store_coding_context(x, ctx, search_state.best_mode_skippable);
6139
0
#endif  // CONFIG_INTERNAL_STATS
6140
6141
0
  if (mbmi->palette_mode_info.palette_size[1] > 0) {
6142
0
    assert(try_palette);
6143
0
    av1_restore_uv_color_map(cpi, x);
6144
0
  }
6145
0
}
6146
6147
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
6148
                                        TileDataEnc *tile_data, MACROBLOCK *x,
6149
                                        int mi_row, int mi_col,
6150
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
6151
                                        PICK_MODE_CONTEXT *ctx,
6152
0
                                        int64_t best_rd_so_far) {
6153
0
  const AV1_COMMON *const cm = &cpi->common;
6154
0
  const FeatureFlags *const features = &cm->features;
6155
0
  MACROBLOCKD *const xd = &x->e_mbd;
6156
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
6157
0
  unsigned char segment_id = mbmi->segment_id;
6158
0
  const int comp_pred = 0;
6159
0
  int i;
6160
0
  unsigned int ref_costs_single[REF_FRAMES];
6161
0
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
6162
0
  const ModeCosts *mode_costs = &x->mode_costs;
6163
0
  const int *comp_inter_cost =
6164
0
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
6165
0
  InterpFilter best_filter = SWITCHABLE;
6166
0
  int64_t this_rd = INT64_MAX;
6167
0
  int rate2 = 0;
6168
0
  const int64_t distortion2 = 0;
6169
0
  (void)mi_row;
6170
0
  (void)mi_col;
6171
0
  (void)tile_data;
6172
6173
0
  av1_collect_neighbors_ref_counts(xd);
6174
6175
0
  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
6176
0
                           ref_costs_comp);
6177
6178
0
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
6179
0
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;
6180
6181
0
  rd_cost->rate = INT_MAX;
6182
6183
0
  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
6184
6185
0
  mbmi->palette_mode_info.palette_size[0] = 0;
6186
0
  mbmi->palette_mode_info.palette_size[1] = 0;
6187
0
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
6188
0
  mbmi->mode = GLOBALMV;
6189
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
6190
0
  mbmi->uv_mode = UV_DC_PRED;
6191
0
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
6192
0
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
6193
0
  else
6194
0
    mbmi->ref_frame[0] = LAST_FRAME;
6195
0
  mbmi->ref_frame[1] = NONE_FRAME;
6196
0
  mbmi->mv[0].as_int =
6197
0
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
6198
0
                           features->allow_high_precision_mv, bsize, mi_col,
6199
0
                           mi_row, features->cur_frame_force_integer_mv)
6200
0
          .as_int;
6201
0
  mbmi->tx_size = max_txsize_lookup[bsize];
6202
0
  x->txfm_search_info.skip_txfm = 1;
6203
6204
0
  mbmi->ref_mv_idx = 0;
6205
6206
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
6207
0
  av1_count_overlappable_neighbors(cm, xd);
6208
0
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
6209
0
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
6210
0
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
6211
    // Select the samples according to motion vector difference
6212
0
    if (mbmi->num_proj_ref > 1) {
6213
0
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
6214
0
                                             mbmi->num_proj_ref, bsize);
6215
0
    }
6216
0
  }
6217
6218
0
  const InterpFilter interp_filter = features->interp_filter;
6219
0
  set_default_interp_filters(mbmi, interp_filter);
6220
6221
0
  if (interp_filter != SWITCHABLE) {
6222
0
    best_filter = interp_filter;
6223
0
  } else {
6224
0
    best_filter = EIGHTTAP_REGULAR;
6225
0
    if (av1_is_interp_needed(xd)) {
6226
0
      int rs;
6227
0
      int best_rs = INT_MAX;
6228
0
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
6229
0
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
6230
0
        rs = av1_get_switchable_rate(x, xd, interp_filter,
6231
0
                                     cm->seq_params->enable_dual_filter);
6232
0
        if (rs < best_rs) {
6233
0
          best_rs = rs;
6234
0
          best_filter = mbmi->interp_filters.as_filters.y_filter;
6235
0
        }
6236
0
      }
6237
0
    }
6238
0
  }
6239
  // Set the appropriate filter
6240
0
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
6241
0
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
6242
0
                                   cm->seq_params->enable_dual_filter);
6243
6244
0
  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
6245
0
    rate2 += comp_inter_cost[comp_pred];
6246
6247
  // Estimate the reference frame signaling cost and add it
6248
  // to the rolling cost variable.
6249
0
  rate2 += ref_costs_single[LAST_FRAME];
6250
0
  this_rd = RDCOST(x->rdmult, rate2, distortion2);
6251
6252
0
  rd_cost->rate = rate2;
6253
0
  rd_cost->dist = distortion2;
6254
0
  rd_cost->rdcost = this_rd;
6255
6256
0
  if (this_rd >= best_rd_so_far) {
6257
0
    rd_cost->rate = INT_MAX;
6258
0
    rd_cost->rdcost = INT64_MAX;
6259
0
    return;
6260
0
  }
6261
6262
0
  assert((interp_filter == SWITCHABLE) ||
6263
0
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));
6264
6265
0
  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
6266
0
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
6267
0
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
6268
0
                              THR_GLOBALMV, THR_INTER_MODE_START,
6269
0
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
6270
0
  }
6271
6272
#if CONFIG_INTERNAL_STATS
6273
  store_coding_context(x, ctx, THR_GLOBALMV, 0);
6274
#else
6275
0
  store_coding_context(x, ctx, 0);
6276
0
#endif  // CONFIG_INTERNAL_STATS
6277
0
}
6278
6279
/*!\cond */
6280
// Arguments shared with the calc_target_weighted_pred_above() and
// calc_target_weighted_pred_left() callbacks, which receive a pointer to this
// struct through their opaque 'fun_ctxt' parameter.
struct calc_target_weighted_pred_ctxt {
6281
  // Provides the 'wsrc' and 'mask' arrays that the callbacks write into.
  const OBMCBuffer *obmc_buffer;
6282
  // Neighbouring prediction samples read by the callbacks. The callbacks pass
  // this pointer through CONVERT_TO_SHORTPTR() when is_cur_buf_hbd() is set.
  const uint8_t *tmp;
6283
  // Distance, in samples, between the starts of consecutive rows of 'tmp'.
  int tmp_stride;
6284
  // Number of rows (above callback) or columns (left callback) processed per
  // neighbour; also the length passed to av1_get_obmc_mask().
  int overlap;
6285
};
6286
/*!\endcond */
6287
6288
static INLINE void calc_target_weighted_pred_above(
6289
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6290
0
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6291
0
  (void)nb_mi;
6292
0
  (void)num_planes;
6293
0
  (void)rel_mi_row;
6294
0
  (void)dir;
6295
6296
0
  struct calc_target_weighted_pred_ctxt *ctxt =
6297
0
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6298
6299
0
  const int bw = xd->width << MI_SIZE_LOG2;
6300
0
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6301
6302
0
  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
6303
0
  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
6304
0
  const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
6305
0
  const int is_hbd = is_cur_buf_hbd(xd);
6306
6307
0
  if (!is_hbd) {
6308
0
    for (int row = 0; row < ctxt->overlap; ++row) {
6309
0
      const uint8_t m0 = mask1d[row];
6310
0
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6311
0
      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6312
0
        wsrc[col] = m1 * tmp[col];
6313
0
        mask[col] = m0;
6314
0
      }
6315
0
      wsrc += bw;
6316
0
      mask += bw;
6317
0
      tmp += ctxt->tmp_stride;
6318
0
    }
6319
0
  } else {
6320
0
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6321
6322
0
    for (int row = 0; row < ctxt->overlap; ++row) {
6323
0
      const uint8_t m0 = mask1d[row];
6324
0
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6325
0
      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6326
0
        wsrc[col] = m1 * tmp16[col];
6327
0
        mask[col] = m0;
6328
0
      }
6329
0
      wsrc += bw;
6330
0
      mask += bw;
6331
0
      tmp16 += ctxt->tmp_stride;
6332
0
    }
6333
0
  }
6334
0
}
6335
6336
static INLINE void calc_target_weighted_pred_left(
6337
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6338
0
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6339
0
  (void)nb_mi;
6340
0
  (void)num_planes;
6341
0
  (void)rel_mi_col;
6342
0
  (void)dir;
6343
6344
0
  struct calc_target_weighted_pred_ctxt *ctxt =
6345
0
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6346
6347
0
  const int bw = xd->width << MI_SIZE_LOG2;
6348
0
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6349
6350
0
  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
6351
0
  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
6352
0
  const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
6353
0
  const int is_hbd = is_cur_buf_hbd(xd);
6354
6355
0
  if (!is_hbd) {
6356
0
    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6357
0
      for (int col = 0; col < ctxt->overlap; ++col) {
6358
0
        const uint8_t m0 = mask1d[col];
6359
0
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6360
0
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6361
0
                    (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6362
0
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6363
0
      }
6364
0
      wsrc += bw;
6365
0
      mask += bw;
6366
0
      tmp += ctxt->tmp_stride;
6367
0
    }
6368
0
  } else {
6369
0
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6370
6371
0
    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6372
0
      for (int col = 0; col < ctxt->overlap; ++col) {
6373
0
        const uint8_t m0 = mask1d[col];
6374
0
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6375
0
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6376
0
                    (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6377
0
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6378
0
      }
6379
0
      wsrc += bw;
6380
0
      mask += bw;
6381
0
      tmp16 += ctxt->tmp_stride;
6382
0
    }
6383
0
  }
6384
0
}
6385
6386
// This function has a structure similar to av1_build_obmc_inter_prediction
6387
//
6388
// The OBMC predictor is computed as:
6389
//
6390
//  PObmc(x,y) =
6391
//    AOM_BLEND_A64(Mh(x),
6392
//                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
6393
//                  PLeft(x, y))
6394
//
6395
// Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
6396
// rounding, this can be written as:
6397
//
6398
//  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
6399
//    Mh(x) * Mv(y) * P(x,y) +
6400
//      Mh(x) * Cv(y) * Pabove(x,y) +
6401
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6402
//
6403
// Where :
6404
//
6405
//  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
6406
//  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
6407
//
6408
// This function computes 'wsrc' and 'mask' as:
6409
//
6410
//  wsrc(x, y) =
6411
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
6412
//      Mh(x) * Cv(y) * Pabove(x,y) -
6413
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6414
//
6415
//  mask(x, y) = Mh(x) * Mv(y)
6416
//
6417
// These can then be used to efficiently approximate the error for any
6418
// predictor P in the context of the provided neighbouring predictors by
6419
// computing:
6420
//
6421
//  error(x, y) =
6422
//    (wsrc(x, y) - mask(x, y) * P(x, y)) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
6423
//
6424
static AOM_INLINE void calc_target_weighted_pred(
6425
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
6426
    const uint8_t *above, int above_stride, const uint8_t *left,
6427
0
    int left_stride) {
6428
0
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
6429
0
  const int bw = xd->width << MI_SIZE_LOG2;
6430
0
  const int bh = xd->height << MI_SIZE_LOG2;
6431
0
  const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
6432
0
  int32_t *mask_buf = obmc_buffer->mask;
6433
0
  int32_t *wsrc_buf = obmc_buffer->wsrc;
6434
6435
0
  const int is_hbd = is_cur_buf_hbd(xd);
6436
0
  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
6437
6438
  // plane 0 should not be sub-sampled
6439
0
  assert(xd->plane[0].subsampling_x == 0);
6440
0
  assert(xd->plane[0].subsampling_y == 0);
6441
6442
0
  av1_zero_array(wsrc_buf, bw * bh);
6443
0
  for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
6444
6445
  // handle above row
6446
0
  if (xd->up_available) {
6447
0
    const int overlap =
6448
0
        AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
6449
0
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
6450
0
                                                   above_stride, overlap };
6451
0
    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
6452
0
                                  max_neighbor_obmc[mi_size_wide_log2[bsize]],
6453
0
                                  calc_target_weighted_pred_above, &ctxt);
6454
0
  }
6455
6456
0
  for (int i = 0; i < bw * bh; ++i) {
6457
0
    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6458
0
    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6459
0
  }
6460
6461
  // handle left column
6462
0
  if (xd->left_available) {
6463
0
    const int overlap =
6464
0
        AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
6465
0
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
6466
0
                                                   left_stride, overlap };
6467
0
    foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
6468
0
                                 max_neighbor_obmc[mi_size_high_log2[bsize]],
6469
0
                                 calc_target_weighted_pred_left, &ctxt);
6470
0
  }
6471
6472
0
  if (!is_hbd) {
6473
0
    const uint8_t *src = x->plane[0].src.buf;
6474
6475
0
    for (int row = 0; row < bh; ++row) {
6476
0
      for (int col = 0; col < bw; ++col) {
6477
0
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6478
0
      }
6479
0
      wsrc_buf += bw;
6480
0
      src += x->plane[0].src.stride;
6481
0
    }
6482
0
  } else {
6483
0
    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
6484
6485
0
    for (int row = 0; row < bh; ++row) {
6486
0
      for (int col = 0; col < bw; ++col) {
6487
0
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6488
0
      }
6489
0
      wsrc_buf += bw;
6490
0
      src += x->plane[0].src.stride;
6491
0
    }
6492
0
  }
6493
0
}