Coverage Report

Created: 2026-06-10 06:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/aom/av1/encoder/rdopt.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <math.h>
14
#include <stdbool.h>
15
#include <stdint.h>
16
#include <string.h>
17
18
#include "config/aom_config.h"
19
#include "config/aom_dsp_rtcd.h"
20
#include "config/av1_rtcd.h"
21
22
#include "aom_dsp/aom_dsp_common.h"
23
#include "aom_dsp/blend.h"
24
#include "aom_mem/aom_mem.h"
25
#include "aom_ports/aom_timer.h"
26
#include "aom_ports/mem.h"
27
28
#include "av1/common/av1_common_int.h"
29
#include "av1/common/cfl.h"
30
#include "av1/common/blockd.h"
31
#include "av1/common/common.h"
32
#include "av1/common/common_data.h"
33
#include "av1/common/entropy.h"
34
#include "av1/common/entropymode.h"
35
#include "av1/common/enums.h"
36
#include "av1/common/idct.h"
37
#include "av1/common/mvref_common.h"
38
#include "av1/common/obmc.h"
39
#include "av1/common/pred_common.h"
40
#include "av1/common/quant_common.h"
41
#include "av1/common/reconinter.h"
42
#include "av1/common/reconintra.h"
43
#include "av1/common/scan.h"
44
#include "av1/common/seg_common.h"
45
#include "av1/common/txb_common.h"
46
#include "av1/common/warped_motion.h"
47
48
#include "av1/encoder/aq_variance.h"
49
#include "av1/encoder/av1_quantize.h"
50
#include "av1/encoder/block.h"
51
#include "av1/encoder/cost.h"
52
#include "av1/encoder/compound_type.h"
53
#include "av1/encoder/encodemb.h"
54
#include "av1/encoder/encodemv.h"
55
#include "av1/encoder/encoder.h"
56
#include "av1/encoder/encodetxb.h"
57
#include "av1/encoder/hybrid_fwd_txfm.h"
58
#include "av1/encoder/interp_search.h"
59
#include "av1/encoder/intra_mode_search.h"
60
#include "av1/encoder/intra_mode_search_utils.h"
61
#include "av1/encoder/mcomp.h"
62
#include "av1/encoder/ml.h"
63
#include "av1/encoder/mode_prune_model_weights.h"
64
#include "av1/encoder/model_rd.h"
65
#include "av1/encoder/motion_search_facade.h"
66
#include "av1/encoder/palette.h"
67
#include "av1/encoder/pustats.h"
68
#include "av1/encoder/random.h"
69
#include "av1/encoder/ratectrl.h"
70
#include "av1/encoder/rd.h"
71
#include "av1/encoder/rdopt.h"
72
#include "av1/encoder/reconinter_enc.h"
73
#include "av1/encoder/tokenize.h"
74
#include "av1/encoder/tpl_model.h"
75
#include "av1/encoder/tx_search.h"
76
#include "av1/encoder/var_based_part.h"
77
78
8.06M
#define LAST_NEW_MV_INDEX 6
79
80
// Mode_threshold multiplication factor table for prune_inter_modes_if_skippable
81
// The values are kept in Q12 format and equation used to derive is
82
// (2.5 - ((float)x->qindex / MAXQ) * 1.5)
83
10.6M
#define MODE_THRESH_QBITS 12
84
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
85
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
86
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
87
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
88
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
89
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
90
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
91
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
92
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
93
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
94
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
95
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
96
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
97
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
98
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
99
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
100
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
101
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
102
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
103
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
104
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
105
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
106
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
107
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
108
  4144,  4120,  4096
109
};
110
111
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
112
  THR_NEARESTMV,
113
  THR_NEARESTL2,
114
  THR_NEARESTL3,
115
  THR_NEARESTB,
116
  THR_NEARESTA2,
117
  THR_NEARESTA,
118
  THR_NEARESTG,
119
120
  THR_NEWMV,
121
  THR_NEWL2,
122
  THR_NEWL3,
123
  THR_NEWB,
124
  THR_NEWA2,
125
  THR_NEWA,
126
  THR_NEWG,
127
128
  THR_NEARMV,
129
  THR_NEARL2,
130
  THR_NEARL3,
131
  THR_NEARB,
132
  THR_NEARA2,
133
  THR_NEARA,
134
  THR_NEARG,
135
136
  THR_GLOBALMV,
137
  THR_GLOBALL2,
138
  THR_GLOBALL3,
139
  THR_GLOBALB,
140
  THR_GLOBALA2,
141
  THR_GLOBALA,
142
  THR_GLOBALG,
143
144
  THR_COMP_NEAREST_NEARESTLA,
145
  THR_COMP_NEAREST_NEARESTL2A,
146
  THR_COMP_NEAREST_NEARESTL3A,
147
  THR_COMP_NEAREST_NEARESTGA,
148
  THR_COMP_NEAREST_NEARESTLB,
149
  THR_COMP_NEAREST_NEARESTL2B,
150
  THR_COMP_NEAREST_NEARESTL3B,
151
  THR_COMP_NEAREST_NEARESTGB,
152
  THR_COMP_NEAREST_NEARESTLA2,
153
  THR_COMP_NEAREST_NEARESTL2A2,
154
  THR_COMP_NEAREST_NEARESTL3A2,
155
  THR_COMP_NEAREST_NEARESTGA2,
156
  THR_COMP_NEAREST_NEARESTLL2,
157
  THR_COMP_NEAREST_NEARESTLL3,
158
  THR_COMP_NEAREST_NEARESTLG,
159
  THR_COMP_NEAREST_NEARESTBA,
160
161
  THR_COMP_NEAR_NEARLB,
162
  THR_COMP_NEW_NEWLB,
163
  THR_COMP_NEW_NEARESTLB,
164
  THR_COMP_NEAREST_NEWLB,
165
  THR_COMP_NEW_NEARLB,
166
  THR_COMP_NEAR_NEWLB,
167
  THR_COMP_GLOBAL_GLOBALLB,
168
169
  THR_COMP_NEAR_NEARLA,
170
  THR_COMP_NEW_NEWLA,
171
  THR_COMP_NEW_NEARESTLA,
172
  THR_COMP_NEAREST_NEWLA,
173
  THR_COMP_NEW_NEARLA,
174
  THR_COMP_NEAR_NEWLA,
175
  THR_COMP_GLOBAL_GLOBALLA,
176
177
  THR_COMP_NEAR_NEARL2A,
178
  THR_COMP_NEW_NEWL2A,
179
  THR_COMP_NEW_NEARESTL2A,
180
  THR_COMP_NEAREST_NEWL2A,
181
  THR_COMP_NEW_NEARL2A,
182
  THR_COMP_NEAR_NEWL2A,
183
  THR_COMP_GLOBAL_GLOBALL2A,
184
185
  THR_COMP_NEAR_NEARL3A,
186
  THR_COMP_NEW_NEWL3A,
187
  THR_COMP_NEW_NEARESTL3A,
188
  THR_COMP_NEAREST_NEWL3A,
189
  THR_COMP_NEW_NEARL3A,
190
  THR_COMP_NEAR_NEWL3A,
191
  THR_COMP_GLOBAL_GLOBALL3A,
192
193
  THR_COMP_NEAR_NEARGA,
194
  THR_COMP_NEW_NEWGA,
195
  THR_COMP_NEW_NEARESTGA,
196
  THR_COMP_NEAREST_NEWGA,
197
  THR_COMP_NEW_NEARGA,
198
  THR_COMP_NEAR_NEWGA,
199
  THR_COMP_GLOBAL_GLOBALGA,
200
201
  THR_COMP_NEAR_NEARL2B,
202
  THR_COMP_NEW_NEWL2B,
203
  THR_COMP_NEW_NEARESTL2B,
204
  THR_COMP_NEAREST_NEWL2B,
205
  THR_COMP_NEW_NEARL2B,
206
  THR_COMP_NEAR_NEWL2B,
207
  THR_COMP_GLOBAL_GLOBALL2B,
208
209
  THR_COMP_NEAR_NEARL3B,
210
  THR_COMP_NEW_NEWL3B,
211
  THR_COMP_NEW_NEARESTL3B,
212
  THR_COMP_NEAREST_NEWL3B,
213
  THR_COMP_NEW_NEARL3B,
214
  THR_COMP_NEAR_NEWL3B,
215
  THR_COMP_GLOBAL_GLOBALL3B,
216
217
  THR_COMP_NEAR_NEARGB,
218
  THR_COMP_NEW_NEWGB,
219
  THR_COMP_NEW_NEARESTGB,
220
  THR_COMP_NEAREST_NEWGB,
221
  THR_COMP_NEW_NEARGB,
222
  THR_COMP_NEAR_NEWGB,
223
  THR_COMP_GLOBAL_GLOBALGB,
224
225
  THR_COMP_NEAR_NEARLA2,
226
  THR_COMP_NEW_NEWLA2,
227
  THR_COMP_NEW_NEARESTLA2,
228
  THR_COMP_NEAREST_NEWLA2,
229
  THR_COMP_NEW_NEARLA2,
230
  THR_COMP_NEAR_NEWLA2,
231
  THR_COMP_GLOBAL_GLOBALLA2,
232
233
  THR_COMP_NEAR_NEARL2A2,
234
  THR_COMP_NEW_NEWL2A2,
235
  THR_COMP_NEW_NEARESTL2A2,
236
  THR_COMP_NEAREST_NEWL2A2,
237
  THR_COMP_NEW_NEARL2A2,
238
  THR_COMP_NEAR_NEWL2A2,
239
  THR_COMP_GLOBAL_GLOBALL2A2,
240
241
  THR_COMP_NEAR_NEARL3A2,
242
  THR_COMP_NEW_NEWL3A2,
243
  THR_COMP_NEW_NEARESTL3A2,
244
  THR_COMP_NEAREST_NEWL3A2,
245
  THR_COMP_NEW_NEARL3A2,
246
  THR_COMP_NEAR_NEWL3A2,
247
  THR_COMP_GLOBAL_GLOBALL3A2,
248
249
  THR_COMP_NEAR_NEARGA2,
250
  THR_COMP_NEW_NEWGA2,
251
  THR_COMP_NEW_NEARESTGA2,
252
  THR_COMP_NEAREST_NEWGA2,
253
  THR_COMP_NEW_NEARGA2,
254
  THR_COMP_NEAR_NEWGA2,
255
  THR_COMP_GLOBAL_GLOBALGA2,
256
257
  THR_COMP_NEAR_NEARLL2,
258
  THR_COMP_NEW_NEWLL2,
259
  THR_COMP_NEW_NEARESTLL2,
260
  THR_COMP_NEAREST_NEWLL2,
261
  THR_COMP_NEW_NEARLL2,
262
  THR_COMP_NEAR_NEWLL2,
263
  THR_COMP_GLOBAL_GLOBALLL2,
264
265
  THR_COMP_NEAR_NEARLL3,
266
  THR_COMP_NEW_NEWLL3,
267
  THR_COMP_NEW_NEARESTLL3,
268
  THR_COMP_NEAREST_NEWLL3,
269
  THR_COMP_NEW_NEARLL3,
270
  THR_COMP_NEAR_NEWLL3,
271
  THR_COMP_GLOBAL_GLOBALLL3,
272
273
  THR_COMP_NEAR_NEARLG,
274
  THR_COMP_NEW_NEWLG,
275
  THR_COMP_NEW_NEARESTLG,
276
  THR_COMP_NEAREST_NEWLG,
277
  THR_COMP_NEW_NEARLG,
278
  THR_COMP_NEAR_NEWLG,
279
  THR_COMP_GLOBAL_GLOBALLG,
280
281
  THR_COMP_NEAR_NEARBA,
282
  THR_COMP_NEW_NEWBA,
283
  THR_COMP_NEW_NEARESTBA,
284
  THR_COMP_NEAREST_NEWBA,
285
  THR_COMP_NEW_NEARBA,
286
  THR_COMP_NEAR_NEWBA,
287
  THR_COMP_GLOBAL_GLOBALBA,
288
289
  THR_DC,
290
  THR_PAETH,
291
  THR_SMOOTH,
292
  THR_SMOOTH_V,
293
  THR_SMOOTH_H,
294
  THR_H_PRED,
295
  THR_V_PRED,
296
  THR_D135_PRED,
297
  THR_D203_PRED,
298
  THR_D157_PRED,
299
  THR_D67_PRED,
300
  THR_D113_PRED,
301
  THR_D45_PRED,
302
};
303
304
/*!\cond */
305
typedef struct SingleInterModeState {
306
  int64_t rd;
307
  MV_REFERENCE_FRAME ref_frame;
308
  int valid;
309
} SingleInterModeState;
310
311
typedef struct InterModeSearchState {
312
  int64_t best_rd;
313
  int64_t best_skip_rd[2];
314
  MB_MODE_INFO best_mbmode;
315
  int best_rate_y;
316
  int best_rate_uv;
317
  int best_mode_skippable;
318
  int best_skip2;
319
  THR_MODES best_mode_index;
320
  int num_available_refs;
321
  int64_t dist_refs[REF_FRAMES];
322
  int dist_order_refs[REF_FRAMES];
323
  int64_t mode_threshold[MAX_MODES];
324
  int64_t best_intra_rd;
325
  unsigned int best_pred_sse;
326
327
  /*!
328
   * \brief Keep track of best intra rd for use in compound mode.
329
   */
330
  int64_t best_pred_rd[REFERENCE_MODES];
331
  // Save a set of single_newmv for each checked ref_mv.
332
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
333
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
334
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
335
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
336
  // The rd of simple translation in single inter modes
337
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
338
  int64_t best_single_rd[REF_FRAMES];
339
  PREDICTION_MODE best_single_mode[REF_FRAMES];
340
341
  // Single search results by [directions][modes][reference frames]
342
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
343
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
344
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
345
                                            [FWD_REFS];
346
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
347
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
348
  IntraModeSearchState intra_search_state;
349
  RD_STATS best_y_rdcost;
350
} InterModeSearchState;
351
/*!\endcond */
352
353
323k
// Resets the inter mode RD model of every block size in `tile_data`: each
// model is marked not ready and its sample count and running sums are
// cleared. The fitted parameters and means are left as they are.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    InterModeRdModel *const model = &tile_data->inter_mode_rd_models[bsize];

    model->ready = 0;
    model->num = 0;

    // Running sums accumulated by inter_mode_data_push().
    model->dist_sum = 0;
    model->ld_sum = 0;
    model->sse_sum = 0;
    model->sse_sse_sum = 0;
    model->sse_ld_sum = 0;
  }
}
365
366
// Estimates the residue cost and distortion of a block from its sse, using
// the fitted model for `bsize`.
//
// Returns 0 without touching the outputs when the model is not ready yet.
// Otherwise returns 1 after writing *est_residue_cost and *est_dist:
//   - sse below the model's mean distortion: cost 0, distortion = sse;
//   - otherwise distortion = round(dist_mean) and
//     cost = (sse - dist_mean) / (a * sse + b), limited to [0, INT_MAX / 2].
//     A cost that ends up 0 falls back to distortion = sse.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
  if (!md->ready) return 0;

  if (sse < md->dist_mean) {
    *est_residue_cost = 0;
    *est_dist = sse;
    return 1;
  }

  // Clamp estimated rate cost by INT_MAX / 2.
  // TODO(angiebird@google.com): find better solution than clamping.
  const int max_residue_cost = INT_MAX / 2;

  *est_dist = (int64_t)round(md->dist_mean);
  const double est_ld = md->a * sse + md->b;
  if (fabs(est_ld) < 1e-2) {
    *est_residue_cost = max_residue_cost;
  } else {
    // Range-check while still in floating point: converting a NaN or an
    // out-of-range double to an integer type is undefined behaviour, and the
    // fitted a/b can be non-finite when the fit's denominator was zero.
    const double cost = round((sse - md->dist_mean) / est_ld);
    if (cost >= (double)max_residue_cost) {
      *est_residue_cost = max_residue_cost;
    } else if (cost > 0) {
      *est_residue_cost = (int)cost;
    } else {
      // Negative, zero or NaN.
      *est_residue_cost = 0;
    }
  }

  if (*est_residue_cost <= 0) {
    *est_residue_cost = 0;
    *est_dist = sse;
  }
  return 1;
}
399
400
18.2k
// Refits the inter mode RD model of every block size that has collected
// enough samples since the last fit, then clears its running sums.
//
// A model that is not ready yet needs at least 200 samples and takes the
// sample means directly; a ready model needs at least 64 samples and blends
// the new sample means into its previous means with weights 3:1. The line
// parameters a and b are then recomputed from the means.
// `rdmult` is accepted but not used.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  (void)rdmult;

  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    InterModeRdModel *const md = &tile_data->inter_mode_rd_models[bsize];

    // Block sizes without a model slot are skipped.
    if (inter_mode_data_block_idx(bsize) == -1) continue;

    // Not enough new samples for this model yet.
    if (md->ready == 0 && md->num < 200) continue;
    if (md->ready == 1 && md->num < 64) continue;

    if (md->ready == 0) {
      // First fit: the means are the plain sample means.
      md->dist_mean = md->dist_sum / md->num;
      md->ld_mean = md->ld_sum / md->num;
      md->sse_mean = md->sse_sum / md->num;
      md->sse_sse_mean = md->sse_sse_sum / md->num;
      md->sse_ld_mean = md->sse_ld_sum / md->num;
    } else {
      // Later fits: weighted average of the old mean and the new batch.
      const double old_weight = 3;
      md->dist_mean = (md->dist_mean * old_weight + (md->dist_sum / md->num)) /
                      (old_weight + 1);
      md->ld_mean = (md->ld_mean * old_weight + (md->ld_sum / md->num)) /
                    (old_weight + 1);
      md->sse_mean = (md->sse_mean * old_weight + (md->sse_sum / md->num)) /
                     (old_weight + 1);
      md->sse_sse_mean =
          (md->sse_sse_mean * old_weight + (md->sse_sse_sum / md->num)) /
          (old_weight + 1);
      md->sse_ld_mean =
          (md->sse_ld_mean * old_weight + (md->sse_ld_sum / md->num)) /
          (old_weight + 1);
    }

    // Line fit of ld against sse from the first and second moments.
    const double ld_avg = md->ld_mean;
    const double sse_avg = md->sse_mean;
    const double sse_rms = sqrt(md->sse_sse_mean);
    const double sse_ld_avg = md->sse_ld_mean;

    md->a = (sse_ld_avg - sse_avg * ld_avg) /
            (sse_rms * sse_rms - sse_avg * sse_avg);
    md->b = ld_avg - md->a * sse_avg;
    md->ready = 1;

    // Start a fresh batch.
    md->num = 0;
    md->dist_sum = 0;
    md->ld_sum = 0;
    md->sse_sum = 0;
    md->sse_sse_sum = 0;
    md->sse_ld_sum = 0;
  }
}
449
450
static inline void inter_mode_data_push(TileDataEnc *tile_data,
451
                                        BLOCK_SIZE bsize, int64_t sse,
452
428k
                                        int64_t dist, int residue_cost) {
453
428k
  if (residue_cost == 0 || sse == dist) return;
454
330k
  const int block_idx = inter_mode_data_block_idx(bsize);
455
330k
  if (block_idx == -1) return;
456
330k
  InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
457
330k
  if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
458
330k
    const double ld = (sse - dist) * 1. / residue_cost;
459
330k
    ++rd_model->num;
460
330k
    rd_model->dist_sum += dist;
461
330k
    rd_model->ld_sum += ld;
462
330k
    rd_model->sse_sum += sse;
463
330k
    rd_model->sse_sse_sum += (double)sse * (double)sse;
464
330k
    rd_model->sse_ld_sum += sse * ld;
465
330k
  }
466
330k
}
467
468
static inline void inter_modes_info_push(InterModesInfo *inter_modes_info,
469
                                         int mode_rate, int64_t sse, int64_t rd,
470
                                         RD_STATS *rd_cost, RD_STATS *rd_cost_y,
471
                                         RD_STATS *rd_cost_uv,
472
142k
                                         const MB_MODE_INFO *mbmi) {
473
142k
  const int num = inter_modes_info->num;
474
142k
  assert(num < MAX_INTER_MODES);
475
142k
  inter_modes_info->mbmi_arr[num] = *mbmi;
476
142k
  inter_modes_info->mode_rate_arr[num] = mode_rate;
477
142k
  inter_modes_info->sse_arr[num] = sse;
478
142k
  inter_modes_info->est_rd_arr[num] = rd;
479
142k
  inter_modes_info->rd_cost_arr[num] = *rd_cost;
480
142k
  inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
481
142k
  inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
482
142k
  ++inter_modes_info->num;
483
142k
}
484
485
174k
static int compare_rd_idx_pair(const void *a, const void *b) {
486
174k
  if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
487
    // To avoid inconsistency in qsort() ordering when two elements are equal,
488
    // using idx as tie breaker. Refer aomedia:2928
489
5
    if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
490
0
      return 0;
491
5
    else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
492
0
      return 1;
493
5
    else
494
5
      return -1;
495
174k
  } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
496
96.6k
    return 1;
497
96.6k
  } else {
498
77.5k
    return -1;
499
77.5k
  }
500
174k
}
501
502
static inline void inter_modes_info_sort(const InterModesInfo *inter_modes_info,
503
38.2k
                                         RdIdxPair *rd_idx_pair_arr) {
504
38.2k
  if (inter_modes_info->num == 0) {
505
2
    return;
506
2
  }
507
180k
  for (int i = 0; i < inter_modes_info->num; ++i) {
508
142k
    rd_idx_pair_arr[i].idx = i;
509
142k
    rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
510
142k
  }
511
38.2k
  qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
512
38.2k
        compare_rd_idx_pair);
513
38.2k
}
514
515
// Initialize estimated RD Cost records of compound average.
516
static inline void init_comp_avg_est_rd(
517
896k
    struct macroblock *x, int skip_cmp_using_top_cmp_avg_est_rd_lvl) {
518
896k
  if (!skip_cmp_using_top_cmp_avg_est_rd_lvl) return;
519
520
5.37M
  for (int j = 0; j < TOP_COMP_AVG_EST_RD_COUNT; j++) {
521
4.47M
    x->top_comp_avg_est_rd[j] = INT64_MAX;
522
4.47M
  }
523
896k
}
524
525
// Similar to get_horver_correlation, but also takes into account first
526
// row/column, when computing horizontal/vertical correlation.
527
void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
528
                                       int width, int height, float *hcorr,
529
0
                                       float *vcorr) {
530
  // The following notation is used:
531
  // x - current pixel
532
  // y - left neighbor pixel
533
  // z - top neighbor pixel
534
0
  int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
535
0
  int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
536
0
  int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
537
538
  // First, process horizontal correlation on just the first row
539
0
  x_sum += diff[0];
540
0
  x2_sum += diff[0] * diff[0];
541
0
  x_firstrow += diff[0];
542
0
  x2_firstrow += diff[0] * diff[0];
543
0
  for (int j = 1; j < width; ++j) {
544
0
    const int16_t x = diff[j];
545
0
    const int16_t y = diff[j - 1];
546
0
    x_sum += x;
547
0
    x_firstrow += x;
548
0
    x2_sum += x * x;
549
0
    x2_firstrow += x * x;
550
0
    xy_sum += x * y;
551
0
  }
552
553
  // Process vertical correlation in the first column
554
0
  x_firstcol += diff[0];
555
0
  x2_firstcol += diff[0] * diff[0];
556
0
  for (int i = 1; i < height; ++i) {
557
0
    const int16_t x = diff[i * stride];
558
0
    const int16_t z = diff[(i - 1) * stride];
559
0
    x_sum += x;
560
0
    x_firstcol += x;
561
0
    x2_sum += x * x;
562
0
    x2_firstcol += x * x;
563
0
    xz_sum += x * z;
564
0
  }
565
566
  // Now process horiz and vert correlation through the rest unit
567
0
  for (int i = 1; i < height; ++i) {
568
0
    for (int j = 1; j < width; ++j) {
569
0
      const int16_t x = diff[i * stride + j];
570
0
      const int16_t y = diff[i * stride + j - 1];
571
0
      const int16_t z = diff[(i - 1) * stride + j];
572
0
      x_sum += x;
573
0
      x2_sum += x * x;
574
0
      xy_sum += x * y;
575
0
      xz_sum += x * z;
576
0
    }
577
0
  }
578
579
0
  for (int j = 0; j < width; ++j) {
580
0
    x_finalrow += diff[(height - 1) * stride + j];
581
0
    x2_finalrow +=
582
0
        diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
583
0
  }
584
0
  for (int i = 0; i < height; ++i) {
585
0
    x_finalcol += diff[i * stride + width - 1];
586
0
    x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
587
0
  }
588
589
0
  int64_t xhor_sum = x_sum - x_finalcol;
590
0
  int64_t xver_sum = x_sum - x_finalrow;
591
0
  int64_t y_sum = x_sum - x_firstcol;
592
0
  int64_t z_sum = x_sum - x_firstrow;
593
0
  int64_t x2hor_sum = x2_sum - x2_finalcol;
594
0
  int64_t x2ver_sum = x2_sum - x2_finalrow;
595
0
  int64_t y2_sum = x2_sum - x2_firstcol;
596
0
  int64_t z2_sum = x2_sum - x2_firstrow;
597
598
0
  const float num_hor = (float)(height * (width - 1));
599
0
  const float num_ver = (float)((height - 1) * width);
600
601
0
  const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
602
0
  const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
603
604
0
  const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
605
0
  const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
606
607
0
  const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
608
0
  const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
609
610
0
  if (xhor_var_n > 0 && y_var_n > 0) {
611
0
    *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
612
0
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
613
0
  } else {
614
0
    *hcorr = 1.0;
615
0
  }
616
0
  if (xver_var_n > 0 && z_var_n > 0) {
617
0
    *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
618
0
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
619
0
  } else {
620
0
    *vcorr = 1.0;
621
0
  }
622
0
}
623
624
static void get_variance_stats_hbd(const MACROBLOCK *x, int64_t *src_var,
625
0
                                   int64_t *rec_var) {
626
0
  const MACROBLOCKD *xd = &x->e_mbd;
627
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
628
0
  const struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
629
0
  const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
630
631
0
  BLOCK_SIZE bsize = mbmi->bsize;
632
0
  int bw = block_size_wide[bsize];
633
0
  int bh = block_size_high[bsize];
634
635
0
  static const int gau_filter[3][3] = {
636
0
    { 1, 2, 1 },
637
0
    { 2, 4, 2 },
638
0
    { 1, 2, 1 },
639
0
  };
640
641
0
  DECLARE_ALIGNED(16, uint16_t, dclevel[(MAX_SB_SIZE + 2) * (MAX_SB_SIZE + 2)]);
642
643
0
  uint16_t *pred_ptr = &dclevel[bw + 1];
644
0
  int pred_stride = xd->plane[0].dst.stride;
645
646
0
  for (int idy = -1; idy < bh + 1; ++idy) {
647
0
    for (int idx = -1; idx < bw + 1; ++idx) {
648
0
      int offset_idy = idy;
649
0
      int offset_idx = idx;
650
0
      if (idy == -1) offset_idy = 0;
651
0
      if (idy == bh) offset_idy = bh - 1;
652
0
      if (idx == -1) offset_idx = 0;
653
0
      if (idx == bw) offset_idx = bw - 1;
654
655
0
      int offset = offset_idy * pred_stride + offset_idx;
656
0
      pred_ptr[idy * bw + idx] = CONVERT_TO_SHORTPTR(pd->dst.buf)[offset];
657
0
    }
658
0
  }
659
660
0
  *rec_var = 0;
661
0
  for (int idy = 0; idy < bh; ++idy) {
662
0
    for (int idx = 0; idx < bw; ++idx) {
663
0
      int sum = 0;
664
0
      for (int iy = 0; iy < 3; ++iy)
665
0
        for (int ix = 0; ix < 3; ++ix)
666
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
667
0
                 gau_filter[iy][ix];
668
669
0
      sum = sum >> 4;
670
671
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
672
0
      *rec_var += diff * diff;
673
0
    }
674
0
  }
675
0
  *rec_var <<= 4;
676
677
0
  int src_stride = p->src.stride;
678
0
  for (int idy = -1; idy < bh + 1; ++idy) {
679
0
    for (int idx = -1; idx < bw + 1; ++idx) {
680
0
      int offset_idy = idy;
681
0
      int offset_idx = idx;
682
0
      if (idy == -1) offset_idy = 0;
683
0
      if (idy == bh) offset_idy = bh - 1;
684
0
      if (idx == -1) offset_idx = 0;
685
0
      if (idx == bw) offset_idx = bw - 1;
686
687
0
      int offset = offset_idy * src_stride + offset_idx;
688
0
      pred_ptr[idy * bw + idx] = CONVERT_TO_SHORTPTR(p->src.buf)[offset];
689
0
    }
690
0
  }
691
692
0
  *src_var = 0;
693
0
  for (int idy = 0; idy < bh; ++idy) {
694
0
    for (int idx = 0; idx < bw; ++idx) {
695
0
      int sum = 0;
696
0
      for (int iy = 0; iy < 3; ++iy)
697
0
        for (int ix = 0; ix < 3; ++ix)
698
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
699
0
                 gau_filter[iy][ix];
700
701
0
      sum = sum >> 4;
702
703
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
704
0
      *src_var += diff * diff;
705
0
    }
706
0
  }
707
0
  *src_var <<= 4;
708
0
}
709
710
static void get_variance_stats(const MACROBLOCK *x, int64_t *src_var,
711
0
                               int64_t *rec_var) {
712
0
  const MACROBLOCKD *xd = &x->e_mbd;
713
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
714
0
  const struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
715
0
  const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
716
717
0
  BLOCK_SIZE bsize = mbmi->bsize;
718
0
  int bw = block_size_wide[bsize];
719
0
  int bh = block_size_high[bsize];
720
721
0
  static const int gau_filter[3][3] = {
722
0
    { 1, 2, 1 },
723
0
    { 2, 4, 2 },
724
0
    { 1, 2, 1 },
725
0
  };
726
727
0
  DECLARE_ALIGNED(16, uint8_t, dclevel[(MAX_SB_SIZE + 2) * (MAX_SB_SIZE + 2)]);
728
729
0
  uint8_t *pred_ptr = &dclevel[bw + 1];
730
0
  int pred_stride = xd->plane[0].dst.stride;
731
732
0
  for (int idy = -1; idy < bh + 1; ++idy) {
733
0
    for (int idx = -1; idx < bw + 1; ++idx) {
734
0
      int offset_idy = idy;
735
0
      int offset_idx = idx;
736
0
      if (idy == -1) offset_idy = 0;
737
0
      if (idy == bh) offset_idy = bh - 1;
738
0
      if (idx == -1) offset_idx = 0;
739
0
      if (idx == bw) offset_idx = bw - 1;
740
741
0
      int offset = offset_idy * pred_stride + offset_idx;
742
0
      pred_ptr[idy * bw + idx] = pd->dst.buf[offset];
743
0
    }
744
0
  }
745
746
0
  *rec_var = 0;
747
0
  for (int idy = 0; idy < bh; ++idy) {
748
0
    for (int idx = 0; idx < bw; ++idx) {
749
0
      int sum = 0;
750
0
      for (int iy = 0; iy < 3; ++iy)
751
0
        for (int ix = 0; ix < 3; ++ix)
752
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
753
0
                 gau_filter[iy][ix];
754
755
0
      sum = sum >> 4;
756
757
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
758
0
      *rec_var += diff * diff;
759
0
    }
760
0
  }
761
0
  *rec_var <<= 4;
762
763
0
  int src_stride = p->src.stride;
764
0
  for (int idy = -1; idy < bh + 1; ++idy) {
765
0
    for (int idx = -1; idx < bw + 1; ++idx) {
766
0
      int offset_idy = idy;
767
0
      int offset_idx = idx;
768
0
      if (idy == -1) offset_idy = 0;
769
0
      if (idy == bh) offset_idy = bh - 1;
770
0
      if (idx == -1) offset_idx = 0;
771
0
      if (idx == bw) offset_idx = bw - 1;
772
773
0
      int offset = offset_idy * src_stride + offset_idx;
774
0
      pred_ptr[idy * bw + idx] = p->src.buf[offset];
775
0
    }
776
0
  }
777
778
0
  *src_var = 0;
779
0
  for (int idy = 0; idy < bh; ++idy) {
780
0
    for (int idx = 0; idx < bw; ++idx) {
781
0
      int sum = 0;
782
0
      for (int iy = 0; iy < 3; ++iy)
783
0
        for (int ix = 0; ix < 3; ++ix)
784
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
785
0
                 gau_filter[iy][ix];
786
787
0
      sum = sum >> 4;
788
789
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
790
0
      *src_var += diff * diff;
791
0
    }
792
0
  }
793
0
  *src_var <<= 4;
794
0
}
795
796
static void adjust_rdcost(const AV1_COMP *cpi, const MACROBLOCK *x,
797
6.47M
                          RD_STATS *rd_cost, bool is_inter_pred) {
798
6.47M
  if ((cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
799
6.47M
       cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) &&
800
0
      is_inter_pred) {
801
    // Tune IQ and SSIMULACRA2 can be used to encode layered images, where
802
    // keyframes could be encoded at a lower or similar quality (i.e. higher
803
    // QP) than inter-coded frames.
804
    // In this case, libaom tends to underestimate the true RD cost of inter
805
    // prediction candidates, causing encoded file size to increase without a
806
    // corresponding increase in quality.
807
    // When both intra and inter encoded block candidates are available (with
808
    // rdcosts close to each other), the intra-coded candidate was subjectively
809
    // observed to be a bit less blurry, with a corresponding increase in
810
    // SSIMULACRA 2 scores.
811
    // Apply a 1.125x inter block bias to increase overall perceptual
812
    // compression efficiency, while still allowing the encoder to pick inter
813
    // prediction when it's beneficial.
814
0
    rd_cost->dist += rd_cost->dist >> 3;
815
0
    rd_cost->rdcost += rd_cost->rdcost >> 3;
816
0
    return;
817
0
  }
818
819
6.47M
  if (cpi->oxcf.algo_cfg.sharpness != 3) return;
820
821
107
  if (frame_is_kf_gf_arf(cpi)) return;
822
823
107
  int64_t src_var, rec_var;
824
825
107
  const bool is_hbd = is_cur_buf_hbd(&x->e_mbd);
826
107
  if (is_hbd)
827
0
    get_variance_stats_hbd(x, &src_var, &rec_var);
828
107
  else
829
107
    get_variance_stats(x, &src_var, &rec_var);
830
831
107
  if (src_var <= rec_var) return;
832
833
107
  int64_t var_offset = src_var - rec_var;
834
835
107
  rd_cost->dist += var_offset;
836
837
107
  rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
838
107
}
839
840
static void adjust_cost(const AV1_COMP *cpi, const MACROBLOCK *x,
841
5.15M
                        int64_t *rd_cost, bool is_inter_pred) {
842
5.15M
  if ((cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
843
5.15M
       cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) &&
844
0
      is_inter_pred) {
845
0
    *rd_cost += *rd_cost >> 3;
846
0
    return;
847
0
  }
848
849
5.15M
  if (cpi->oxcf.algo_cfg.sharpness != 3) return;
850
851
18.4E
  if (frame_is_kf_gf_arf(cpi)) return;
852
853
18.4E
  int64_t src_var, rec_var;
854
18.4E
  const bool is_hbd = is_cur_buf_hbd(&x->e_mbd);
855
856
18.4E
  if (is_hbd)
857
0
    get_variance_stats_hbd(x, &src_var, &rec_var);
858
18.4E
  else
859
18.4E
    get_variance_stats(x, &src_var, &rec_var);
860
861
18.4E
  if (src_var <= rec_var) return;
862
863
18.4E
  int64_t var_offset = src_var - rec_var;
864
865
18.4E
  *rd_cost += RDCOST(x->rdmult, 0, var_offset);
866
18.4E
}
867
868
static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
869
3.57M
                       int64_t *sse_y) {
870
3.57M
  const AV1_COMMON *cm = &cpi->common;
871
3.57M
  const int num_planes = av1_num_planes(cm);
872
3.57M
  const MACROBLOCKD *xd = &x->e_mbd;
873
3.57M
  const MB_MODE_INFO *mbmi = xd->mi[0];
874
3.57M
  int64_t total_sse = 0;
875
10.1M
  for (int plane = 0; plane < num_planes; ++plane) {
876
6.60M
    if (plane && !xd->is_chroma_ref) break;
877
6.60M
    const struct macroblock_plane *const p = &x->plane[plane];
878
6.60M
    const struct macroblockd_plane *const pd = &xd->plane[plane];
879
6.60M
    const BLOCK_SIZE bs =
880
6.60M
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
881
6.60M
    unsigned int sse;
882
883
6.60M
    cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
884
6.60M
                            pd->dst.stride, &sse);
885
6.60M
    total_sse += sse;
886
6.60M
    if (!plane && sse_y) *sse_y = sse;
887
6.60M
  }
888
3.57M
  total_sse <<= 4;
889
3.57M
  return total_sse;
890
3.57M
}
891
892
/* Computes the sum of squared differences between original and dequantized
 * transform coefficients.
 *
 * coeff:      original coefficients (block_size entries).
 * dqcoeff:    dequantized coefficients (block_size entries).
 * block_size: number of coefficients to process.
 * ssz:        output; receives the sum of squares of coeff[].
 * Returns the sum over i of (coeff[i] - dqcoeff[i])^2.
 *
 * All arithmetic is carried out in 64 bits: squaring in plain `int` overflows
 * (undefined behaviour) once a magnitude exceeds 46340.
 */
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t error = 0, sqcoeff = 0;

  // Index with the same type as the bound to avoid a narrowing comparison.
  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = sqcoeff;
  return error;
}
906
907
/* Low-precision (16-bit coefficient) variant of the block error.
 *
 * coeff:      original coefficients (block_size entries).
 * dqcoeff:    dequantized coefficients (block_size entries).
 * block_size: number of coefficients to process.
 * Returns the sum over i of (coeff[i] - dqcoeff[i])^2.
 *
 * The difference of two int16_t values can reach +/-65535, whose square does
 * not fit in a 32-bit int, so the square is evaluated in 64 bits.
 */
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t error = 0;

  // Index with the same type as the bound to avoid a narrowing comparison.
  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
918
919
#if CONFIG_AV1_HIGHBITDEPTH
920
/* High bit-depth block error.
 *
 * Accumulates the squared coefficient error and the squared coefficient
 * energy in 64 bits, then scales both down by 2 * (bd - 8) bits with
 * rounding.
 *
 * ssz: output; receives the scaled sum of squares of coeff[].
 * bd:  bit depth; the shift amount is derived from it as 2 * (bd - 8).
 * Returns the scaled sum of squared differences.
 */
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = (1 << shift) >> 1;
  int64_t sum_sq_err = 0;
  int64_t sum_sq_coeff = 0;

  for (int idx = 0; idx < block_size; ++idx) {
    const int64_t delta = coeff[idx] - dqcoeff[idx];
    sum_sq_err += delta * delta;
    sum_sq_coeff += (int64_t)coeff[idx] * (int64_t)coeff[idx];
  }

  *ssz = (sum_sq_coeff + rounding) >> shift;
  return (sum_sq_err + rounding) >> shift;
}
939
#endif
940
941
/* Decides whether a directional intra mode can be skipped given the best
 * intra mode found so far.
 *
 * Each of the four listed directional modes is kept only when
 * best_intra_mode is one of two specific modes paired with it; any other
 * mode is never skipped here.
 * Returns 1 to skip `mode`, 0 otherwise.
 */
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
957
958
/* Returns the rate cost of signalling inter prediction mode `mode`.
 *
 * Compound modes are looked up directly in the compound-mode table. Single
 * reference modes are costed as a chain of binary decisions (NEWMV or not,
 * GLOBALMV or not, NEARESTMV or not), each with its own context extracted
 * from `mode_context`.
 */
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  // First decision: NEWMV vs. everything else.
  const int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[newmv_ctx][0];
  int cost = mode_costs->newmv_mode_cost[newmv_ctx][1];

  // Second decision: GLOBALMV vs. the reference-MV modes.
  const int16_t globalmv_ctx =
      (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV) {
    return cost + mode_costs->zeromv_mode_cost[globalmv_ctx][0];
  }
  cost += mode_costs->zeromv_mode_cost[globalmv_ctx][1];

  // Final decision: NEARESTMV (index 0) vs. any other remaining mode (1).
  const int16_t refmv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  return cost + mode_costs->refmv_mode_cost[refmv_ctx][mode != NEARESTMV];
}
988
989
/* Maps a compound mode to the single-reference mode used for one of its
 * references: ref_idx 0 selects the first reference, any non-zero ref_idx
 * selects the second.
 */
static inline PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
                                              int ref_idx) {
  if (ref_idx != 0) {
    return compound_ref1_mode(this_mode);
  }
  return compound_ref0_mode(this_mode);
}
994
995
static inline void estimate_ref_frame_costs(
996
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
997
    int segment_id, unsigned int *ref_costs_single,
998
896k
    unsigned int (*ref_costs_comp)[REF_FRAMES]) {
999
896k
  int seg_ref_active =
1000
896k
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
1001
896k
  if (seg_ref_active) {
1002
0
    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
1003
0
    int ref_frame;
1004
0
    for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
1005
0
      memset(ref_costs_comp[ref_frame], 0,
1006
0
             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
1007
896k
  } else {
1008
896k
    int intra_inter_ctx = av1_get_intra_inter_context(xd);
1009
896k
    ref_costs_single[INTRA_FRAME] =
1010
896k
        mode_costs->intra_inter_cost[intra_inter_ctx][0];
1011
896k
    unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];
1012
1013
7.16M
    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
1014
6.27M
      ref_costs_single[i] = base_cost;
1015
1016
896k
    const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
1017
896k
    const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
1018
896k
    const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
1019
896k
    const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
1020
896k
    const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
1021
896k
    const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);
1022
1023
    // Determine cost of a single ref frame, where frame types are represented
1024
    // by a tree:
1025
    // Level 0: add cost whether this ref is a forward or backward ref
1026
896k
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
1027
896k
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
1028
896k
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
1029
896k
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
1030
896k
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
1031
896k
    ref_costs_single[ALTREF2_FRAME] +=
1032
896k
        mode_costs->single_ref_cost[ctx_p1][0][1];
1033
896k
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
1034
1035
    // Level 1: if this ref is forward ref,
1036
    // add cost whether it is last/last2 or last3/golden
1037
896k
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
1038
896k
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
1039
896k
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
1040
896k
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
1041
1042
    // Level 1: if this ref is backward ref
1043
    // then add cost whether this ref is altref or backward ref
1044
896k
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
1045
896k
    ref_costs_single[ALTREF2_FRAME] +=
1046
896k
        mode_costs->single_ref_cost[ctx_p2][1][0];
1047
896k
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];
1048
1049
    // Level 2: further add cost whether this ref is last or last2
1050
896k
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
1051
896k
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];
1052
1053
    // Level 2: last3 or golden
1054
896k
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
1055
896k
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];
1056
1057
    // Level 2: bwdref or altref2
1058
896k
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
1059
896k
    ref_costs_single[ALTREF2_FRAME] +=
1060
896k
        mode_costs->single_ref_cost[ctx_p6][5][1];
1061
1062
896k
    if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
1063
      // Similar to single ref, determine cost of compound ref frames.
1064
      // cost_compound_refs = cost_first_ref + cost_second_ref
1065
896k
      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
1066
896k
      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
1067
896k
      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
1068
896k
      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
1069
896k
      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);
1070
1071
896k
      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
1072
896k
      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };
1073
1074
896k
      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
1075
896k
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
1076
896k
              base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
1077
896k
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
1078
896k
      ref_bicomp_costs[ALTREF_FRAME] = 0;
1079
1080
      // cost of first ref frame
1081
896k
      ref_bicomp_costs[LAST_FRAME] +=
1082
896k
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
1083
896k
      ref_bicomp_costs[LAST2_FRAME] +=
1084
896k
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
1085
896k
      ref_bicomp_costs[LAST3_FRAME] +=
1086
896k
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
1087
896k
      ref_bicomp_costs[GOLDEN_FRAME] +=
1088
896k
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
1089
1090
896k
      ref_bicomp_costs[LAST_FRAME] +=
1091
896k
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
1092
896k
      ref_bicomp_costs[LAST2_FRAME] +=
1093
896k
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];
1094
1095
896k
      ref_bicomp_costs[LAST3_FRAME] +=
1096
896k
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
1097
896k
      ref_bicomp_costs[GOLDEN_FRAME] +=
1098
896k
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];
1099
1100
      // cost of second ref frame
1101
896k
      ref_bicomp_costs[BWDREF_FRAME] +=
1102
896k
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
1103
896k
      ref_bicomp_costs[ALTREF2_FRAME] +=
1104
896k
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
1105
896k
      ref_bicomp_costs[ALTREF_FRAME] +=
1106
896k
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
1107
1108
896k
      ref_bicomp_costs[BWDREF_FRAME] +=
1109
896k
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
1110
896k
      ref_bicomp_costs[ALTREF2_FRAME] +=
1111
896k
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
1112
1113
      // cost: if one ref frame is forward ref, the other ref is backward ref
1114
896k
      int ref0, ref1;
1115
4.47M
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
1116
14.3M
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
1117
10.7M
          ref_costs_comp[ref0][ref1] =
1118
10.7M
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
1119
10.7M
        }
1120
3.58M
      }
1121
1122
      // cost: if both ref frames are the same side.
1123
896k
      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
1124
896k
      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
1125
896k
      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
1126
896k
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
1127
896k
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
1128
896k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
1129
896k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
1130
896k
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
1131
896k
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
1132
896k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
1133
896k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
1134
896k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
1135
896k
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
1136
896k
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
1137
896k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
1138
896k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
1139
896k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
1140
896k
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
1141
896k
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
1142
896k
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
1143
18.4E
    } else {
1144
18.4E
      int ref0, ref1;
1145
18.4E
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
1146
0
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
1147
0
          ref_costs_comp[ref0][ref1] = 512;
1148
0
      }
1149
18.4E
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
1150
18.4E
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
1151
18.4E
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
1152
18.4E
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
1153
18.4E
    }
1154
896k
  }
1155
896k
}
1156
1157
static inline void store_coding_context(
1158
#if CONFIG_INTERNAL_STATS
1159
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
1160
#else
1161
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
1162
#endif  // CONFIG_INTERNAL_STATS
1163
889k
    int skippable) {
1164
889k
  MACROBLOCKD *const xd = &x->e_mbd;
1165
1166
  // Take a snapshot of the coding context so it can be
1167
  // restored if we decide to encode this way
1168
889k
  ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
1169
889k
  ctx->skippable = skippable;
1170
#if CONFIG_INTERNAL_STATS
1171
  ctx->best_mode_index = mode_index;
1172
#endif  // CONFIG_INTERNAL_STATS
1173
889k
  ctx->mic = *xd->mi[0];
1174
889k
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
1175
889k
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
1176
889k
}
1177
1178
static inline void setup_buffer_ref_mvs_inter(
1179
    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
1180
1.54M
    BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
1181
1.54M
  const AV1_COMMON *cm = &cpi->common;
1182
1.54M
  const int num_planes = av1_num_planes(cm);
1183
1.54M
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
1184
1.54M
      av1_get_scaled_ref_frame(cpi, ref_frame);
1185
1.54M
  MACROBLOCKD *const xd = &x->e_mbd;
1186
1.54M
  MB_MODE_INFO *const mbmi = xd->mi[0];
1187
1.54M
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1188
1.54M
  const struct scale_factors *const sf =
1189
1.54M
      get_ref_scale_factors_const(cm, ref_frame);
1190
1.54M
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
1191
1.54M
  assert(yv12 != NULL);
1192
1193
1.54M
  if (scaled_ref_frame) {
1194
    // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
1195
    // support scaling.
1196
0
    av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
1197
0
                         num_planes);
1198
1.54M
  } else {
1199
1.54M
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
1200
1.54M
  }
1201
1202
  // Gets an initial list of candidate vectors from neighbours and orders them
1203
1.54M
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
1204
1.54M
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
1205
1.54M
                   mbmi_ext->mode_context);
1206
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
1207
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
1208
1.54M
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
1209
  // Further refinement that is encode side only to test the top few candidates
1210
  // in full and choose the best as the center point for subsequent searches.
1211
  // The current implementation doesn't support scaling.
1212
1.54M
  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
1213
1.54M
              ref_frame, block_size);
1214
1215
  // Go back to unscaled reference.
1216
1.54M
  if (scaled_ref_frame) {
1217
    // We had temporarily setup pred block based on scaled reference above. Go
1218
    // back to unscaled reference now, for subsequent use.
1219
0
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
1220
0
  }
1221
1.54M
}
1222
1223
7.56M
#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
1224
7.56M
#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
1225
1226
// TODO(jingning): this mv clamping function should be block size dependent.
1227
3.78M
static inline void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
1228
3.78M
  const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
1229
3.78M
                                     xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
1230
3.78M
                                     xd->mb_to_top_edge - LEFT_TOP_MARGIN,
1231
3.78M
                                     xd->mb_to_bottom_edge +
1232
3.78M
                                         RIGHT_BOTTOM_MARGIN };
1233
3.78M
  clamp_mv(mv, &mv_limits);
1234
3.78M
}
1235
1236
/* If the current mode shares the same mv with other modes with higher cost,
1237
 * skip this mode. */
1238
static AOM_FORCE_INLINE int skip_repeated_mv(
1239
    const AV1_COMMON *const cm, const MACROBLOCK *const x,
1240
    PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frames[2],
1241
5.88M
    InterModeSearchState *search_state) {
1242
5.88M
  const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
1243
5.88M
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
1244
5.88M
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1245
5.88M
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1246
5.88M
  PREDICTION_MODE compare_mode = MB_MODE_COUNT;
1247
5.88M
  if (!is_comp_pred) {
1248
5.03M
    if (this_mode == NEARMV) {
1249
1.26M
      if (ref_mv_count == 0) {
1250
        // NEARMV has the same motion vector as NEARESTMV
1251
653k
        compare_mode = NEARESTMV;
1252
653k
      }
1253
1.26M
      if (ref_mv_count == 1 &&
1254
314k
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1255
        // NEARMV has the same motion vector as GLOBALMV
1256
314k
        compare_mode = GLOBALMV;
1257
314k
      }
1258
1.26M
    }
1259
5.03M
    if (this_mode == GLOBALMV) {
1260
1.26M
      if (ref_mv_count == 0 &&
1261
653k
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1262
        // GLOBALMV has the same motion vector as NEARESTMV
1263
653k
        compare_mode = NEARESTMV;
1264
653k
      }
1265
1.26M
      if (ref_mv_count == 1) {
1266
        // GLOBALMV has the same motion vector as NEARMV
1267
314k
        compare_mode = NEARMV;
1268
314k
      }
1269
1.26M
    }
1270
1271
5.03M
    if (compare_mode != MB_MODE_COUNT) {
1272
      // Use modelled_rd to check whether compare mode was searched
1273
1.93M
      if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
1274
1.93M
          INT64_MAX) {
1275
61.4k
        const int16_t mode_ctx =
1276
61.4k
            av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
1277
61.4k
        const int compare_cost =
1278
61.4k
            cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
1279
61.4k
        const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);
1280
1281
        // Only skip if the mode cost is larger than compare mode cost
1282
61.4k
        if (this_cost > compare_cost) {
1283
61.4k
          search_state->modelled_rd[this_mode][0][ref_frames[0]] =
1284
61.4k
              search_state->modelled_rd[compare_mode][0][ref_frames[0]];
1285
61.4k
          return 1;
1286
61.4k
        }
1287
61.4k
      }
1288
1.93M
    }
1289
5.03M
  }
1290
5.81M
  return 0;
1291
5.88M
}
1292
1293
static inline int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1294
                                     const AV1_COMMON *cm,
1295
3.78M
                                     const MACROBLOCK *x) {
1296
3.78M
  const MACROBLOCKD *const xd = &x->e_mbd;
1297
3.78M
  *out_mv = in_mv;
1298
3.78M
  lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1299
3.78M
                     cm->features.cur_frame_force_integer_mv);
1300
3.78M
  clamp_mv2(&out_mv->as_mv, xd);
1301
3.78M
  return av1_is_fullmv_in_range(&x->mv_limits,
1302
3.78M
                                get_fullmv_from_mv(&out_mv->as_mv));
1303
3.78M
}
1304
1305
// To use single newmv directly for compound modes, need to clamp the mv to the
1306
// valid mv range. Without this, encoder would generate out of range mv, and
1307
// this is seen in 8k encoding.
1308
static inline void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1309
218k
                                     int ref_idx) {
1310
218k
  const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1311
218k
  SubpelMvLimits mv_limits;
1312
1313
218k
  av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1314
218k
  clamp_mv(&mv->as_mv, &mv_limits);
1315
218k
}
1316
1317
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
1318
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
1319
                            int *const rate_mv, HandleInterModeArgs *const args,
1320
1.72M
                            inter_mode_info *mode_info) {
1321
1.72M
  MACROBLOCKD *const xd = &x->e_mbd;
1322
1.72M
  MB_MODE_INFO *const mbmi = xd->mi[0];
1323
1.72M
  const int is_comp_pred = has_second_ref(mbmi);
1324
1.72M
  const PREDICTION_MODE this_mode = mbmi->mode;
1325
1.72M
  const int refs[2] = { mbmi->ref_frame[0],
1326
1.72M
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
1327
1.72M
  const int ref_mv_idx = mbmi->ref_mv_idx;
1328
1329
1.72M
  if (is_comp_pred) {
1330
109k
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
1331
109k
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
1332
109k
    if (this_mode == NEW_NEWMV) {
1333
109k
      if (valid_mv0) {
1334
109k
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
1335
109k
        clamp_mv_in_range(x, &cur_mv[0], 0);
1336
109k
      }
1337
109k
      if (valid_mv1) {
1338
109k
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
1339
109k
        clamp_mv_in_range(x, &cur_mv[1], 1);
1340
109k
      }
1341
109k
      *rate_mv = 0;
1342
327k
      for (int i = 0; i < 2; ++i) {
1343
218k
        const int_mv ref_mv = av1_get_ref_mv(x, i);
1344
218k
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
1345
218k
                                    x->mv_costs->nmv_joint_cost,
1346
218k
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1347
218k
      }
1348
18.4E
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
1349
0
      if (valid_mv1) {
1350
0
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
1351
0
        clamp_mv_in_range(x, &cur_mv[1], 1);
1352
0
      }
1353
0
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
1354
0
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
1355
0
                                 x->mv_costs->nmv_joint_cost,
1356
0
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1357
18.4E
    } else {
1358
18.4E
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
1359
18.4E
      if (valid_mv0) {
1360
0
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
1361
0
        clamp_mv_in_range(x, &cur_mv[0], 0);
1362
0
      }
1363
18.4E
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
1364
18.4E
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
1365
18.4E
                                 x->mv_costs->nmv_joint_cost,
1366
18.4E
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1367
18.4E
    }
1368
1.61M
  } else {
1369
    // Single ref case.
1370
1.61M
    const int ref_idx = 0;
1371
1.61M
    int search_range = INT_MAX;
1372
1373
1.61M
    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
1374
362k
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
1375
362k
      int min_mv_diff = INT_MAX;
1376
362k
      int best_match = -1;
1377
362k
      MV prev_ref_mv[2] = { { 0 } };
1378
842k
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
1379
479k
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
1380
479k
                                                     idx, &x->mbmi_ext)
1381
479k
                               .as_mv;
1382
479k
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
1383
479k
                                       abs(ref_mv.col - prev_ref_mv[idx].col));
1384
1385
479k
        if (min_mv_diff > ref_mv_diff) {
1386
417k
          min_mv_diff = ref_mv_diff;
1387
417k
          best_match = idx;
1388
417k
        }
1389
479k
      }
1390
1391
362k
      if (min_mv_diff < (16 << 3)) {
1392
267k
        if (args->single_newmv_valid[best_match][refs[0]]) {
1393
246k
          search_range = min_mv_diff;
1394
246k
          search_range +=
1395
246k
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
1396
246k
                         prev_ref_mv[best_match].row),
1397
246k
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
1398
246k
                         prev_ref_mv[best_match].col));
1399
          // Get full pixel search range.
1400
246k
          search_range = (search_range + 4) >> 3;
1401
246k
        }
1402
267k
      }
1403
362k
    }
1404
1405
1.61M
    int_mv best_mv;
1406
1.61M
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
1407
1.61M
                             mode_info, &best_mv, args);
1408
1.61M
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;
1409
1410
1.39M
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
1411
1.39M
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
1412
1.39M
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
1413
1.39M
    cur_mv[0].as_int = best_mv.as_int;
1414
1415
    // Return after single_newmv is set.
1416
1.39M
    if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
1417
1.39M
  }
1418
1419
1.50M
  return 0;
1420
1.72M
}
1421
1422
static inline void update_mode_start_end_index(
1423
    const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
1424
    int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
1425
5.20M
    int interintra_allowed, int eval_motion_mode) {
1426
5.20M
  *mode_index_start = (int)SIMPLE_TRANSLATION;
1427
5.20M
  *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1428
5.20M
  if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1429
5.07M
    if (!eval_motion_mode) {
1430
3.42M
      *mode_index_end = (int)SIMPLE_TRANSLATION;
1431
3.42M
    } else {
1432
      // Set the start index appropriately to process motion modes other than
1433
      // simple translation
1434
1.64M
      *mode_index_start = 1;
1435
1.64M
    }
1436
5.07M
  }
1437
5.20M
  if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
1438
0
    *mode_index_end = SIMPLE_TRANSLATION;
1439
5.20M
}
1440
1441
// Increase rd cost of warp and obmc motion modes for low complexity decoding.
1442
static inline void increase_motion_mode_rd(const MB_MODE_INFO *const best_mbmi,
1443
                                           const MB_MODE_INFO *const this_mbmi,
1444
                                           int64_t *const best_scaled_rd,
1445
                                           int64_t *const this_scaled_rd,
1446
                                           int rd_warp_bias_scale_pct,
1447
244k
                                           float rd_obmc_bias_scale_pct) {
1448
244k
  if (*best_scaled_rd == INT64_MAX || *this_scaled_rd == INT64_MAX) return;
1449
1450
  // Experiments have been performed with increasing the RD cost of warp and
1451
  // obmc motion modes at the below locations of inter mode evaluation.
1452
  // (1). Inter mode evaluation loop in av1_rd_pick_inter_mode().
1453
  // (2). Motion mode evaluation during handle_inter_mode() call.
1454
  // (3). Motion mode evaluation for winner motion modes.
1455
  // (4). Tx search for best inter candidates.
1456
  // Based on the speed quality trade-off results of this speed feature, the rd
1457
  // bias logic is enabled only at (2), (3) and (4).
1458
121k
  const double rd_warp_bias_scale = rd_warp_bias_scale_pct / 100.0;
1459
121k
  const double rd_obmc_bias_scale = rd_obmc_bias_scale_pct / 100.0;
1460
121k
  if (best_mbmi->motion_mode == WARPED_CAUSAL)
1461
1.57k
    *best_scaled_rd += (int64_t)(rd_warp_bias_scale * *best_scaled_rd);
1462
119k
  else if (best_mbmi->motion_mode == OBMC_CAUSAL)
1463
0
    *best_scaled_rd += (int64_t)(rd_obmc_bias_scale * *best_scaled_rd);
1464
1465
121k
  if (this_mbmi->motion_mode == WARPED_CAUSAL)
1466
103k
    *this_scaled_rd += (int64_t)(rd_warp_bias_scale * *this_scaled_rd);
1467
17.3k
  else if (this_mbmi->motion_mode == OBMC_CAUSAL)
1468
0
    *this_scaled_rd += (int64_t)(rd_obmc_bias_scale * *this_scaled_rd);
1469
121k
}
1470
1471
/*!\brief AV1 motion mode search
1472
 *
1473
 * \ingroup inter_mode_search
1474
 * Function to search over and determine the motion mode. It will update
1475
 * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1476
 * WARPED_CAUSAL and determine any necessary side information for the selected
1477
 * motion mode. It will also perform the full transform search, unless the
1478
 * input parameter do_tx_search indicates to do an estimation of the RD rather
1479
 * than an RD corresponding to a full transform search. It will return the
1480
 * RD for the final motion_mode.
1481
 * Do the RD search for a given inter mode and compute all information relevant
1482
 * to the input mode. It will compute the best MV,
1483
 * compound parameters (if the mode is a compound mode) and interpolation filter
1484
 * parameters.
1485
 *
1486
 * \param[in]     cpi               Top-level encoder structure.
1487
 * \param[in]     tile_data         Pointer to struct holding adaptive
1488
 *                                  data/contexts/models for the tile during
1489
 *                                  encoding.
1490
 * \param[in]     x                 Pointer to struct holding all the data for
1491
 *                                  the current macroblock.
1492
 * \param[in]     bsize             Current block size.
1493
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
1494
 *                                  information.
1495
 * \param[in,out] rd_stats_y        Struct to keep track of the RD information
1496
 *                                  for only the Y plane.
1497
 * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
1498
 *                                  for only the UV planes.
1499
 * \param[in]     args              HandleInterModeArgs struct holding
1500
 *                                  miscellaneous arguments for inter mode
1501
 *                                  search. See the documentation for this
1502
 *                                  struct for a description of each member.
1503
 * \param[in]     ref_best_rd       Best RD found so far for this block.
1504
 *                                  It is used for early termination of this
1505
 *                                  search if the RD exceeds this value.
1506
 * \param[in,out] ref_skip_rd       A length 2 array, where skip_rd[0] is the
1507
 *                                  best total RD for a skip mode so far, and
1508
 *                                  skip_rd[1] is the best RD for a skip mode so
1509
 *                                  far in luma. This is used as a speed feature
1510
 *                                  to skip the transform search if the computed
1511
 *                                  skip RD for the current mode is not better
1512
 *                                  than the best skip_rd so far.
1513
 * \param[in,out] rate_mv           The rate associated with the motion vectors.
1514
 *                                  This will be modified if a motion search is
1515
 *                                  done in the motion mode search.
1516
 * \param[in,out] orig_dst          A prediction buffer to hold a computed
1517
 *                                  prediction. This will eventually hold the
1518
 *                                  final prediction, and the tmp_dst info will
1519
 *                                  be copied here.
1520
 * \param[in,out] best_est_rd       Estimated RD for motion mode search if
1521
 *                                  do_tx_search (see below) is 0.
1522
 * \param[in]     do_tx_search      Parameter to indicate whether or not to do
1523
 *                                  a full transform search. This will compute
1524
 *                                  an estimated RD for the modes without the
1525
 *                                  transform search and later perform the full
1526
 *                                  transform search on the best candidates.
1527
 * \param[in]     inter_modes_info  InterModesInfo struct to hold inter mode
1528
 *                                  information to perform a full transform
1529
 *                                  search only on winning candidates searched
1530
 *                                  with an estimate for transform coding RD.
1531
 * \param[in]     eval_motion_mode  Boolean whether or not to evaluate motion
1532
 *                                  motion modes other than SIMPLE_TRANSLATION.
1533
 * \param[out]    yrd               Stores the rdcost corresponding to encoding
1534
 *                                  the luma plane.
1535
 * \return Returns INT64_MAX if the determined motion mode is invalid and the
1536
 * current motion mode being tested should be skipped. It returns 0 if the
1537
 * motion mode search is a success.
1538
 */
1539
static int64_t motion_mode_rd(
1540
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
1541
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
1542
    RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
1543
    int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
1544
    int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
1545
5.21M
    int eval_motion_mode, int64_t *yrd) {
1546
5.21M
  const AV1_COMMON *const cm = &cpi->common;
1547
5.21M
  const FeatureFlags *const features = &cm->features;
1548
5.21M
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
1549
5.21M
  const int num_planes = av1_num_planes(cm);
1550
5.21M
  MACROBLOCKD *xd = &x->e_mbd;
1551
5.21M
  MB_MODE_INFO *mbmi = xd->mi[0];
1552
5.21M
  const int is_comp_pred = has_second_ref(mbmi);
1553
5.21M
  const PREDICTION_MODE this_mode = mbmi->mode;
1554
5.21M
  const int rate2_nocoeff = rd_stats->rate;
1555
5.21M
  int best_xskip_txfm = 0;
1556
5.21M
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
1557
5.21M
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
1558
5.21M
  const int rate_mv0 = *rate_mv;
1559
5.21M
  const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
1560
0
                                 is_interintra_allowed(mbmi) &&
1561
0
                                 mbmi->compound_idx;
1562
5.21M
  WARP_SAMPLE_INFO *const warp_sample_info =
1563
5.21M
      &x->warp_sample_info[mbmi->ref_frame[0]];
1564
5.21M
  int *pts0 = warp_sample_info->pts;
1565
5.21M
  int *pts_inref0 = warp_sample_info->pts_inref;
1566
1567
5.21M
  assert(mbmi->ref_frame[1] != INTRA_FRAME);
1568
5.21M
  const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
1569
5.21M
  av1_invalid_rd_stats(&best_rd_stats);
1570
5.21M
  mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
1571
5.21M
  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
1572
5.21M
  *yrd = INT64_MAX;
1573
5.21M
  if (features->switchable_motion_mode) {
1574
    // Determine which motion modes to search if more than SIMPLE_TRANSLATION
1575
    // is allowed.
1576
5.20M
    last_motion_mode_allowed = motion_mode_allowed(
1577
5.20M
        xd->global_motion, xd, mbmi, features->allow_warped_motion);
1578
5.20M
  }
1579
1580
5.21M
  if (last_motion_mode_allowed == WARPED_CAUSAL) {
1581
    // Collect projection samples used in least squares approximation of
1582
    // the warped motion parameters if WARPED_CAUSAL is going to be searched.
1583
1.85M
    if (warp_sample_info->num < 0) {
1584
414k
      warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
1585
414k
    }
1586
1.85M
    mbmi->num_proj_ref = warp_sample_info->num;
1587
1.85M
  }
1588
5.21M
  const int total_samples = mbmi->num_proj_ref;
1589
5.21M
  if (total_samples == 0) {
1590
    // Do not search WARPED_CAUSAL if there are no samples to use to determine
1591
    // warped parameters.
1592
394k
    last_motion_mode_allowed = OBMC_CAUSAL;
1593
394k
  }
1594
1595
5.21M
  const MB_MODE_INFO base_mbmi = *mbmi;
1596
5.21M
  MB_MODE_INFO best_mbmi;
1597
5.21M
  const int interp_filter = features->interp_filter;
1598
5.21M
  const int switchable_rate =
1599
5.21M
      av1_is_interp_needed(xd)
1600
5.21M
          ? av1_get_switchable_rate(x, xd, interp_filter,
1601
5.12M
                                    cm->seq_params->enable_dual_filter)
1602
5.21M
          : 0;
1603
5.21M
  int64_t best_rd = INT64_MAX;
1604
5.21M
  int best_rate_mv = rate_mv0;
1605
5.21M
  const int mi_row = xd->mi_row;
1606
5.21M
  const int mi_col = xd->mi_col;
1607
5.21M
  int mode_index_start, mode_index_end;
1608
5.21M
  const int txfm_rd_gate_level =
1609
5.21M
      get_txfm_rd_gate_level(cm->seq_params->enable_masked_compound,
1610
5.21M
                             cpi->sf.inter_sf.txfm_rd_gate_level, bsize,
1611
5.21M
                             TX_SEARCH_MOTION_MODE, eval_motion_mode);
1612
1613
  // Modify the start and end index according to speed features. For example,
1614
  // if SIMPLE_TRANSLATION has already been searched according to
1615
  // the motion_mode_for_winner_cand speed feature, update the mode_index_start
1616
  // to avoid searching it again.
1617
5.21M
  update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
1618
5.21M
                              last_motion_mode_allowed, interintra_allowed,
1619
5.21M
                              eval_motion_mode);
1620
  // Main function loop. This loops over all of the possible motion modes and
1621
  // computes RD to determine the best one. This process includes computing
1622
  // any necessary side information for the motion mode and performing the
1623
  // transform search.
1624
9.80M
  for (int mode_index = mode_index_start; mode_index <= mode_index_end;
1625
5.21M
       mode_index++) {
1626
4.58M
    if (args->skip_motion_mode && mode_index) continue;
1627
4.58M
    int tmp_rate2 = rate2_nocoeff;
1628
4.58M
    const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
1629
4.58M
    int tmp_rate_mv = rate_mv0;
1630
1631
4.58M
    *mbmi = base_mbmi;
1632
4.58M
    if (is_interintra_mode) {
1633
      // Only use SIMPLE_TRANSLATION for interintra
1634
0
      mbmi->motion_mode = SIMPLE_TRANSLATION;
1635
4.58M
    } else {
1636
4.58M
      mbmi->motion_mode = (MOTION_MODE)mode_index;
1637
4.58M
      assert(mbmi->ref_frame[1] != INTRA_FRAME);
1638
4.58M
    }
1639
1640
4.58M
    if (cpi->oxcf.algo_cfg.sharpness == 3 &&
1641
0
        (mbmi->motion_mode == OBMC_CAUSAL ||
1642
0
         mbmi->motion_mode == WARPED_CAUSAL))
1643
0
      continue;
1644
1645
    // Do not search OBMC if the probability of selecting it is below a
1646
    // predetermined threshold for this update_type and block size.
1647
4.58M
    const FRAME_UPDATE_TYPE update_type =
1648
4.58M
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1649
4.58M
    int use_actual_frame_probs = 1;
1650
4.58M
    int prune_obmc;
1651
#if CONFIG_FPMT_TEST
1652
    use_actual_frame_probs =
1653
        (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
1654
    if (!use_actual_frame_probs) {
1655
      prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
1656
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
1657
    }
1658
#endif
1659
4.58M
    if (use_actual_frame_probs) {
1660
4.58M
      prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
1661
4.58M
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
1662
4.58M
    }
1663
4.59M
    if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
1664
4.59M
        mbmi->motion_mode == OBMC_CAUSAL)
1665
550k
      continue;
1666
1667
4.03M
    if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
1668
      // SIMPLE_TRANSLATION mode: no need to recalculate.
1669
      // The prediction is calculated before motion_mode_rd() is called in
1670
      // handle_inter_mode()
1671
3.56M
    } else if (mbmi->motion_mode == OBMC_CAUSAL) {
1672
0
      const uint32_t cur_mv = mbmi->mv[0].as_int;
1673
      // OBMC_CAUSAL not allowed for compound prediction
1674
0
      assert(!is_comp_pred);
1675
0
      if (have_newmv_in_inter_mode(this_mode)) {
1676
0
        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
1677
0
                                 &mbmi->mv[0], NULL);
1678
0
        tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1679
0
      }
1680
0
      if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
1681
        // Build the predictor according to the current motion vector if it has
1682
        // not already been built
1683
0
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1684
0
                                      0, av1_num_planes(cm) - 1);
1685
0
      }
1686
      // Build the inter predictor by blending the predictor corresponding to
1687
      // this MV, and the neighboring blocks using the OBMC model
1688
0
      av1_build_obmc_inter_prediction(
1689
0
          cm, xd, args->above_pred_buf, args->above_pred_stride,
1690
0
          args->left_pred_buf, args->left_pred_stride);
1691
0
#if !CONFIG_REALTIME_ONLY
1692
470k
    } else if (mbmi->motion_mode == WARPED_CAUSAL) {
1693
470k
      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
1694
470k
      mbmi->motion_mode = WARPED_CAUSAL;
1695
470k
      mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
1696
470k
      mbmi->interp_filters =
1697
470k
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1698
1699
470k
      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
1700
470k
      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
1701
      // Select the samples according to motion vector difference
1702
470k
      if (mbmi->num_proj_ref > 1) {
1703
221k
        mbmi->num_proj_ref = av1_selectSamples(
1704
221k
            &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
1705
221k
      }
1706
1707
      // Compute the warped motion parameters with a least squares fit
1708
      //  using the collected samples
1709
470k
      if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
1710
470k
                               mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
1711
470k
                               &mbmi->wm_params, mi_row, mi_col)) {
1712
385k
        assert(!is_comp_pred);
1713
385k
        if (have_newmv_in_inter_mode(this_mode)) {
1714
          // Refine MV for NEWMV mode
1715
81.4k
          const int_mv mv0 = mbmi->mv[0];
1716
81.4k
          const WarpedMotionParams wm_params0 = mbmi->wm_params;
1717
81.4k
          const int num_proj_ref0 = mbmi->num_proj_ref;
1718
1719
81.4k
          const int_mv ref_mv = av1_get_ref_mv(x, 0);
1720
81.4k
          SUBPEL_MOTION_SEARCH_PARAMS ms_params;
1721
81.4k
          av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
1722
81.4k
                                            &ref_mv.as_mv, NULL);
1723
1724
          // Refine MV in a small range.
1725
81.4k
          av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
1726
81.4k
                               total_samples, cpi->sf.mv_sf.warp_search_method,
1727
81.4k
                               cpi->sf.mv_sf.warp_search_iters);
1728
1729
81.4k
          if (mv0.as_int != mbmi->mv[0].as_int) {
1730
            // Keep the refined MV and WM parameters.
1731
51.9k
            tmp_rate_mv = av1_mv_bit_cost(
1732
51.9k
                &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
1733
51.9k
                x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1734
51.9k
            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1735
51.9k
          } else {
1736
            // Restore the old MV and WM parameters.
1737
29.5k
            mbmi->mv[0] = mv0;
1738
29.5k
            mbmi->wm_params = wm_params0;
1739
29.5k
            mbmi->num_proj_ref = num_proj_ref0;
1740
29.5k
          }
1741
81.4k
        }
1742
1743
        // Build the warped predictor
1744
385k
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
1745
385k
                                      av1_num_planes(cm) - 1);
1746
385k
      } else {
1747
84.9k
        continue;
1748
84.9k
      }
1749
470k
#endif  // !CONFIG_REALTIME_ONLY
1750
18.4E
    } else if (is_interintra_mode) {
1751
0
      const int ret =
1752
0
          av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
1753
0
                                      &tmp_rate_mv, &tmp_rate2, orig_dst);
1754
0
      if (ret < 0) continue;
1755
0
    }
1756
1757
    // If we are searching newmv and the mv is the same as refmv, skip the
1758
    // current mode
1759
3.95M
    if (!av1_check_newmv_joint_nonzero(cm, x)) continue;
1760
1761
    // Update rd_stats for the current motion mode
1762
3.80M
    txfm_info->skip_txfm = 0;
1763
3.80M
    rd_stats->dist = 0;
1764
3.80M
    rd_stats->sse = 0;
1765
3.80M
    rd_stats->skip_txfm = 1;
1766
3.80M
    rd_stats->rate = tmp_rate2;
1767
3.80M
    const ModeCosts *mode_costs = &x->mode_costs;
1768
3.80M
    if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
1769
3.80M
    if (interintra_allowed) {
1770
0
      rd_stats->rate +=
1771
0
          mode_costs->interintra_cost[size_group_lookup[bsize]]
1772
0
                                     [mbmi->ref_frame[1] == INTRA_FRAME];
1773
0
    }
1774
3.80M
    if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
1775
1.62M
        (mbmi->ref_frame[1] != INTRA_FRAME)) {
1776
1.62M
      if (last_motion_mode_allowed == WARPED_CAUSAL) {
1777
1.34M
        rd_stats->rate +=
1778
1.34M
            mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
1779
1.34M
      } else {
1780
283k
        rd_stats->rate +=
1781
283k
            mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
1782
283k
      }
1783
1.62M
    }
1784
1785
3.80M
    int64_t this_yrd = INT64_MAX;
1786
1787
3.80M
    if (!do_tx_search) {
1788
      // Avoid doing a transform search here to speed up the overall mode
1789
      // search. It will be done later in the mode search if the current
1790
      // motion mode seems promising.
1791
144k
      int64_t curr_sse = -1;
1792
144k
      int64_t sse_y = -1;
1793
144k
      int est_residue_cost = 0;
1794
144k
      int64_t est_dist = 0;
1795
144k
      int64_t est_rd = 0;
1796
144k
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1797
88.7k
        curr_sse = get_sse(cpi, x, &sse_y);
1798
88.7k
        const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
1799
88.7k
                                                 &est_residue_cost, &est_dist);
1800
88.7k
        (void)has_est_rd;
1801
88.7k
        assert(has_est_rd);
1802
88.7k
      } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
1803
55.8k
                 cpi->sf.rt_sf.use_nonrd_pick_mode) {
1804
55.8k
        model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
1805
55.8k
            cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
1806
55.8k
            NULL, &curr_sse, NULL, NULL, NULL);
1807
55.8k
        sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
1808
55.8k
      }
1809
144k
      est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
1810
144k
      if (est_rd * 0.80 > *best_est_rd) {
1811
2.36k
        mbmi->ref_frame[1] = ref_frame_1;
1812
2.36k
        continue;
1813
2.36k
      }
1814
142k
      const int mode_rate = rd_stats->rate;
1815
142k
      rd_stats->rate += est_residue_cost;
1816
142k
      rd_stats->dist = est_dist;
1817
142k
      rd_stats->rdcost = est_rd;
1818
142k
      if (rd_stats->rdcost < *best_est_rd) {
1819
88.0k
        *best_est_rd = rd_stats->rdcost;
1820
88.0k
        assert(sse_y >= 0);
1821
88.0k
        ref_skip_rd[1] = txfm_rd_gate_level
1822
88.0k
                             ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
1823
88.0k
                             : INT64_MAX;
1824
88.0k
      }
1825
142k
      if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
1826
0
        if (!is_comp_pred) {
1827
0
          assert(curr_sse >= 0);
1828
0
          inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1829
0
                                rd_stats->rdcost, rd_stats, rd_stats_y,
1830
0
                                rd_stats_uv, mbmi);
1831
0
        }
1832
142k
      } else {
1833
142k
        assert(curr_sse >= 0);
1834
142k
        inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1835
142k
                              rd_stats->rdcost, rd_stats, rd_stats_y,
1836
142k
                              rd_stats_uv, mbmi);
1837
142k
      }
1838
142k
      mbmi->skip_txfm = 0;
1839
3.66M
    } else {
1840
      // Perform full transform search
1841
3.66M
      int64_t skip_rd = INT64_MAX;
1842
3.66M
      int64_t skip_rdy = INT64_MAX;
1843
3.66M
      if (txfm_rd_gate_level) {
1844
        // Check if the mode is good enough based on skip RD
1845
3.48M
        int64_t sse_y = INT64_MAX;
1846
3.48M
        int64_t curr_sse = get_sse(cpi, x, &sse_y);
1847
3.48M
        skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
1848
3.48M
        skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
1849
3.48M
        int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
1850
3.48M
                                        txfm_rd_gate_level, 0);
1851
3.48M
        if (!eval_txfm) continue;
1852
3.48M
      }
1853
1854
      // Do transform search
1855
3.19M
      const int mode_rate = rd_stats->rate;
1856
3.19M
      if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
1857
3.19M
                           rd_stats->rate, ref_best_rd)) {
1858
1.34M
        if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
1859
737
          return INT64_MAX;
1860
737
        }
1861
1.34M
        continue;
1862
1.34M
      }
1863
1.85M
      const int skip_ctx = av1_get_skip_txfm_context(xd);
1864
1.85M
      const int y_rate =
1865
1.85M
          rd_stats->skip_txfm
1866
1.85M
              ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
1867
1.85M
              : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
1868
1.85M
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);
1869
1870
1.85M
      const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1871
1.85M
      if (curr_rd < ref_best_rd) {
1872
1.85M
        ref_best_rd = curr_rd;
1873
1.85M
        ref_skip_rd[0] = skip_rd;
1874
1.85M
        ref_skip_rd[1] = skip_rdy;
1875
1.85M
      }
1876
1.85M
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1877
395k
        inter_mode_data_push(
1878
395k
            tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
1879
395k
            rd_stats_y->rate + rd_stats_uv->rate +
1880
395k
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
1881
395k
      }
1882
1.85M
    }
1883
1884
1.99M
    if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
1885
4.71k
      if (is_nontrans_global_motion(xd, xd->mi[0])) {
1886
4.71k
        mbmi->interp_filters =
1887
4.71k
            av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1888
4.71k
      }
1889
4.71k
    }
1890
1891
1.99M
    if (this_yrd < INT64_MAX) {
1892
1.85M
      adjust_cost(cpi, x, &this_yrd, /*is_inter_pred=*/true);
1893
1.85M
    }
1894
1.99M
    adjust_rdcost(cpi, x, rd_stats, /*is_inter_pred=*/true);
1895
    // Bug 494653438: If do_tx_search is 0, rd_stats_y is uninitialized, so
1896
    // valgrind will warn if we use rd_stats_y->rdcost in a conditional.
1897
1.99M
    if (!do_tx_search || rd_stats_y->rdcost < INT64_MAX) {
1898
1.99M
      adjust_rdcost(cpi, x, rd_stats_y, /*is_inter_pred=*/true);
1899
1.99M
    }
1900
1901
1.99M
    const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1902
1.99M
    if (mode_index == 0) {
1903
1.89M
      args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
1904
1.89M
    }
1905
1.99M
    int64_t best_scaled_rd = best_rd;
1906
1.99M
    int64_t this_scaled_rd = tmp_rd;
1907
1.99M
    if (mode_index != 0)
1908
104k
      increase_motion_mode_rd(&best_mbmi, mbmi, &best_scaled_rd,
1909
104k
                              &this_scaled_rd,
1910
104k
                              cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct,
1911
104k
                              cpi->sf.inter_sf.bias_obmc_mode_rd_scale_pct);
1912
1913
1.99M
    if (mode_index == 0 || this_scaled_rd < best_scaled_rd) {
1914
      // Update best_rd data if this is the best motion mode so far
1915
1.99M
      best_mbmi = *mbmi;
1916
1.99M
      best_rd = tmp_rd;
1917
1.99M
      best_rd_stats = *rd_stats;
1918
1.99M
      best_rd_stats_y = *rd_stats_y;
1919
1.99M
      best_rate_mv = tmp_rate_mv;
1920
1.99M
      *yrd = this_yrd;
1921
1.99M
      if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
1922
1.99M
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
1923
1.99M
      best_xskip_txfm = mbmi->skip_txfm;
1924
1.99M
    }
1925
1.99M
  }
1926
  // Update RD and mbmi stats for selected motion mode
1927
5.21M
  mbmi->ref_frame[1] = ref_frame_1;
1928
5.21M
  *rate_mv = best_rate_mv;
1929
5.21M
  if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
1930
3.22M
    av1_invalid_rd_stats(rd_stats);
1931
3.22M
    restore_dst_buf(xd, *orig_dst, num_planes);
1932
3.22M
    return INT64_MAX;
1933
3.22M
  }
1934
1.98M
  *mbmi = best_mbmi;
1935
1.98M
  *rd_stats = best_rd_stats;
1936
1.98M
  *rd_stats_y = best_rd_stats_y;
1937
1.98M
  if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
1938
1.98M
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
1939
1.98M
  txfm_info->skip_txfm = best_xskip_txfm;
1940
1941
1.98M
  restore_dst_buf(xd, *orig_dst, num_planes);
1942
1.98M
  return 0;
1943
5.21M
}
1944
1945
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
1946
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
1947
104k
                            const BUFFER_SET *const orig_dst, int64_t best_rd) {
1948
104k
  assert(bsize < BLOCK_SIZES_ALL);
1949
104k
  const AV1_COMMON *cm = &cpi->common;
1950
104k
  const int num_planes = av1_num_planes(cm);
1951
104k
  MACROBLOCKD *const xd = &x->e_mbd;
1952
104k
  const int mi_row = xd->mi_row;
1953
104k
  const int mi_col = xd->mi_col;
1954
104k
  int64_t total_sse = 0;
1955
104k
  int64_t this_rd = INT64_MAX;
1956
104k
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
1957
104k
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];
1958
1959
129k
  for (int plane = 0; plane < num_planes; ++plane) {
1960
    // Call av1_enc_build_inter_predictor() for one plane at a time.
1961
122k
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1962
122k
                                  plane, plane);
1963
122k
    const struct macroblockd_plane *const pd = &xd->plane[plane];
1964
122k
    const BLOCK_SIZE plane_bsize =
1965
122k
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
1966
1967
122k
    av1_subtract_plane(x, plane_bsize, plane);
1968
1969
122k
    int64_t sse =
1970
122k
        av1_pixel_diff_dist(x, plane, 0, 0, plane_bsize, plane_bsize, NULL);
1971
122k
    if (is_cur_buf_hbd(xd)) sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
1972
122k
    sse <<= 4;
1973
122k
    total_sse += sse;
1974
    // When current rd cost is more than the best rd, skip evaluation of
1975
    // remaining planes.
1976
122k
    this_rd = RDCOST(x->rdmult, rd_stats->rate, total_sse);
1977
122k
    if (this_rd > best_rd) break;
1978
122k
  }
1979
1980
104k
  rd_stats->dist = rd_stats->sse = total_sse;
1981
104k
  rd_stats->rdcost = this_rd;
1982
1983
104k
  restore_dst_buf(xd, *orig_dst, num_planes);
1984
104k
  return 0;
1985
104k
}
1986
1987
// Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the relevant
1988
// mode
1989
// Note(rachelbarker): This speed feature currently does not interact correctly
1990
// with global motion. The issue is that, when global motion is used, GLOBALMV
1991
// produces a different prediction to NEARESTMV/NEARMV even if the motion
1992
// vectors are the same. Thus GLOBALMV should not be pruned in this case.
1993
static inline int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
1994
                                      int ref_idx,
1995
                                      const MV_REFERENCE_FRAME *ref_frame,
1996
0
                                      PREDICTION_MODE single_mode) {
1997
0
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1998
0
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1999
0
  assert(single_mode != NEWMV);
2000
0
  if (single_mode == NEARESTMV) {
2001
0
    return 0;
2002
0
  } else if (single_mode == NEARMV) {
2003
    // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
2004
    // when ref_mv_count = 1, NEARMV is same as GLOBALMV
2005
0
    if (ref_mv_count < 2) return 1;
2006
0
  } else if (single_mode == GLOBALMV) {
2007
    // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
2008
0
    if (ref_mv_count == 0) return 1;
2009
    // when ref_mv_count == 1, NEARMV is same as GLOBALMV
2010
0
    else if (ref_mv_count == 1)
2011
0
      return 0;
2012
2013
0
    int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
2014
    // Check GLOBALMV is matching with any mv in ref_mv_stack
2015
0
    for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
2016
0
      int_mv this_mv;
2017
2018
0
      if (ref_idx == 0)
2019
0
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
2020
0
      else
2021
0
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
2022
2023
0
      if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
2024
0
        return 1;
2025
0
    }
2026
0
  }
2027
0
  return 0;
2028
0
}
2029
2030
static inline int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
2031
                              int ref_idx, int ref_mv_idx,
2032
                              int skip_repeated_ref_mv,
2033
                              const MV_REFERENCE_FRAME *ref_frame,
2034
5.60M
                              const MB_MODE_INFO_EXT *mbmi_ext) {
2035
5.60M
  const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
2036
5.60M
  assert(is_inter_singleref_mode(single_mode));
2037
5.60M
  if (single_mode == NEWMV) {
2038
1.82M
    this_mv->as_int = INVALID_MV;
2039
3.78M
  } else if (single_mode == GLOBALMV) {
2040
1.26M
    if (skip_repeated_ref_mv &&
2041
0
        check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
2042
0
      return 0;
2043
1.26M
    *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
2044
2.51M
  } else {
2045
2.51M
    assert(single_mode == NEARMV || single_mode == NEARESTMV);
2046
2.51M
    const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
2047
2.51M
    const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
2048
2.51M
    if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
2049
1.39M
      assert(ref_mv_offset >= 0);
2050
1.39M
      if (ref_idx == 0) {
2051
1.11M
        *this_mv =
2052
1.11M
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
2053
1.11M
      } else {
2054
276k
        *this_mv =
2055
276k
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
2056
276k
      }
2057
1.39M
    } else {
2058
1.12M
      if (skip_repeated_ref_mv &&
2059
0
          check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
2060
0
        return 0;
2061
1.12M
      *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
2062
1.12M
    }
2063
2.51M
  }
2064
5.60M
  return 1;
2065
5.60M
}
2066
2067
// Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
2068
// population
2069
static inline int skip_nearest_near_mv_using_refmv_weight(
2070
    const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
2071
4.31M
    const int8_t ref_frame_type, PREDICTION_MODE best_mode) {
2072
4.31M
  if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
2073
  // Do not skip the mode if the current block has not yet obtained a valid
2074
  // inter mode.
2075
1.93M
  if (!is_inter_mode(best_mode)) return 0;
2076
2077
1.07M
  const MACROBLOCKD *xd = &x->e_mbd;
2078
  // Do not skip the mode if both the top and left neighboring blocks are not
2079
  // available.
2080
1.07M
  if (!xd->left_available || !xd->up_available) return 0;
2081
540k
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2082
540k
  const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
2083
540k
  const int ref_mv_count =
2084
540k
      AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
2085
2086
540k
  if (ref_mv_count == 0) return 0;
2087
  // If ref mv list has at least one nearest candidate do not prune NEARESTMV
2088
320k
  if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;
2089
2090
  // Count number of ref mvs populated from nearest candidates
2091
292k
  int nearest_refmv_count = 0;
2092
831k
  for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
2093
538k
    if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
2094
538k
  }
2095
2096
  // nearest_refmv_count indicates the closeness of block motion characteristics
2097
  // with respect to its spatial neighbor. Smaller value of nearest_refmv_count
2098
  // w.r.t to ref_mv_count means less correlation with its spatial neighbors.
2099
  // Hence less possibility for NEARESTMV and NEARMV modes becoming the best
2100
  // mode since these modes work well for blocks that shares similar motion
2101
  // characteristics with its neighbor. Thus, NEARMV mode is pruned when
2102
  // nearest_refmv_count is relatively smaller than ref_mv_count and NEARESTMV
2103
  // mode is pruned if none of the ref mvs are populated from nearest candidate.
2104
292k
  const int prune_thresh = 1 + (ref_mv_count >= 2);
2105
292k
  if (nearest_refmv_count < prune_thresh) return 1;
2106
111k
  return 0;
2107
292k
}
2108
2109
// This function update the non-new mv for the current prediction mode
2110
static inline int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
2111
                               const AV1_COMMON *cm, const MACROBLOCK *x,
2112
5.16M
                               int skip_repeated_ref_mv) {
2113
5.16M
  const MACROBLOCKD *xd = &x->e_mbd;
2114
5.16M
  const MB_MODE_INFO *mbmi = xd->mi[0];
2115
5.16M
  const int is_comp_pred = has_second_ref(mbmi);
2116
2117
5.16M
  int ret = 1;
2118
10.7M
  for (int i = 0; i < is_comp_pred + 1; ++i) {
2119
5.60M
    int_mv this_mv;
2120
5.60M
    this_mv.as_int = INVALID_MV;
2121
5.60M
    ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
2122
5.60M
                      skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
2123
5.60M
    if (!ret) return 0;
2124
5.60M
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
2125
5.60M
    if (single_mode == NEWMV) {
2126
1.82M
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2127
1.82M
      cur_mv[i] =
2128
1.82M
          (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
2129
1.72M
                         .this_mv
2130
1.82M
                   : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
2131
109k
                         .comp_mv;
2132
3.77M
    } else {
2133
3.77M
      ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
2134
3.77M
    }
2135
5.60M
  }
2136
5.16M
  return ret;
2137
5.16M
}
2138
2139
static inline int get_drl_cost(const MB_MODE_INFO *mbmi,
2140
                               const MB_MODE_INFO_EXT *mbmi_ext,
2141
                               const int (*const drl_mode_cost0)[2],
2142
6.33M
                               int8_t ref_frame_type) {
2143
6.33M
  int cost = 0;
2144
6.33M
  if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
2145
6.11M
    for (int idx = 0; idx < 2; ++idx) {
2146
4.50M
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
2147
2.19M
        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
2148
2.19M
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
2149
2.19M
        if (mbmi->ref_mv_idx == idx) return cost;
2150
2.19M
      }
2151
4.50M
    }
2152
1.60M
    return cost;
2153
2.64M
  }
2154
2155
3.68M
  if (have_nearmv_in_inter_mode(mbmi->mode)) {
2156
2.79M
    for (int idx = 1; idx < 3; ++idx) {
2157
1.99M
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
2158
708k
        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
2159
708k
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
2160
708k
        if (mbmi->ref_mv_idx == (idx - 1)) return cost;
2161
708k
      }
2162
1.99M
    }
2163
794k
    return cost;
2164
1.09M
  }
2165
2.59M
  return cost;
2166
3.68M
}
2167
2168
// Returns 1 when, for both reference slots (ref_idx 0 and 1), every slot whose
// single mode derived from this_mode is NEWMV has a non-zero entry in
// args->single_newmv_valid[mbmi->ref_mv_idx][ref_frame]. Returns 0 as soon as
// one such slot has a zero entry.
static inline int is_single_newmv_valid(const HandleInterModeArgs *const args,
2169
                                        const MB_MODE_INFO *const mbmi,
2170
231k
                                        PREDICTION_MODE this_mode) {
2171
493k
  for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
2172
367k
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
2173
367k
    const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
2174
367k
    if (single_mode == NEWMV &&
2175
333k
        args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
2176
105k
      return 0;
2177
105k
    }
2178
367k
  }
2179
126k
  return 1;
2180
231k
}
2181
2182
// Returns how many ref_mv_idx candidates should be considered for `mode` with
// the given reference frame pair.
// A DRL choice exists when the mode contains NEARMV and there are more than 2
// reference MVs, or when the mode is NEWMV / NEW_NEWMV and there is more than
// 1. In that case the result is
// min(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv); otherwise it is 1.
static int get_drl_refmv_count(const MACROBLOCK *const x,
2183
                               const MV_REFERENCE_FRAME *ref_frame,
2184
4.78M
                               PREDICTION_MODE mode) {
2185
4.78M
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2186
4.78M
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
2187
4.78M
  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
2188
4.78M
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
2189
4.78M
  const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);
2190
4.78M
  const int has_drl =
2191
4.78M
      (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1);
2192
4.78M
  const int ref_set =
2193
4.78M
      has_drl ? AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv) : 1;
2194
2195
4.78M
  return ref_set;
2196
4.78M
}
2197
2198
// Checks if a particular ref_mv_idx should be pruned. Returns 1 to prune and
// 0 to keep. Always returns 1 when reduce_inter_modes >= 3; otherwise
// reduce_inter_modes is asserted to be 2 and the decision depends on qindex.
2199
static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
2200
                                         const int qindex,
2201
34.9k
                                         const int ref_mv_idx) {
2202
34.9k
  if (reduce_inter_modes >= 3) return 1;
2203
  // Q-index logic based pruning is enabled only for
2204
  // reduce_inter_modes = 2.
2205
34.9k
  assert(reduce_inter_modes == 2);
2206
  // When reduce_inter_modes=2, pruning happens as below based on q index
  // (the ranges below assume QINDEX_RANGE is 256 -- TODO confirm).
2207
  // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
2208
  // For q index range between 86 and 170: prune if ref_mv_idx >= 2.
2209
  // For q index range between 171 and 255: prune only if ref_mv_idx >= 3.
2210
6
  const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
2211
6
  return (ref_mv_idx >= min_prune_ref_mv_idx);
2212
34.9k
}
2213
2214
// Whether this reference motion vector can be skipped, based on initial
2215
// heuristics.
// Returns true when ref_mv_idx should be skipped, false when it should be
// evaluated. Side effect: unless one of the reduce_inter_modes checks returns
// early, mbmi->ref_mv_idx is overwritten with ref_mv_idx before the remaining
// checks run, and it stays set on return.
2216
static bool ref_mv_idx_early_breakout(
2217
    const SPEED_FEATURES *const sf,
2218
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
2219
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
2220
1.15M
    int ref_mv_idx) {
2221
1.15M
  MACROBLOCKD *xd = &x->e_mbd;
2222
1.15M
  MB_MODE_INFO *mbmi = xd->mi[0];
2223
1.15M
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2224
1.15M
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2225
1.15M
  const int is_comp_pred = has_second_ref(mbmi);
  // The speed-feature checks below only apply to candidates after the first.
2226
1.15M
  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
    // For LAST2 / LAST3 references, skip candidates whose weight is below
    // REF_CAT_LEVEL.
2227
672k
    if (mbmi->ref_frame[0] == LAST2_FRAME ||
2228
663k
        mbmi->ref_frame[0] == LAST3_FRAME ||
2229
663k
        mbmi->ref_frame[1] == LAST2_FRAME ||
2230
663k
        mbmi->ref_frame[1] == LAST3_FRAME) {
2231
9.39k
      const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
2232
9.39k
      if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
2233
9.39k
          REF_CAT_LEVEL) {
2234
9.32k
        return true;
2235
9.32k
      }
2236
9.39k
    }
2237
    // TODO(any): Experiment with reduce_inter_modes for compound prediction
    // Single-reference modes containing NEWMV whose reference is neither the
    // nearest past nor the nearest future reference: apply the qindex-based
    // pruning, again only to candidates with weight below REF_CAT_LEVEL.
2238
663k
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
2239
512k
        have_newmv_in_inter_mode(mbmi->mode)) {
2240
383k
      if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
2241
89.4k
          mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
2242
34.9k
        const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
2243
34.9k
        const int do_prune = prune_ref_mv_idx_using_qindex(
2244
34.9k
            sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
2245
34.9k
        if (do_prune &&
2246
34.9k
            (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
2247
34.9k
             REF_CAT_LEVEL)) {
2248
33.3k
          return true;
2249
33.3k
        }
2250
34.9k
      }
2251
383k
    }
2252
663k
  }
2253
  // Commit the candidate index to mbmi: is_single_newmv_valid() and
  // get_drl_cost() below both read mbmi->ref_mv_idx.
2254
1.11M
  mbmi->ref_mv_idx = ref_mv_idx;
2255
1.11M
  if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
2256
105k
    return true;
2257
105k
  }
  // Skip when the RD cost of the signaling rate alone (reference frame,
  // single/compound and DRL costs, with zero distortion) already exceeds
  // ref_best_rd. NEARESTMV and NEAREST_NEARESTMV are never skipped this way.
2258
1.01M
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
2259
1.01M
  const int drl_cost = get_drl_cost(
2260
1.01M
      mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
2261
1.01M
  est_rd_rate += drl_cost;
2262
1.01M
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
2263
78
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
2264
78
    return true;
2265
78
  }
2266
1.01M
  return false;
2267
1.01M
}
2268
2269
// Compute the estimated RD cost for the motion vector with simple translation.
// Returns INT64_MAX when build_cur_mv() fails or when the RD cost of the
// signaling rate alone exceeds ref_best_rd; otherwise returns the RD cost of
// the signaling rate plus the rate and distortion estimated by
// model_rd_sb_fn[MODELRD_CURVFIT].
// Side effects: overwrites several fields of xd->mi[0] (compound type and
// indices, ref_frame[1] when it was INTRA_FRAME, num_proj_ref, motion_mode,
// ref_mv_idx, mv[], interpolation filters) and builds the inter predictor for
// AOM_PLANE_Y into the current dst buffers.
2270
static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
2271
                                          HandleInterModeArgs *args,
2272
                                          int ref_mv_idx, int64_t ref_best_rd,
2273
7.44k
                                          BLOCK_SIZE bsize) {
2274
7.44k
  MACROBLOCKD *xd = &x->e_mbd;
2275
7.44k
  MB_MODE_INFO *mbmi = xd->mi[0];
2276
7.44k
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2277
7.44k
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2278
7.44k
  const AV1_COMMON *cm = &cpi->common;
2279
7.44k
  const int is_comp_pred = has_second_ref(mbmi);
2280
7.44k
  const ModeCosts *mode_costs = &x->mode_costs;
2281
2282
7.44k
  struct macroblockd_plane *p = xd->plane;
  // Snapshot of the current destination buffers and strides for the three
  // planes; passed to av1_enc_build_inter_predictor() below.
2283
7.44k
  const BUFFER_SET orig_dst = {
2284
7.44k
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
2285
7.44k
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
2286
7.44k
  };
2287
7.44k
  RD_STATS rd_stats;
2288
7.44k
  av1_init_rd_stats(&rd_stats);
2289
  // Force the plain configuration: COMPOUND_AVERAGE, no inter-intra second
  // reference.
2290
7.44k
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2291
7.44k
  mbmi->comp_group_idx = 0;
2292
7.44k
  mbmi->compound_idx = 1;
2293
7.44k
  if (mbmi->ref_frame[1] == INTRA_FRAME) {
2294
0
    mbmi->ref_frame[1] = NONE_FRAME;
2295
0
  }
2296
7.44k
  int16_t mode_ctx =
2297
7.44k
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
2298
2299
7.44k
  mbmi->num_proj_ref = 0;
2300
7.44k
  mbmi->motion_mode = SIMPLE_TRANSLATION;
2301
7.44k
  mbmi->ref_mv_idx = ref_mv_idx;
2302
  // Accumulate the signaling rate: reference frame, single/compound, DRL and
  // (further below) the mode itself.
2303
7.44k
  rd_stats.rate += args->ref_frame_cost + args->single_comp_cost;
2304
7.44k
  const int drl_cost =
2305
7.44k
      get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
2306
7.44k
  rd_stats.rate += drl_cost;
2307
2308
7.44k
  int_mv cur_mv[2];
2309
7.44k
  if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
2310
96
    return INT64_MAX;
2311
96
  }
  // This helper is only expected to be called for modes containing NEARMV.
2312
7.44k
  assert(have_nearmv_in_inter_mode(mbmi->mode));
2313
15.3k
  for (int i = 0; i < is_comp_pred + 1; ++i) {
2314
7.96k
    mbmi->mv[i].as_int = cur_mv[i].as_int;
2315
7.96k
  }
2316
7.34k
  const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
2317
7.34k
  rd_stats.rate += ref_mv_cost;
2318
  // Early out: the rate alone (zero distortion) is already worse than the
  // best RD seen so far.
2319
7.34k
  if (RDCOST(x->rdmult, rd_stats.rate, 0) > ref_best_rd) {
2320
0
    return INT64_MAX;
2321
0
  }
2322
2323
7.34k
  mbmi->motion_mode = SIMPLE_TRANSLATION;
2324
7.34k
  mbmi->num_proj_ref = 0;
2325
7.34k
  if (is_comp_pred) {
2326
    // Only compound_average
2327
622
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2328
622
    mbmi->comp_group_idx = 0;
2329
622
    mbmi->compound_idx = 1;
2330
622
  }
2331
7.34k
  set_default_interp_filters(mbmi, cm->features.interp_filter);
2332
2333
7.34k
  const int mi_row = xd->mi_row;
2334
7.34k
  const int mi_col = xd->mi_col;
  // Build the predictor for the luma plane only (plane range Y..Y) and model
  // its rate and distortion instead of running a transform search.
2335
7.34k
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
2336
7.34k
                                AOM_PLANE_Y, AOM_PLANE_Y);
2337
7.34k
  int est_rate;
2338
7.34k
  int64_t est_dist;
2339
7.34k
  model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
2340
7.34k
                                  NULL, NULL, NULL, NULL, NULL);
2341
7.34k
  return RDCOST(x->rdmult, rd_stats.rate + est_rate, est_dist);
2342
7.34k
}
2343
2344
// Represents a set of integers, from 0 to sizeof(int) * 8 - 1, as bits in
2345
// an integer. 0 for the i-th bit means that integer is excluded, 1 means
2346
// it is included.
2347
1.01M
// Adds `index` to the bit set stored in `*mask` by setting bit `index`.
// `index` must be in [0, sizeof(int) * 8 - 1]. The shift is performed on an
// unsigned operand so that selecting the top bit of the int does not left-shift
// a signed 1 into the sign bit, which is undefined behavior in C.
static inline void mask_set_bit(int *mask, int index) {
  *mask = (int)((unsigned int)*mask | (1u << index));
}
2348
2349
5.33M
// Returns true when bit `index` of `mask` is set, i.e. when `index` is a
// member of the set encoded in `mask`.
// NOTE(review): `mask` is a signed int, so the result of the right shift for
// a negative mask is implementation-defined in C -- confirm callers never
// rely on the top bit.
static inline bool mask_check_bit(int mask, int index) {
2350
5.33M
  return (mask >> index) & 0x1;
2351
5.33M
}
2352
2353
// Before performing the full MV search in handle_inter_mode, do a simple
2354
// translation search and see if we can eliminate any motion vectors.
2355
// Returns an integer where, if the i-th bit is set, it means that the i-th
2356
// motion vector should be searched. The simple translation pruning is only
// applied to modes containing NEARMV (and only when the speed feature is on,
// the block is large enough and no reference is scaled); in every other case
// the mask holds each index that survives ref_mv_idx_early_breakout().
2357
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
2358
                                HandleInterModeArgs *const args,
2359
                                int64_t ref_best_rd, BLOCK_SIZE bsize,
2360
4.64M
                                const int ref_set) {
2361
  // If there is only one candidate, do not prune it: it is better to
2362
  // evaluate it than to skip it. The returned mask has only bit 0 set.
2363
4.64M
  if (ref_set == 1) return 1;
2364
485k
  AV1_COMMON *const cm = &cpi->common;
2365
485k
  const MACROBLOCKD *const xd = &x->e_mbd;
2366
485k
  const MB_MODE_INFO *const mbmi = xd->mi[0];
2367
485k
  const PREDICTION_MODE this_mode = mbmi->mode;
2368
2369
  // Only search indices if they have some chance of being good.
2370
485k
  int good_indices = 0;
2371
1.64M
  for (int i = 0; i < ref_set; ++i) {
2372
1.15M
    if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
2373
1.15M
                                  ref_best_rd, i)) {
2374
148k
      continue;
2375
148k
    }
2376
1.01M
    mask_set_bit(&good_indices, i);
2377
1.01M
  }
2378
2379
  // Only prune in NEARMV mode, if the speed feature is set, and the block size
2380
  // is large enough. If these conditions are not met, return all good indices
2381
  // found so far.
2382
485k
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
2383
0
    return good_indices;
2384
485k
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
2385
92.4k
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
2386
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
2387
  // so b/2384 can be resolved.
2388
1.43k
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
2389
2.94k
      (mbmi->ref_frame[1] > 0 &&
2390
248
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
2391
0
    return good_indices;
2392
0
  }
2393
2394
  // Calculate the RD cost for the motion vectors using simple translation.
  // Entries that are never evaluated keep INT64_MAX.
2395
1.43k
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
2396
8.94k
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
2397
    // If this index is bad, ignore it.
2398
7.50k
    if (!mask_check_bit(good_indices, ref_mv_idx)) {
2399
68
      continue;
2400
68
    }
2401
7.44k
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
2402
7.44k
        cpi, x, args, ref_mv_idx, ref_best_rd, bsize);
2403
7.44k
  }
2404
  // Find the index with the best RD cost.
  // NOTE(review): idx_rdcost has three initializers, so this loop assumes
  // MAX_REF_MV_SEARCH == 3 -- confirm.
2405
1.43k
  int best_idx = 0;
2406
7.32k
  for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
2407
5.89k
    if (idx_rdcost[i] < idx_rdcost[best_idx]) {
2408
1.77k
      best_idx = i;
2409
1.77k
    }
2410
5.89k
  }
2411
  // Only include indices that are good and within a % of the best.
2412
1.43k
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
2413
  // If the simple translation cost is not within this multiple of the
2414
  // best RD, skip it. Note that the cutoff is derived experimentally.
2415
1.43k
  const double ref_dth = 5;
2416
1.43k
  int result = 0;
2417
8.94k
  for (int i = 0; i < ref_set; ++i) {
2418
7.50k
    if (mask_check_bit(good_indices, i) &&
2419
7.44k
        (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
2420
3.37k
        (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
2421
3.32k
      mask_set_bit(&result, i);
2422
3.32k
    }
2423
7.50k
  }
2424
1.43k
  return result;
2425
1.43k
}
2426
2427
/*!\brief Motion mode information for inter mode search speedup.
2428
 *
2429
 * Used in a speed feature to search motion modes other than
2430
 * SIMPLE_TRANSLATION only on winning candidates.
2431
 */
2432
typedef struct motion_mode_candidate {
2433
  /*!
2434
   * Mode info for the motion mode candidate.
2435
   */
2436
  MB_MODE_INFO mbmi;
2437
  /*!
2438
   * Rate describing the cost of the motion vectors for this candidate.
2439
   */
2440
  int rate_mv;
2441
  /*!
2442
   * Rate before motion mode search and transform coding is applied.
2443
   */
2444
  int rate2_nocoeff;
2445
  /*!
2446
   * Flag (0 or 1): when set, the motion mode search is skipped for this
2447
   * candidate and SIMPLE_TRANSLATION is used instead, as a speed feature.
2448
   *
2449
   */
2450
  int skip_motion_mode;
2451
  /*!
2452
   * Total RD cost for this candidate.
2453
   */
2454
  int64_t rd_cost;
2455
} motion_mode_candidate;
2456
2457
/*!\cond */
2458
// Fixed-capacity list of motion mode candidates, holding at most
// MAX_WINNER_MOTION_MODES entries.
// NOTE(review): num_motion_mode_cand is presumably the number of valid
// entries in motion_mode_cand -- verify against the code that fills it.
typedef struct motion_mode_best_st_candidate {
2459
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
2460
  int num_motion_mode_cand;
2461
} motion_mode_best_st_candidate;
2462
2463
// Checks if the current reference frame matches with neighbouring block's
2464
// (top/left) reference frames
// Returns 1 when any reference frame of cur_mbmi (only the first one for
// single prediction, both for compound prediction) equals either entry of
// nb_mbmi->ref_frame; returns 0 otherwise.
2465
static inline int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
2466
1.16M
                                               MB_MODE_INFO *nb_mbmi) {
2467
1.16M
  MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
2468
1.16M
                                          nb_mbmi->ref_frame[1] };
2469
1.16M
  MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
2470
1.16M
                                           cur_mbmi->ref_frame[1] };
2471
1.16M
  const int is_cur_comp_pred = has_second_ref(cur_mbmi);
2472
1.16M
  int match_found = 0;
2473
2474
2.53M
  for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
2475
1.36M
    if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
2476
477k
        (cur_ref_frames[i] == nb_ref_frames[1]))
2477
1.04M
      match_found = 1;
2478
1.36M
  }
2479
1.16M
  return match_found;
2480
1.16M
}
2481
2482
// Scans the blocks in the row directly above the current block, across the
// current block's columns (clamped to total_mi_cols).
// Returns 1 when the above row is unavailable, or when any inter-coded above
// neighbour shares a reference frame with the current block; returns 0 when
// no such neighbour exists.
static inline int find_ref_match_in_above_nbs(const int total_mi_cols,
2483
2.12M
                                              MACROBLOCKD *xd) {
2484
2.12M
  if (!xd->up_available) return 1;
2485
1.59M
  const int mi_col = xd->mi_col;
2486
1.59M
  MB_MODE_INFO **cur_mbmi = xd->mi;
2487
  // prev_row_mi points into the mi array, starting at the beginning of the
2488
  // previous row.
2489
1.59M
  MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
2490
1.59M
  const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
2491
1.59M
  uint8_t mi_step;
  // Step from one above neighbour to the next by that neighbour's width in
  // mi units.
2492
2.72M
  for (int above_mi_col = mi_col; above_mi_col < end_col;
2493
1.61M
       above_mi_col += mi_step) {
2494
1.61M
    MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
2495
1.61M
    mi_step = mi_size_wide[above_mi[0]->bsize];
2496
1.61M
    int match_found = 0;
2497
1.61M
    if (is_inter_block(*above_mi))
2498
585k
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
2499
1.61M
    if (match_found) return 1;
2500
1.61M
  }
2501
1.11M
  return 0;
2502
1.59M
}
2503
2504
// Scans the blocks in the column directly left of the current block, across
// the current block's rows (clamped to total_mi_rows).
// Returns 1 when the left column is unavailable, or when any inter-coded left
// neighbour shares a reference frame with the current block; returns 0 when
// no such neighbour exists.
static inline int find_ref_match_in_left_nbs(const int total_mi_rows,
2505
2.12M
                                             MACROBLOCKD *xd) {
2506
2.12M
  if (!xd->left_available) return 1;
2507
1.57M
  const int mi_row = xd->mi_row;
2508
1.57M
  MB_MODE_INFO **cur_mbmi = xd->mi;
2509
  // prev_col_mi points into the mi array, starting at the top of the
2510
  // previous column
2511
1.57M
  MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
2512
1.57M
  const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
2513
1.57M
  uint8_t mi_step;
  // Step from one left neighbour to the next by that neighbour's height in
  // mi units.
2514
2.69M
  for (int left_mi_row = mi_row; left_mi_row < end_row;
2515
1.59M
       left_mi_row += mi_step) {
2516
1.59M
    MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
2517
1.59M
    mi_step = mi_size_high[left_mi[0]->bsize];
2518
1.59M
    int match_found = 0;
2519
1.59M
    if (is_inter_block(*left_mi))
2520
584k
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
2521
1.59M
    if (match_found) return 1;
2522
1.59M
  }
2523
1.09M
  return 0;
2524
1.57M
}
2525
/*!\endcond */
2526
2527
/*! \brief Struct used to hold TPL data to
2528
 * narrow down parts of the inter mode search.
2529
 */
2530
typedef struct {
2531
  /*!
2532
   * The best (minimum non-zero) inter cost out of all of the reference frames.
2533
   */
2534
  int64_t best_inter_cost;
2535
  /*!
2536
   * The inter cost for each reference frame, indexed by reference frame - 1.
2537
   */
2538
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
2539
} PruneInfoFromTpl;
2540
2541
#if !CONFIG_REALTIME_ONLY
2542
// Accumulates, for the block at (mi_row, mi_col) of size bsize, the TPL
// pred_error of every reference frame into
// inter_cost_info_from_tpl->ref_inter_cost[], then stores the smallest
// non-zero accumulated cost among the references flagged in valid_refs[] in
// best_inter_cost (INT64_MAX when there is none).
// Returns without touching the output when the TPL stats for the current
// frame are not ready. The costs are added with +=, so ref_inter_cost[] is
// expected to be initialized by the caller.
// TODO(Remya): Check if get_tpl_stats_b() can be reused
2543
static inline void get_block_level_tpl_stats(
2544
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
2545
896k
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
2546
896k
  AV1_COMMON *const cm = &cpi->common;
2547
2548
896k
  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
2549
896k
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
2550
896k
  const int tpl_idx = cpi->gf_frame_index;
2551
896k
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
2552
896k
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
2553
354k
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
2554
354k
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
2555
354k
  const int mi_wide = mi_size_wide[bsize];
2556
354k
  const int mi_high = mi_size_high[bsize];
2557
354k
  const int tpl_stride = tpl_frame->stride;
2558
354k
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
  // Column positions are converted with coded_to_superres_mi() and clamped to
  // the superres-upscaled frame width; rows use the coded mi units directly.
2559
354k
  const int mi_col_sr =
2560
354k
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
2561
354k
  const int mi_col_end_sr =
2562
354k
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
2563
354k
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
2564
2565
354k
  const int row_step = step;
2566
354k
  const int col_step_sr =
2567
354k
      coded_to_superres_mi(step, cm->superres_scale_denominator);
2568
716k
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
2569
361k
       row += row_step) {
2570
741k
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
2571
380k
         col += col_step_sr) {
2572
380k
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
2573
380k
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];
2574
2575
      // Sums up the inter cost of corresponding ref frames
2576
3.03M
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2577
2.65M
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
2578
2.65M
            this_stats->pred_error[ref_idx];
2579
2.65M
      }
2580
380k
    }
2581
361k
  }
2582
2583
  // Computes the best inter cost (minimum inter_cost)
2584
354k
  int64_t best_inter_cost = INT64_MAX;
2585
2.83M
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2586
2.47M
    const int64_t cur_inter_cost =
2587
2.47M
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
2588
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
2589
    // calculating the minimum inter_cost
2590
2.47M
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
2591
435k
        valid_refs[ref_idx])
2592
435k
      best_inter_cost = cur_inter_cost;
2593
2.47M
  }
2594
354k
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
2595
354k
}
2596
#endif
2597
2598
// Decides whether an inter mode can be pruned using the TPL-derived inter
// costs in inter_cost_info_from_tpl. Returns 1 to prune and 0 to keep.
// Never prunes when:
//  - prune_mode_level is 1 and neither reference is LAST2_FRAME,
//  - prune_mode_level is 2 and the mode contains NEWMV, or
//  - best_inter_cost is INT64_MAX (no usable TPL cost).
// The cost of the mode is ref_inter_cost of its reference (the larger of the
// two for compound prediction). With a LAST2_FRAME reference the mode is
// pruned when that cost exceeds best_inter_cost; otherwise it is pruned when
// the cost exceeds best_inter_cost * factor / 4, with the factor taken from
// the table below.
static inline int prune_modes_based_on_tpl_stats(
2599
    PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
2600
677k
    const PREDICTION_MODE this_mode, int prune_mode_level) {
2601
677k
  const int is_ref_last2 = refs[0] == LAST2_FRAME || refs[1] == LAST2_FRAME;
2602
677k
  if (prune_mode_level == 1 && !is_ref_last2) return 0;
2603
2604
644k
  const int have_newmv = have_newmv_in_inter_mode(this_mode);
2605
644k
  if ((prune_mode_level == 2) && have_newmv) return 0;
2606
2607
644k
  const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
2608
644k
  if (best_inter_cost == INT64_MAX) return 0;
2609
2610
644k
  int64_t cur_inter_cost;
2611
  // ref_inter_cost[] is indexed by reference frame - 1.
2612
644k
  const int is_comp_pred = (refs[1] > INTRA_FRAME);
2613
644k
  if (!is_comp_pred) {
2614
523k
    cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2615
523k
  } else {
2616
121k
    const int64_t inter_cost_ref0 =
2617
121k
        inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2618
121k
    const int64_t inter_cost_ref1 =
2619
121k
        inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
2620
    // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
2621
    // more aggressive pruning
2622
121k
    cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
2623
121k
  }
2624
2625
644k
  if (is_ref_last2) return (cur_inter_cost > best_inter_cost);
2626
2627
644k
  const int is_globalmv =
2628
644k
      (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
2629
644k
  const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
  // NOTE(review): the table below has 3 rows, so prune_mode_level is assumed
  // to be in [2, 4] at this point -- confirm against the callers.
2630
644k
  const int prune_level = prune_mode_level - 2;
2631
2632
  // Thresholds used for pruning:
2633
  // Lower value indicates aggressive pruning and higher value indicates
2634
  // conservative pruning which is set based on ref_mv_idx and speed feature.
2635
  // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2.
2636
  // prune_index 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
2637
644k
  static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
2638
644k
    { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
2639
644k
  };
2640
2641
  // Prune the mode if cur_inter_cost is greater than threshold times
2642
  // best_inter_cost (the factor is in units of 1/4, hence the >> 2)
2643
644k
  if (cur_inter_cost >
2644
644k
      ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
2645
644k
        best_inter_cost) >>
2646
644k
       2))
2647
117k
    return 1;
2648
527k
  return 0;
2649
644k
}
2650
2651
/*!\brief High level function to select parameters for compound mode.
2652
 *
2653
 * \ingroup inter_mode_search
2654
 * The main search functionality is done in the call to av1_compound_type_rd().
2655
 *
2656
 * \param[in]     cpi               Top-level encoder structure.
2657
 * \param[in]     x                 Pointer to struct holding all the data for
2658
 *                                  the current macroblock.
2659
 * \param[in]     args              HandleInterModeArgs struct holding
2660
 *                                  miscellaneous arguments for inter mode
2661
 *                                  search. See the documentation for this
2662
 *                                  struct for a description of each member.
2663
 * \param[in]     ref_best_rd       Best RD found so far for this block.
2664
 *                                  It is used for early termination of this
2665
 *                                  search if the RD exceeds this value.
2666
 * \param[in,out] cur_mv            Current motion vector.
2667
 * \param[in]     bsize             Current block size.
2668
 * \param[in,out] compmode_interinter_cost  RD of the selected interinter
2669
 *                                  compound mode.
2670
 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2671
 *                                  allocated buffers for the compound
2672
 *                                  predictors and masks in the compound type
2673
 *                                  search.
2674
 * \param[in,out] orig_dst          A prediction buffer to hold a computed
2675
 *                                  prediction. This will eventually hold the
2676
 *                                  final prediction, and the tmp_dst info will
2677
 *                                  be copied here.
2678
 * \param[in]     tmp_dst           A temporary prediction buffer to hold a
2679
 *                                  computed prediction.
2680
 * \param[in,out] rate_mv           The rate associated with the motion vectors.
2681
 *                                  This will be modified if a motion search is
2682
 *                                  done in the motion mode search.
2683
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
2684
 *                                  information.
2685
 * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is the
2686
 *                                  best total RD for a skip mode so far, and
2687
 *                                  skip_rd[1] is the best RD for a skip mode so
2688
 *                                  far in luma. This is used as a speed feature
2689
 *                                  to skip the transform search if the computed
2690
 *                                  skip RD for the current mode is not better
2691
 *                                  than the best skip_rd so far.
2692
 * \param[out] skip_build_pred      Indicates whether or not to build the inter
2693
 *                                  predictor during/after interpolation
2694
 *                                  filter search.
2695
 * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2696
 * a viable candidate.
2697
 */
2698
static int process_compound_inter_mode(
2699
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
2700
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
2701
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
2702
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
2703
281k
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
2704
281k
  MACROBLOCKD *xd = &x->e_mbd;
2705
281k
  MB_MODE_INFO *mbmi = xd->mi[0];
2706
281k
  const AV1_COMMON *cm = &cpi->common;
  // Masked compound is only considered when the block size allows it and the
  // sequence header enables it.
2707
281k
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
2708
280k
                                   cm->seq_params->enable_masked_compound;
  // Bitmask with one bit per compound type; all four types are enabled.
2709
281k
  int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
2710
281k
                         (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);
2711
2712
281k
  const int num_planes = av1_num_planes(cm);
2713
281k
  const int mi_row = xd->mi_row;
2714
281k
  const int mi_col = xd->mi_col;
2715
281k
  int is_luma_interp_done = 0;
2716
281k
  set_default_interp_filters(mbmi, cm->features.interp_filter);
2717
2718
281k
  int64_t best_rd_compound;
2719
281k
  int64_t rd_thresh;
2720
281k
  const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
2721
281k
  const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
2722
281k
  rd_thresh = get_rd_thresh_from_best_rd(ref_best_rd, (1 << comp_type_rd_shift),
2723
281k
                                         comp_type_rd_scale);
2724
  // Select compound type and any parameters related to that type
2725
  // (for example, the mask parameters if it is a masked mode) and compute
2726
  // the RD
2727
281k
  *compmode_interinter_cost = av1_compound_type_rd(
2728
281k
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
2729
281k
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
2730
281k
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
  // Give up on this mode (after restoring the original dst buffers) when the
  // best compound RD, scaled by comp_type_rd_scale / 2^comp_type_rd_shift,
  // still exceeds ref_best_rd.
2731
281k
  if (ref_best_rd < INT64_MAX &&
2732
281k
      (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
2733
281k
          ref_best_rd) {
2734
121k
    restore_dst_buf(xd, *orig_dst, num_planes);
2735
121k
    return 1;
2736
121k
  }
2737
2738
  // Build only uv predictor for COMPOUND_AVERAGE.
2739
  // Note there is no need to call av1_enc_build_inter_predictor
2740
  // for luma if COMPOUND_AVERAGE is selected because it is the first
2741
  // candidate in av1_compound_type_rd, which means it used the dst_buf
2742
  // rather than the tmp_buf.
2743
160k
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
2744
122k
    if (num_planes > 1) {
2745
77.7k
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
2746
77.7k
                                    AOM_PLANE_U, num_planes - 1);
2747
77.7k
    }
2748
122k
    *skip_build_pred = INTERP_SKIP_LUMA_SKIP_CHROMA;
2749
122k
  }
2750
160k
  return 0;
2751
281k
}
2752
2753
// Speed feature to prune out MVs that are similar to previous MVs if they
2754
// don't achieve the best RD advantage.
// Returns 1 when the evaluation of ref_mv_idx should be skipped: no best
// ref_mv_idx has been found yet (best_ref_mv_idx == -1) and the current MV(s)
// in mbmi->mv differ from an earlier saved candidate by at most
// (1 + is_comp_pred) << (pruning_factor + 1) in summed absolute row/column
// difference. Otherwise returns 0 and, when ref_mv_idx is below
// MAX_REF_MV_SEARCH - 1, records the current MV(s) in save_mv[ref_mv_idx].
2755
static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2756
                                   int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2757
341k
                                   MB_MODE_INFO *mbmi, int pruning_factor) {
2758
341k
  int i;
2759
341k
  const int is_comp_pred = has_second_ref(mbmi);
2760
341k
  const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
2761
2762
  // Skip the evaluation if an MV match is found.
2763
341k
  if (ref_mv_idx > 0) {
2764
38.0k
    for (int idx = 0; idx < ref_mv_idx; ++idx) {
      // Slots marked INVALID_MV hold no saved candidate to compare against.
2765
21.4k
      if (save_mv[idx][0].as_int == INVALID_MV) continue;
2766
2767
20.6k
      int mv_diff = 0;
2768
62.0k
      for (i = 0; i < 1 + is_comp_pred; ++i) {
2769
41.3k
        mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2770
41.3k
                   abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2771
41.3k
      }
2772
2773
      // If this mode is not the best one, and current MV is similar to
2774
      // previous stored MV, terminate this ref_mv_idx evaluation.
2775
20.6k
      if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2776
20.6k
    }
2777
17.3k
  }
2778
  // Remember this candidate's MV(s) for comparison with later candidates.
2779
340k
  if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2780
1.00M
    for (i = 0; i < is_comp_pred + 1; ++i)
2781
672k
      save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2782
336k
  }
2783
2784
340k
  return 0;
2785
341k
}
2786
2787
/*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2788
 *
2789
 * \ingroup inter_mode_search
2790
 *
2791
 * Compares the sse of zero mv and the best sse found in single new_mv. If the
2792
 * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
2793
 * Else returns 0.
2794
 *
2795
 * Note that the sse of here comes from single_motion_search. So it is
2796
 * interpolated with the filter in motion search, not the actual interpolation
2797
 * filter used in encoding.
2798
 *
2799
 * \param[in]     fn_ptr            A table of function pointers to compute SSE.
2800
 * \param[in]     x                 Pointer to struct holding all the data for
2801
 *                                  the current macroblock.
2802
 * \param[in]     bsize             The current block_size.
2803
 * \param[in]     args              The args to handle_inter_mode, used to track
2804
 *                                  the best SSE.
2805
 * \param[in]    prune_zero_mv_with_sse  The argument holds speed feature
2806
 *                                       prune_zero_mv_with_sse value
2807
 * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2808
 */
2809
static inline int prune_zero_mv_with_sse(const aom_variance_fn_ptr_t *fn_ptr,
2810
                                         const MACROBLOCK *x, BLOCK_SIZE bsize,
2811
                                         const HandleInterModeArgs *args,
2812
1.20M
                                         int prune_zero_mv_with_sse) {
2813
1.20M
  const MACROBLOCKD *xd = &x->e_mbd;
2814
1.20M
  const MB_MODE_INFO *mbmi = xd->mi[0];
2815
2816
1.20M
  const int is_comp_pred = has_second_ref(mbmi);
2817
1.20M
  const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
2818
2819
2.47M
  for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2820
1.26M
    if (xd->global_motion[refs[idx]].wmtype != IDENTITY) {
2821
      // Pruning logic only works for IDENTITY type models
2822
      // Note: In theory we could apply similar logic for TRANSLATION
2823
      // type models, but we do not code these due to a spec bug
2824
      // (see comments in gm_get_motion_vector() in av1/common/mv.h)
2825
0
      assert(xd->global_motion[refs[idx]].wmtype != TRANSLATION);
2826
0
      return 0;
2827
0
    }
2828
2829
    // Don't prune if we have invalid data
2830
1.26M
    assert(mbmi->mv[idx].as_int == 0);
2831
1.26M
    if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
2832
260
      return 0;
2833
260
    }
2834
1.26M
  }
2835
2836
  // Sum up the sse of ZEROMV and best NEWMV
2837
1.20M
  unsigned int this_sse_sum = 0;
2838
1.20M
  unsigned int best_sse_sum = 0;
2839
2.47M
  for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2840
1.26M
    const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
2841
1.26M
    const struct macroblockd_plane *pd = xd->plane;
2842
1.26M
    const struct buf_2d *src_buf = &p->src;
2843
1.26M
    const struct buf_2d *ref_buf = &pd->pre[idx];
2844
1.26M
    const uint8_t *src = src_buf->buf;
2845
1.26M
    const uint8_t *ref = ref_buf->buf;
2846
1.26M
    const int src_stride = src_buf->stride;
2847
1.26M
    const int ref_stride = ref_buf->stride;
2848
2849
1.26M
    unsigned int this_sse;
2850
1.26M
    fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
2851
1.26M
    this_sse_sum += this_sse;
2852
2853
1.26M
    const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
2854
1.26M
    best_sse_sum += best_sse;
2855
1.26M
  }
2856
2857
1.20M
  const double mul = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25;
2858
1.20M
  if ((double)this_sse_sum > (mul * (double)best_sse_sum)) {
2859
1.12M
    return 1;
2860
1.12M
  }
2861
2862
79.4k
  return 0;
2863
1.20M
}
2864
2865
/*!\brief Searches for interpolation filter in realtime mode during winner eval
2866
 *
2867
 * \ingroup inter_mode_search
2868
 *
2869
 * Does a simple interpolation filter search during winner mode evaluation. This
2870
 * is currently only used by realtime mode as \ref
2871
 * av1_interpolation_filter_search is not called during realtime encoding.
2872
 *
2873
 * This function only searches over two possible filters. EIGHTTAP_REGULAR is
2874
 * always search. For lowres clips (<= 240p), MULTITAP_SHARP is also search. For
2875
 * higher  res slips (>240p), EIGHTTAP_SMOOTH is also searched.
2876
 *  *
2877
 * \param[in]     cpi               Pointer to the compressor. Used for feature
2878
 *                                  flags.
2879
 * \param[in,out] x                 Pointer to macroblock. This is primarily
2880
 *                                  used to access the buffers.
2881
 * \param[in]     mi_row            The current row in mi unit (4X4 pixels).
2882
 * \param[in]     mi_col            The current col in mi unit (4X4 pixels).
2883
 * \param[in]     bsize             The current block_size.
2884
 * \return Returns true if a predictor is built in xd->dst, false otherwise.
2885
 */
2886
static inline bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
2887
                                      int mi_row, int mi_col,
2888
0
                                      BLOCK_SIZE bsize) {
2889
0
  static const InterpFilters filters_ref_set[3] = {
2890
0
    { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
2891
0
    { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
2892
0
    { MULTITAP_SHARP, MULTITAP_SHARP }
2893
0
  };
2894
2895
0
  const AV1_COMMON *const cm = &cpi->common;
2896
0
  MACROBLOCKD *const xd = &x->e_mbd;
2897
0
  MB_MODE_INFO *const mi = xd->mi[0];
2898
0
  int64_t best_cost = INT64_MAX;
2899
0
  int best_filter_index = -1;
2900
  // dst_bufs[0] sores the new predictor, and dist_bifs[1] stores the best
2901
0
  const int num_planes = av1_num_planes(cm);
2902
0
  const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
2903
0
  assert(is_inter_mode(mi->mode));
2904
0
  assert(mi->motion_mode == SIMPLE_TRANSLATION);
2905
0
  assert(!is_inter_compound_mode(mi->mode));
2906
2907
0
  if (!av1_is_interp_needed(xd)) {
2908
0
    return false;
2909
0
  }
2910
2911
0
  struct macroblockd_plane *pd = xd->plane;
2912
0
  const BUFFER_SET orig_dst = {
2913
0
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
2914
0
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
2915
0
  };
2916
0
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
2917
0
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
2918
0
                                 tmp_buf + 2 * MAX_SB_SQUARE },
2919
0
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
2920
0
  const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };
2921
2922
0
  for (int i = 0; i < 3; ++i) {
2923
0
    if (is_240p_or_lesser) {
2924
0
      if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
2925
0
        continue;
2926
0
      }
2927
0
    } else {
2928
0
      if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
2929
0
        continue;
2930
0
      }
2931
0
    }
2932
0
    int64_t cost;
2933
0
    RD_STATS tmp_rd = { 0 };
2934
2935
0
    mi->interp_filters.as_filters = filters_ref_set[i];
2936
0
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
2937
2938
0
    model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
2939
0
                       ? MODELRD_LEGACY
2940
0
                       : MODELRD_TYPE_INTERP_FILTER](
2941
0
        cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
2942
0
        &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);
2943
2944
0
    tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
2945
0
                                           cm->seq_params->enable_dual_filter);
2946
0
    cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
2947
0
    if (cost < best_cost) {
2948
0
      best_filter_index = i;
2949
0
      best_cost = cost;
2950
0
      swap_dst_buf(xd, dst_bufs, num_planes);
2951
0
    }
2952
0
  }
2953
0
  assert(best_filter_index >= 0);
2954
2955
0
  mi->interp_filters.as_filters = filters_ref_set[best_filter_index];
2956
2957
0
  const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];
2958
2959
0
  if (is_best_pred_in_orig) {
2960
0
    swap_dst_buf(xd, dst_bufs, num_planes);
2961
0
  } else {
2962
    // Note that xd->pd's bufers are kept in sync with dst_bufs[0]. So if
2963
    // is_best_pred_in_orig is false, that means the current buffer is the
2964
    // original one.
2965
0
    assert(&orig_dst == dst_bufs[0]);
2966
0
    assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
2967
0
    const int width = block_size_wide[bsize];
2968
0
    const int height = block_size_high[bsize];
2969
0
#if CONFIG_AV1_HIGHBITDEPTH
2970
0
    const bool is_hbd = is_cur_buf_hbd(xd);
2971
0
    if (is_hbd) {
2972
0
      aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
2973
0
                               tmp_dst.stride[AOM_PLANE_Y],
2974
0
                               CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
2975
0
                               orig_dst.stride[AOM_PLANE_Y], width, height);
2976
0
    } else {
2977
0
      aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
2978
0
                        orig_dst.plane[AOM_PLANE_Y],
2979
0
                        orig_dst.stride[AOM_PLANE_Y], width, height);
2980
0
    }
2981
#else
2982
    aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
2983
                      orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
2984
                      width, height);
2985
#endif
2986
0
  }
2987
2988
  // Build the YUV predictor.
2989
0
  if (num_planes > 1) {
2990
0
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
2991
0
                                  AOM_PLANE_U, AOM_PLANE_V);
2992
0
  }
2993
2994
0
  return true;
2995
0
}
2996
2997
/*!\brief AV1 inter mode RD computation
2998
 *
2999
 * \ingroup inter_mode_search
3000
 * Do the RD search for a given inter mode and compute all information relevant
3001
 * to the input mode. It will compute the best MV,
3002
 * compound parameters (if the mode is a compound mode) and interpolation filter
3003
 * parameters.
3004
 *
3005
 * \param[in]     cpi               Top-level encoder structure.
3006
 * \param[in]     tile_data         Pointer to struct holding adaptive
3007
 *                                  data/contexts/models for the tile during
3008
 *                                  encoding.
3009
 * \param[in]     x                 Pointer to structure holding all the data
3010
 *                                  for the current macroblock.
3011
 * \param[in]     bsize             Current block size.
3012
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
3013
 *                                  information.
3014
 * \param[out]    rd_stats_y        Struct to keep track of the RD information
3015
 *                                  for only the Y plane.
3016
 * \param[out]    rd_stats_uv       Struct to keep track of the RD information
3017
 *                                  for only the UV planes.
3018
 * \param[in]     args              HandleInterModeArgs struct holding
3019
 *                                  miscellaneous arguments for inter mode
3020
 *                                  search. See the documentation for this
3021
 *                                  struct for a description of each member.
3022
 * \param[in]     ref_best_rd       Best RD found so far for this block.
3023
 *                                  It is used for early termination of this
3024
 *                                  search if the RD exceeds this value.
3025
 * \param[in]     tmp_buf           Temporary buffer used to hold predictors
3026
 *                                  built in this search.
3027
 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
3028
 *                                  allocated buffers for the compound
3029
 *                                  predictors and masks in the compound type
3030
 *                                  search.
3031
 * \param[in,out] best_est_rd       Estimated RD for motion mode search if
3032
 *                                  do_tx_search (see below) is 0.
3033
 * \param[in]     do_tx_search      Parameter to indicate whether or not to do
3034
 *                                  a full transform search. This will compute
3035
 *                                  an estimated RD for the modes without the
3036
 *                                  transform search and later perform the full
3037
 *                                  transform search on the best candidates.
3038
 * \param[in,out] inter_modes_info  InterModesInfo struct to hold inter mode
3039
 *                                  information to perform a full transform
3040
 *                                  search only on winning candidates searched
3041
 *                                  with an estimate for transform coding RD.
3042
 * \param[in,out] motion_mode_cand  A motion_mode_candidate struct to store
3043
 *                                  motion mode information used in a speed
3044
 *                                  feature to search motion modes other than
3045
 *                                  SIMPLE_TRANSLATION only on winning
3046
 *                                  candidates.
3047
 * \param[in,out] skip_rd           A length 2 array, where skip_rd[0] is the
3048
 *                                  best total RD for a skip mode so far, and
3049
 *                                  skip_rd[1] is the best RD for a skip mode so
3050
 *                                  far in luma. This is used as a speed feature
3051
 *                                  to skip the transform search if the computed
3052
 *                                  skip RD for the current mode is not better
3053
 *                                  than the best skip_rd so far.
3054
 * \param[in]     inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
3055
 *                                         narrow down the search based on data
3056
 *                                         collected in the TPL model.
3057
 * \param[out]    yrd               Stores the rdcost corresponding to encoding
3058
 *                                  the luma plane.
3059
 *
3060
 * \return The RD cost for the mode being searched. If the return value is
3061
 *         INT64_MAX, the output parameters are not set; do not use them.
3062
 */
3063
static int64_t handle_inter_mode(
3064
    AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
3065
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
3066
    RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
3067
    uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
3068
    int64_t *best_est_rd, const int do_tx_search,
3069
    InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
3070
    int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
3071
4.64M
    int64_t *yrd) {
3072
4.64M
  const AV1_COMMON *cm = &cpi->common;
3073
4.64M
  const int num_planes = av1_num_planes(cm);
3074
4.64M
  MACROBLOCKD *xd = &x->e_mbd;
3075
4.64M
  MB_MODE_INFO *mbmi = xd->mi[0];
3076
4.64M
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3077
4.64M
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3078
4.64M
  const int is_comp_pred = has_second_ref(mbmi);
3079
4.64M
  const PREDICTION_MODE this_mode = mbmi->mode;
3080
3081
#if CONFIG_REALTIME_ONLY
3082
  const int prune_modes_based_on_tpl = 0;
3083
#else   // CONFIG_REALTIME_ONLY
3084
4.64M
  const TplParams *const tpl_data = &cpi->ppi->tpl_data;
3085
4.64M
  const int prune_modes_based_on_tpl =
3086
4.64M
      cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
3087
4.64M
      av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
3088
4.64M
#endif  // CONFIG_REALTIME_ONLY
3089
4.64M
  int i;
3090
  // Reference frames for this mode
3091
4.64M
  const int refs[2] = { mbmi->ref_frame[0],
3092
4.64M
                        (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
3093
4.64M
  int rate_mv = 0;
3094
4.64M
  int64_t rd = INT64_MAX;
3095
  // Do first prediction into the destination buffer. Do the next
3096
  // prediction into a temporary buffer. Then keep track of which one
3097
  // of these currently holds the best predictor, and use the other
3098
  // one for future predictions. In the end, copy from tmp_buf to
3099
  // dst if necessary.
3100
4.64M
  struct macroblockd_plane *pd = xd->plane;
3101
4.64M
  const BUFFER_SET orig_dst = {
3102
4.64M
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
3103
4.64M
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
3104
4.64M
  };
3105
4.64M
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
3106
4.64M
                                 tmp_buf + 2 * MAX_SB_SQUARE },
3107
4.64M
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
3108
3109
4.64M
  int64_t ret_val = INT64_MAX;
3110
4.64M
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3111
4.64M
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
3112
4.64M
  int64_t best_rd = INT64_MAX;
3113
4.64M
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
3114
4.64M
  int64_t best_yrd = INT64_MAX;
3115
4.64M
  MB_MODE_INFO best_mbmi = *mbmi;
3116
4.64M
  int best_xskip_txfm = 0;
3117
4.64M
  int64_t newmv_ret_val = INT64_MAX;
3118
4.64M
  inter_mode_info mode_info[MAX_REF_MV_SEARCH];
3119
3120
  // Do not prune the mode based on inter cost from tpl if the current ref frame
3121
  // is the winner ref in neighbouring blocks.
3122
4.64M
  int ref_match_found_in_above_nb = 0;
3123
4.64M
  int ref_match_found_in_left_nb = 0;
3124
4.64M
  if (prune_modes_based_on_tpl) {
3125
2.12M
    ref_match_found_in_above_nb =
3126
2.12M
        find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
3127
2.12M
    ref_match_found_in_left_nb =
3128
2.12M
        find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
3129
2.12M
  }
3130
3131
  // First, perform a simple translation search for each of the indices. If
3132
  // an index performs well, it will be fully searched in the main loop
3133
  // of this function.
3134
4.64M
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
3135
  // Save MV results from first 2 ref_mv_idx.
3136
4.64M
  int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
3137
4.64M
  int best_ref_mv_idx = -1;
3138
4.64M
  const int idx_mask =
3139
4.64M
      ref_mv_idx_to_search(cpi, x, args, ref_best_rd, bsize, ref_set);
3140
4.64M
  const int16_t mode_ctx =
3141
4.64M
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
3142
4.64M
  const ModeCosts *mode_costs = &x->mode_costs;
3143
4.64M
  const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
3144
4.64M
  const int base_rate =
3145
4.64M
      args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;
3146
3147
13.9M
  for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
3148
9.29M
    save_mv[i][0].as_int = INVALID_MV;
3149
9.29M
    save_mv[i][1].as_int = INVALID_MV;
3150
9.29M
  }
3151
4.64M
  args->start_mv_cnt = 0;
3152
3153
  // Main loop of this function. This will  iterate over all of the ref mvs
3154
  // in the dynamic reference list and do the following:
3155
  //    1.) Get the current MV. Create newmv MV if necessary
3156
  //    2.) Search compound type and parameters if applicable
3157
  //    3.) Do interpolation filter search
3158
  //    4.) Build the inter predictor
3159
  //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
3160
  //        WARPED_CAUSAL)
3161
  //    6.) Update stats if best so far
3162
9.96M
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
3163
5.31M
    mbmi->ref_mv_idx = ref_mv_idx;
3164
3165
5.31M
    mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
3166
5.31M
    mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
3167
5.31M
    const int drl_cost = get_drl_cost(
3168
5.31M
        mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
3169
5.31M
    mode_info[ref_mv_idx].drl_cost = drl_cost;
3170
5.31M
    mode_info[ref_mv_idx].skip = 0;
3171
3172
5.31M
    if (!mask_check_bit(idx_mask, ref_mv_idx)) {
3173
      // MV did not perform well in simple translation search. Skip it.
3174
152k
      continue;
3175
152k
    }
3176
5.16M
    if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
3177
1.16M
        !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
3178
      // Skip mode if TPL model indicates it will not be beneficial.
3179
677k
      if (prune_modes_based_on_tpl_stats(
3180
677k
              inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
3181
677k
              cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
3182
117k
        continue;
3183
677k
    }
3184
5.04M
    av1_init_rd_stats(rd_stats);
3185
3186
    // Initialize compound mode data
3187
5.04M
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3188
5.04M
    mbmi->comp_group_idx = 0;
3189
5.04M
    mbmi->compound_idx = 1;
3190
5.04M
    if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
3191
3192
5.04M
    mbmi->num_proj_ref = 0;
3193
5.04M
    mbmi->motion_mode = SIMPLE_TRANSLATION;
3194
3195
    // Compute cost for signalling this DRL index
3196
5.04M
    rd_stats->rate = base_rate;
3197
5.04M
    rd_stats->rate += drl_cost;
3198
3199
5.04M
    int rs = 0;
3200
5.04M
    int compmode_interinter_cost = 0;
3201
3202
5.04M
    int_mv cur_mv[2];
3203
3204
    // TODO(Cherma): Extend this speed feature to support compound mode
3205
5.04M
    int skip_repeated_ref_mv =
3206
5.04M
        is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
3207
    // Generate the current mv according to the prediction mode
3208
5.04M
    if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
3209
12.3k
      continue;
3210
12.3k
    }
3211
3212
    // The above call to build_cur_mv does not handle NEWMV modes. Build
3213
    // the mv here if we have NEWMV for any predictors.
3214
5.03M
    if (have_newmv_in_inter_mode(this_mode)) {
3215
#if CONFIG_COLLECT_COMPONENT_TIMING
3216
      start_timing(cpi, handle_newmv_time);
3217
#endif
3218
1.72M
      newmv_ret_val =
3219
1.72M
          handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
3220
#if CONFIG_COLLECT_COMPONENT_TIMING
3221
      end_timing(cpi, handle_newmv_time);
3222
#endif
3223
3224
1.72M
      if (newmv_ret_val != 0) continue;
3225
3226
1.50M
      if (is_inter_singleref_mode(this_mode) &&
3227
1.39M
          cur_mv[0].as_int != INVALID_MV) {
3228
1.39M
        const MV_REFERENCE_FRAME ref = refs[0];
3229
1.39M
        const unsigned int this_sse = x->pred_sse[ref];
3230
1.39M
        if (this_sse < args->best_single_sse_in_refs[ref]) {
3231
1.32M
          args->best_single_sse_in_refs[ref] = this_sse;
3232
1.32M
        }
3233
3234
1.39M
        if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
3235
0
          const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1;
3236
0
          const int pix_idx = num_pels_log2_lookup[bsize] - 4;
3237
0
          const double scale_factor[3][11] = {
3238
0
            { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
3239
0
            { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
3240
0
            { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
3241
0
          };
3242
0
          assert(pix_idx >= 0);
3243
0
          assert(th_idx <= 2);
3244
0
          if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse)
3245
0
            continue;
3246
0
        }
3247
1.39M
      }
3248
3249
1.50M
      rd_stats->rate += rate_mv;
3250
1.50M
    }
3251
    // Copy the motion vector for this mode into mbmi struct
3252
9.98M
    for (i = 0; i < is_comp_pred + 1; ++i) {
3253
5.16M
      mbmi->mv[i].as_int = cur_mv[i].as_int;
3254
5.16M
    }
3255
3256
4.81M
    if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
3257
1.61k
        mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
3258
1.54k
      continue;
3259
1.54k
    }
3260
3261
    // Skip the rest of the search if prune_ref_mv_idx_search speed feature
3262
    // is enabled, and the current MV is similar to a previous one.
3263
4.82M
    if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
3264
341k
        prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
3265
341k
                                cpi->sf.inter_sf.prune_ref_mv_idx_search))
3266
758
      continue;
3267
3268
4.81M
    if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
3269
4.82M
        (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
3270
1.20M
      if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args,
3271
1.20M
                                 cpi->sf.gm_sf.prune_zero_mv_with_sse)) {
3272
1.12M
        continue;
3273
1.12M
      }
3274
1.20M
    }
3275
3276
    // Flag to indicate whether to skip av1_enc_build_inter_predictor() after
3277
    // interpolation filter search
3278
3.69M
    int skip_build_pred = INTERP_EVAL_LUMA_EVAL_CHROMA;
3279
3.69M
    const int mi_row = xd->mi_row;
3280
3.69M
    const int mi_col = xd->mi_col;
3281
3282
    // Handle a compound predictor, continue if it is determined this
3283
    // cannot be the best compound mode
3284
3.69M
    if (is_comp_pred) {
3285
#if CONFIG_COLLECT_COMPONENT_TIMING
3286
      start_timing(cpi, compound_type_rd_time);
3287
#endif
3288
281k
      const int not_best_mode = process_compound_inter_mode(
3289
281k
          cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
3290
281k
          rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
3291
281k
          &skip_build_pred);
3292
#if CONFIG_COLLECT_COMPONENT_TIMING
3293
      end_timing(cpi, compound_type_rd_time);
3294
#endif
3295
281k
      if (not_best_mode) continue;
3296
281k
    }
3297
3298
3.57M
    if (!args->skip_ifs) {
3299
#if CONFIG_COLLECT_COMPONENT_TIMING
3300
      start_timing(cpi, interpolation_filter_search_time);
3301
#endif
3302
      // Determine the interpolation filter for this mode
3303
302k
      ret_val = av1_interpolation_filter_search(
3304
302k
          x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
3305
302k
          &skip_build_pred, args, ref_best_rd);
3306
#if CONFIG_COLLECT_COMPONENT_TIMING
3307
      end_timing(cpi, interpolation_filter_search_time);
3308
#endif
3309
302k
      if (args->modelled_rd != NULL && !is_comp_pred) {
3310
142k
        args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
3311
142k
      }
3312
302k
      if (ret_val != 0) {
3313
0
        restore_dst_buf(xd, orig_dst, num_planes);
3314
0
        continue;
3315
302k
      } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
3316
302k
                 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
3317
6.84k
        restore_dst_buf(xd, orig_dst, num_planes);
3318
6.84k
        continue;
3319
6.84k
      }
3320
3321
      // Compute modelled RD if enabled
3322
295k
      if (args->modelled_rd != NULL) {
3323
295k
        if (is_comp_pred) {
3324
157k
          const int mode0 = compound_ref0_mode(this_mode);
3325
157k
          const int mode1 = compound_ref1_mode(this_mode);
3326
157k
          const int64_t mrd =
3327
157k
              AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
3328
157k
                     args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
3329
157k
          if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
3330
0
            restore_dst_buf(xd, orig_dst, num_planes);
3331
0
            continue;
3332
0
          }
3333
157k
        }
3334
295k
      }
3335
295k
    }
3336
3337
3.56M
    rd_stats->rate += compmode_interinter_cost;
3338
3.56M
    if (skip_build_pred != INTERP_SKIP_LUMA_SKIP_CHROMA) {
3339
      // Chroma plane of COMPOUND_DIFFWTD mode shares the segment mask of luma
3340
      // which is stored in xd->seg_mask. Hence, the predictor is populated for
3341
      // all planes. This should avoid usage of incorrect segment mask when the
3342
      // call is made only for chroma.
3343
3.46M
      const int skip_luma_plane =
3344
3.46M
          skip_build_pred == INTERP_SKIP_LUMA_EVAL_CHROMA &&
3345
165k
          mbmi->interinter_comp.type != COMPOUND_DIFFWTD;
3346
3.46M
      const int start_plane = skip_luma_plane ? AOM_PLANE_U : AOM_PLANE_Y;
3347
      // Build this inter predictor if it has not been previously built
3348
3.46M
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
3349
3.46M
                                    start_plane, num_planes - 1);
3350
3.46M
    }
3351
#if CONFIG_COLLECT_COMPONENT_TIMING
3352
    start_timing(cpi, motion_mode_rd_time);
3353
#endif
3354
3.56M
    int rate2_nocoeff = rd_stats->rate;
3355
    // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
3356
    // OBMC_CAUSAL or WARPED_CAUSAL
3357
3.56M
    int64_t this_yrd;
3358
3.56M
    ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
3359
3.56M
                             rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
3360
3.56M
                             &orig_dst, best_est_rd, do_tx_search,
3361
3.56M
                             inter_modes_info, 0, &this_yrd);
3362
#if CONFIG_COLLECT_COMPONENT_TIMING
3363
    end_timing(cpi, motion_mode_rd_time);
3364
#endif
3365
3.56M
    assert(
3366
3.56M
        IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));
3367
3368
3.56M
    if (ret_val != INT64_MAX) {
3369
1.89M
      int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
3370
1.89M
      const THR_MODES mode_enum = get_prediction_mode_idx(
3371
1.89M
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3372
      // Collect mode stats for multiwinner mode processing
3373
1.89M
      store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
3374
1.89M
                              rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
3375
1.89M
                              cpi->sf.winner_mode_sf.multi_winner_mode_type,
3376
1.89M
                              do_tx_search);
3377
1.89M
      if (tmp_rd < best_rd) {
3378
1.89M
        best_yrd = this_yrd;
3379
        // Update the best rd stats if we found the best mode so far
3380
1.89M
        best_rd_stats = *rd_stats;
3381
1.89M
        best_rd_stats_y = *rd_stats_y;
3382
1.89M
        best_rd_stats_uv = *rd_stats_uv;
3383
1.89M
        best_rd = tmp_rd;
3384
1.89M
        best_mbmi = *mbmi;
3385
1.89M
        best_xskip_txfm = txfm_info->skip_txfm;
3386
1.89M
        av1_copy_array(best_tx_type_map, xd->tx_type_map,
3387
1.89M
                       xd->height * xd->width);
3388
1.89M
        motion_mode_cand->rate_mv = rate_mv;
3389
1.89M
        motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
3390
1.89M
      }
3391
3392
1.89M
      if (tmp_rd < ref_best_rd) {
3393
1.84M
        ref_best_rd = tmp_rd;
3394
1.84M
        best_ref_mv_idx = ref_mv_idx;
3395
1.84M
      }
3396
1.89M
    }
3397
3.56M
    restore_dst_buf(xd, orig_dst, num_planes);
3398
3.56M
  }
3399
3400
4.64M
  if (best_rd == INT64_MAX) return INT64_MAX;
3401
3402
  // re-instate status of the best choice
3403
1.85M
  *rd_stats = best_rd_stats;
3404
1.85M
  *rd_stats_y = best_rd_stats_y;
3405
1.85M
  *rd_stats_uv = best_rd_stats_uv;
3406
1.85M
  *yrd = best_yrd;
3407
1.85M
  *mbmi = best_mbmi;
3408
1.85M
  txfm_info->skip_txfm = best_xskip_txfm;
3409
1.85M
  assert(IMPLIES(mbmi->comp_group_idx == 1,
3410
1.85M
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
3411
1.85M
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
3412
3413
1.85M
  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
3414
3415
1.85M
  return rd_stats->rdcost;
3416
4.64M
}
3417
3418
/*!\brief Search for the best intrabc predictor
3419
 *
3420
 * \ingroup intra_mode_search
3421
 * \callergraph
3422
 * This function performs a motion search to find the best intrabc predictor.
3423
 *
3424
 * \returns Returns the best overall rdcost (including the non-intrabc modes
3425
 * search before this function).
3426
 */
3427
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
3428
                                       PICK_MODE_CONTEXT *ctx,
3429
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
3430
18.3M
                                       int64_t best_rd) {
3431
18.3M
  const AV1_COMMON *const cm = &cpi->common;
3432
18.3M
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
3433
1.73M
      !cpi->sf.mv_sf.use_intrabc || cpi->sf.rt_sf.use_nonrd_pick_mode)
3434
16.7M
    return INT64_MAX;
3435
1.66M
  if (cpi->sf.mv_sf.intrabc_search_level >= 1 && bsize != BLOCK_4X4 &&
3436
828k
      bsize != BLOCK_8X8 && bsize != BLOCK_16X16) {
3437
285k
    return INT64_MAX;
3438
285k
  }
3439
1.37M
  const int num_planes = av1_num_planes(cm);
3440
3441
1.37M
  MACROBLOCKD *const xd = &x->e_mbd;
3442
1.37M
  const TileInfo *tile = &xd->tile;
3443
1.37M
  MB_MODE_INFO *mbmi = xd->mi[0];
3444
3445
1.37M
  const int mi_row = xd->mi_row;
3446
1.37M
  const int mi_col = xd->mi_col;
3447
1.37M
  const int w = block_size_wide[bsize];
3448
1.37M
  const int h = block_size_high[bsize];
3449
1.37M
  const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
3450
1.37M
  const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
3451
3452
1.37M
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3453
1.37M
  const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
3454
1.37M
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
3455
1.37M
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3456
1.37M
                   mbmi_ext->mode_context);
3457
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3458
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3459
1.37M
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
3460
1.37M
  int_mv nearestmv, nearmv;
3461
1.37M
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
3462
1.37M
                                   0);
3463
3464
1.37M
  if (nearestmv.as_int == INVALID_MV) {
3465
1.31M
    nearestmv.as_int = 0;
3466
1.31M
  }
3467
1.37M
  if (nearmv.as_int == INVALID_MV) {
3468
1.34M
    nearmv.as_int = 0;
3469
1.34M
  }
3470
3471
1.37M
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
3472
1.37M
  if (dv_ref.as_int == 0) {
3473
1.31M
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
3474
1.31M
  }
3475
  // Ref DV should not have sub-pel.
3476
1.37M
  assert((dv_ref.as_mv.col & 7) == 0);
3477
1.37M
  assert((dv_ref.as_mv.row & 7) == 0);
3478
1.37M
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
3479
3480
1.37M
  struct buf_2d yv12_mb[MAX_MB_PLANE];
3481
1.37M
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
3482
4.38M
  for (int i = 0; i < num_planes; ++i) {
3483
3.00M
    xd->plane[i].pre[0] = yv12_mb[i];
3484
3.00M
  }
3485
3486
1.37M
  enum IntrabcMotionDirection {
3487
1.37M
    IBC_MOTION_ABOVE,
3488
1.37M
    IBC_MOTION_LEFT,
3489
1.37M
    IBC_MOTION_DIRECTIONS
3490
1.37M
  };
3491
3492
1.37M
  MB_MODE_INFO best_mbmi = *mbmi;
3493
1.37M
  RD_STATS best_rdstats = *rd_stats;
3494
1.37M
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
3495
1.37M
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3496
3497
1.37M
  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
3498
1.37M
  const SEARCH_METHODS search_method =
3499
1.37M
      av1_get_default_mv_search_method(x, &cpi->sf.mv_sf, bsize);
3500
1.37M
  const search_site_config *lookahead_search_sites =
3501
1.37M
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
3502
1.37M
  const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
3503
1.37M
  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
3504
1.37M
                                     &dv_ref.as_mv, start_mv,
3505
1.37M
                                     lookahead_search_sites, search_method,
3506
1.37M
                                     /*fine_search_interval=*/0);
3507
1.37M
  const IntraBCMVCosts *const dv_costs = x->dv_costs;
3508
1.37M
  av1_set_ms_to_intra_mode(&fullms_params, dv_costs);
3509
3510
1.37M
  const enum IntrabcMotionDirection max_dir = cpi->sf.mv_sf.intrabc_search_level
3511
1.37M
                                                  ? IBC_MOTION_LEFT
3512
1.37M
                                                  : IBC_MOTION_DIRECTIONS;
3513
3514
2.75M
  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE; dir < max_dir;
3515
1.37M
       ++dir) {
3516
1.37M
    switch (dir) {
3517
1.37M
      case IBC_MOTION_ABOVE:
3518
1.37M
        fullms_params.mv_limits.col_min =
3519
1.37M
            (tile->mi_col_start - mi_col) * MI_SIZE;
3520
1.37M
        fullms_params.mv_limits.col_max =
3521
1.37M
            (tile->mi_col_end - mi_col) * MI_SIZE - w;
3522
1.37M
        fullms_params.mv_limits.row_min =
3523
1.37M
            (tile->mi_row_start - mi_row) * MI_SIZE;
3524
1.37M
        fullms_params.mv_limits.row_max =
3525
1.37M
            (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
3526
1.37M
        break;
3527
0
      case IBC_MOTION_LEFT:
3528
0
        fullms_params.mv_limits.col_min =
3529
0
            (tile->mi_col_start - mi_col) * MI_SIZE;
3530
0
        fullms_params.mv_limits.col_max =
3531
0
            (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
3532
        // TODO(aconverse@google.com): Minimize the overlap between above and
3533
        // left areas.
3534
0
        fullms_params.mv_limits.row_min =
3535
0
            (tile->mi_row_start - mi_row) * MI_SIZE;
3536
0
        int bottom_coded_mi_edge =
3537
0
            AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
3538
0
        fullms_params.mv_limits.row_max =
3539
0
            (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
3540
0
        break;
3541
0
      default: assert(0);
3542
1.37M
    }
3543
1.37M
    assert(fullms_params.mv_limits.col_min >= fullms_params.mv_limits.col_min);
3544
1.37M
    assert(fullms_params.mv_limits.col_max <= fullms_params.mv_limits.col_max);
3545
1.37M
    assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min);
3546
1.37M
    assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max);
3547
3548
1.37M
    av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);
3549
3550
1.37M
    if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
3551
1.37M
        fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
3552
0
      continue;
3553
0
    }
3554
3555
1.37M
    const int step_param = cpi->mv_search_params.mv_step_param;
3556
1.37M
    IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
3557
1.37M
    int_mv best_mv;
3558
1.37M
    FULLPEL_MV_STATS best_mv_stats;
3559
1.37M
    int bestsme = INT_MAX;
3560
3561
    // Perform a hash search first, and see if we get any matches.
3562
1.37M
    if (!cpi->sf.mv_sf.hash_max_8x8_intrabc_blocks || bsize <= BLOCK_8X8) {
3563
1.30M
      bestsme = av1_intrabc_hash_search(cpi, xd, &fullms_params,
3564
1.30M
                                        intrabc_hash_info, &best_mv.as_fullmv);
3565
1.30M
    }
3566
3567
    // If intrabc_search_level is not 0 and we found a hash search match, do
3568
    // not proceed with pixel search as the hash match is very likely to be the
3569
    // best intrabc candidate anyway.
3570
1.37M
    if (bestsme == INT_MAX || cpi->sf.mv_sf.intrabc_search_level == 0) {
3571
1.37M
      int_mv best_pixel_mv;
3572
1.37M
      const int pixelsme =
3573
1.37M
          av1_full_pixel_search(start_mv, &fullms_params, step_param, NULL,
3574
1.37M
                                &best_pixel_mv.as_fullmv, &best_mv_stats, NULL);
3575
1.37M
      if (pixelsme < bestsme) {
3576
1.37M
        bestsme = pixelsme;
3577
1.37M
        best_mv = best_pixel_mv;
3578
1.37M
      }
3579
1.37M
    }
3580
1.37M
    if (bestsme == INT_MAX) continue;
3581
1.37M
    const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
3582
1.37M
    if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
3583
1.37M
                                get_fullmv_from_mv(&dv)))
3584
0
      continue;
3585
1.37M
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
3586
1.37M
                         cm->seq_params->mib_size_log2))
3587
1.24M
      continue;
3588
3589
    // DV should not have sub-pel.
3590
1.37M
    assert((dv.col & 7) == 0);
3591
125k
    assert((dv.row & 7) == 0);
3592
125k
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
3593
125k
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
3594
125k
    mbmi->use_intrabc = 1;
3595
125k
    mbmi->mode = DC_PRED;
3596
125k
    mbmi->uv_mode = UV_DC_PRED;
3597
125k
    mbmi->motion_mode = SIMPLE_TRANSLATION;
3598
125k
    mbmi->mv[0].as_mv = dv;
3599
125k
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
3600
125k
    mbmi->skip_txfm = 0;
3601
125k
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3602
125k
                                  av1_num_planes(cm) - 1);
3603
3604
    // TODO(aconverse@google.com): The full motion field defining discount
3605
    // in MV_COST_WEIGHT is too large. Explore other values.
3606
125k
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
3607
125k
                                        dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
3608
125k
    const int rate_mode = x->mode_costs.intrabc_cost[1];
3609
125k
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
3610
125k
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
3611
125k
                         &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
3612
0
      continue;
3613
125k
    rd_stats_yuv.rdcost =
3614
125k
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
3615
125k
    if (rd_stats_yuv.rdcost < best_rd) {
3616
35.2k
      best_rd = rd_stats_yuv.rdcost;
3617
35.2k
      best_mbmi = *mbmi;
3618
35.2k
      best_rdstats = rd_stats_yuv;
3619
35.2k
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
3620
35.2k
    }
3621
125k
  }
3622
1.37M
  *mbmi = best_mbmi;
3623
1.37M
  *rd_stats = best_rdstats;
3624
1.37M
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
3625
#if CONFIG_RD_DEBUG
3626
  mbmi->rd_stats = *rd_stats;
3627
#endif
3628
1.37M
  return best_rd;
3629
1.37M
}
3630
3631
// TODO(chiyotsai@google.com): We are using struct $struct_name instead of their
3632
// typedef here because Doxygen doesn't know about the typedefs yet. So using
3633
// the typedef will prevent doxygen from finding this function and generating
3634
// the callgraph. Once documents for AV1_COMP and MACROBLOCK are added to
3635
// doxygen, we can revert back to using the typedefs.
3636
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
3637
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
3638
18.3M
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3639
18.3M
  const AV1_COMMON *const cm = &cpi->common;
3640
18.3M
  MACROBLOCKD *const xd = &x->e_mbd;
3641
18.3M
  MB_MODE_INFO *const mbmi = xd->mi[0];
3642
18.3M
  const int num_planes = av1_num_planes(cm);
3643
18.3M
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3644
18.3M
  uint8_t y_skip_txfm = 0, uv_skip_txfm = 0;
3645
18.3M
  int64_t dist_y = 0, dist_uv = 0;
3646
3647
18.3M
  ctx->rd_stats.skip_txfm = 0;
3648
18.3M
  mbmi->ref_frame[0] = INTRA_FRAME;
3649
18.3M
  mbmi->ref_frame[1] = NONE_FRAME;
3650
18.3M
  mbmi->use_intrabc = 0;
3651
18.3M
  mbmi->mv[0].as_int = 0;
3652
18.3M
  mbmi->skip_mode = 0;
3653
3654
18.3M
  const int64_t intra_yrd =
3655
18.3M
      av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
3656
18.3M
                                 &y_skip_txfm, bsize, best_rd, ctx);
3657
3658
  // Initialize default mode evaluation params
3659
18.3M
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);
3660
3661
18.3M
  if (intra_yrd < best_rd) {
3662
    // Search intra modes for uv planes if needed
3663
16.5M
    if (num_planes > 1) {
3664
      // Set up the tx variables for reproducing the y predictions in case we
3665
      // need it for chroma-from-luma.
3666
7.70M
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
3667
5.88M
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
3668
5.88M
      }
3669
7.70M
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
3670
7.70M
      av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3671
7.70M
                                  &dist_uv, &uv_skip_txfm, bsize,
3672
7.70M
                                  max_uv_tx_size);
3673
7.70M
    }
3674
3675
    // Intra block is always coded as non-skip
3676
16.5M
    rd_cost->rate =
3677
16.5M
        rate_y + rate_uv +
3678
16.5M
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
3679
16.5M
    rd_cost->dist = dist_y + dist_uv;
3680
16.5M
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
3681
16.5M
    rd_cost->skip_txfm = 0;
3682
16.5M
  } else {
3683
1.84M
    rd_cost->rate = INT_MAX;
3684
1.84M
  }
3685
3686
18.3M
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
3687
15.6M
    best_rd = rd_cost->rdcost;
3688
18.3M
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
3689
35.2k
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
3690
35.2k
    assert(rd_cost->rate != INT_MAX);
3691
35.2k
  }
3692
18.3M
  if (rd_cost->rate == INT_MAX) return;
3693
3694
16.5M
  ctx->mic = *mbmi;
3695
16.5M
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
3696
16.5M
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
3697
16.5M
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3698
16.5M
}
3699
3700
static inline void calc_target_weighted_pred(
3701
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
3702
    const uint8_t *above, int above_stride, const uint8_t *left,
3703
    int left_stride);
3704
3705
static inline void rd_pick_skip_mode(
3706
    RD_STATS *rd_cost, InterModeSearchState *search_state,
3707
    const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3708
197k
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3709
197k
  const AV1_COMMON *const cm = &cpi->common;
3710
197k
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3711
197k
  const int num_planes = av1_num_planes(cm);
3712
197k
  MACROBLOCKD *const xd = &x->e_mbd;
3713
197k
  MB_MODE_INFO *const mbmi = xd->mi[0];
3714
3715
197k
  x->compound_idx = 1;  // COMPOUND_AVERAGE
3716
197k
  RD_STATS skip_mode_rd_stats;
3717
197k
  av1_invalid_rd_stats(&skip_mode_rd_stats);
3718
3719
197k
  if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3720
197k
      skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3721
0
    return;
3722
0
  }
3723
3724
197k
  const MV_REFERENCE_FRAME ref_frame =
3725
197k
      LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3726
197k
  const MV_REFERENCE_FRAME second_ref_frame =
3727
197k
      LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3728
197k
  const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3729
197k
  const THR_MODES mode_index =
3730
197k
      get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3731
3732
197k
  if (mode_index == THR_INVALID) {
3733
0
    return;
3734
0
  }
3735
3736
197k
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3737
197k
       cpi->sf.inter_sf.disable_onesided_comp) &&
3738
197k
      cpi->all_one_sided_refs) {
3739
91.3k
    return;
3740
91.3k
  }
3741
3742
106k
  mbmi->mode = this_mode;
3743
106k
  mbmi->uv_mode = UV_DC_PRED;
3744
106k
  mbmi->ref_frame[0] = ref_frame;
3745
106k
  mbmi->ref_frame[1] = second_ref_frame;
3746
106k
  const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3747
106k
  if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3748
1.08k
    MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3749
1.08k
    if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3750
908
        mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3751
908
      return;
3752
908
    }
3753
180
    av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3754
180
                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3755
180
                     mbmi_ext->mode_context);
3756
    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3757
    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3758
180
    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3759
180
  }
3760
3761
106k
  assert(this_mode == NEAREST_NEARESTMV);
3762
105k
  if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3763
927
    return;
3764
927
  }
3765
3766
104k
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
3767
104k
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3768
104k
  mbmi->comp_group_idx = 0;
3769
104k
  mbmi->compound_idx = x->compound_idx;
3770
104k
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3771
104k
  mbmi->motion_mode = SIMPLE_TRANSLATION;
3772
104k
  mbmi->ref_mv_idx = 0;
3773
104k
  mbmi->skip_mode = mbmi->skip_txfm = 1;
3774
104k
  mbmi->palette_mode_info.palette_size[0] = 0;
3775
104k
  mbmi->palette_mode_info.palette_size[1] = 0;
3776
3777
104k
  set_default_interp_filters(mbmi, cm->features.interp_filter);
3778
3779
104k
  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3780
331k
  for (int i = 0; i < num_planes; i++) {
3781
227k
    xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3782
227k
    xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3783
227k
  }
3784
3785
104k
  BUFFER_SET orig_dst;
3786
331k
  for (int i = 0; i < num_planes; i++) {
3787
227k
    orig_dst.plane[i] = xd->plane[i].dst.buf;
3788
227k
    orig_dst.stride[i] = xd->plane[i].dst.stride;
3789
227k
  }
3790
3791
  // Compare the use of skip_mode with the best intra/inter mode obtained.
3792
104k
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3793
104k
  int64_t best_intra_inter_mode_cost = INT64_MAX;
3794
104k
  if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3795
103k
    const ModeCosts *mode_costs = &x->mode_costs;
3796
103k
    best_intra_inter_mode_cost = RDCOST(
3797
103k
        x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3798
103k
        rd_cost->dist);
3799
    // Account for non-skip mode rate in total rd stats
3800
103k
    rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3801
103k
    av1_rd_cost_update(x->rdmult, rd_cost);
3802
103k
  }
3803
3804
  // Obtain the rdcost for skip_mode.
3805
104k
  skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst,
3806
104k
               best_intra_inter_mode_cost);
3807
3808
104k
  if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3809
7.71k
      (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3810
7.71k
    assert(mode_index != THR_INVALID);
3811
7.71k
    search_state->best_mbmode.skip_mode = 1;
3812
7.71k
    search_state->best_mbmode = *mbmi;
3813
7.71k
    memset(search_state->best_mbmode.inter_tx_size,
3814
7.71k
           search_state->best_mbmode.tx_size,
3815
7.71k
           sizeof(search_state->best_mbmode.inter_tx_size));
3816
7.71k
    set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3817
7.71k
                  search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3818
7.71k
                  xd);
3819
7.71k
    search_state->best_mode_index = mode_index;
3820
3821
    // Update rd_cost
3822
7.71k
    rd_cost->rate = skip_mode_rd_stats.rate;
3823
7.71k
    rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3824
7.71k
    rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3825
3826
7.71k
    search_state->best_rd = rd_cost->rdcost;
3827
7.71k
    search_state->best_skip2 = 1;
3828
7.71k
    search_state->best_mode_skippable = 1;
3829
3830
7.71k
    x->txfm_search_info.skip_txfm = 1;
3831
7.71k
  }
3832
104k
}
3833
3834
// Get winner mode stats of given mode index
3835
static inline MB_MODE_INFO *get_winner_mode_stats(
3836
    MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3837
    int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3838
    RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3839
    THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3840
745k
    int mode_idx) {
3841
745k
  MB_MODE_INFO *winner_mbmi;
3842
745k
  if (multi_winner_mode_type) {
3843
0
    assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3844
0
    WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3845
0
    winner_mbmi = &winner_mode_stat->mbmi;
3846
3847
0
    *winner_rd_cost = &winner_mode_stat->rd_cost;
3848
0
    *winner_rate_y = winner_mode_stat->rate_y;
3849
0
    *winner_rate_uv = winner_mode_stat->rate_uv;
3850
0
    *winner_mode_index = winner_mode_stat->mode_index;
3851
745k
  } else {
3852
745k
    winner_mbmi = best_mbmode;
3853
745k
    *winner_rd_cost = best_rd_cost;
3854
745k
    *winner_rate_y = best_rate_y;
3855
745k
    *winner_rate_uv = best_rate_uv;
3856
745k
    *winner_mode_index = *best_mode_index;
3857
745k
  }
3858
745k
  return winner_mbmi;
3859
745k
}
3860
3861
// speed feature: fast intra/inter transform type search
3862
// Used for speed >= 2
3863
// When this speed feature is on, in rd mode search, only DCT is used.
3864
// After the mode is determined, this function is called, to select
3865
// transform types and get accurate rdcost.
3866
static inline void refine_winner_mode_tx(
3867
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
3868
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
3869
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
3870
896k
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
3871
896k
  const AV1_COMMON *const cm = &cpi->common;
3872
896k
  MACROBLOCKD *const xd = &x->e_mbd;
3873
896k
  MB_MODE_INFO *const mbmi = xd->mi[0];
3874
896k
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
3875
896k
  int64_t best_rd;
3876
896k
  const int num_planes = av1_num_planes(cm);
3877
3878
896k
  if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
3879
896k
                                         rd_cost->skip_txfm))
3880
143k
    return;
3881
3882
  // Set params for winner mode evaluation
3883
752k
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
3884
3885
  // No best mode identified so far
3886
752k
  if (*best_mode_index == THR_INVALID) return;
3887
3888
745k
  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
3889
1.49M
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
3890
745k
    RD_STATS *winner_rd_stats = NULL;
3891
745k
    int winner_rate_y = 0, winner_rate_uv = 0;
3892
745k
    THR_MODES winner_mode_index = 0;
3893
3894
    // TODO(any): Combine best mode and multi-winner mode processing paths
3895
    // Get winner mode stats for current mode index
3896
745k
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
3897
745k
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
3898
745k
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
3899
745k
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);
3900
3901
745k
    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
3902
701k
        winner_mode_index != THR_INVALID &&
3903
701k
        is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
3904
701k
                                          rd_cost->skip_txfm)) {
3905
701k
      RD_STATS rd_stats = *winner_rd_stats;
3906
701k
      int skip_blk = 0;
3907
701k
      RD_STATS rd_stats_y, rd_stats_uv;
3908
701k
      const int skip_ctx = av1_get_skip_txfm_context(xd);
3909
3910
701k
      *mbmi = *winner_mbmi;
3911
3912
701k
      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3913
3914
      // Select prediction reference frames.
3915
1.96M
      for (int i = 0; i < num_planes; i++) {
3916
1.26M
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3917
1.26M
        if (has_second_ref(mbmi))
3918
9.34k
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3919
1.26M
      }
3920
3921
701k
      if (is_inter_mode(mbmi->mode)) {
3922
128k
        const int mi_row = xd->mi_row;
3923
128k
        const int mi_col = xd->mi_col;
3924
128k
        bool is_predictor_built = false;
3925
128k
        const PREDICTION_MODE prediction_mode = mbmi->mode;
3926
        // Do interpolation filter search for realtime mode if applicable.
3927
128k
        if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
3928
0
            cpi->oxcf.mode == REALTIME &&
3929
0
            cm->current_frame.reference_mode == SINGLE_REFERENCE &&
3930
0
            is_inter_mode(prediction_mode) &&
3931
0
            mbmi->motion_mode == SIMPLE_TRANSLATION &&
3932
0
            !is_inter_compound_mode(prediction_mode)) {
3933
0
          is_predictor_built =
3934
0
              fast_interp_search(cpi, x, mi_row, mi_col, bsize);
3935
0
        }
3936
128k
        if (!is_predictor_built) {
3937
128k
          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3938
128k
                                        av1_num_planes(cm) - 1);
3939
128k
        }
3940
128k
        if (mbmi->motion_mode == OBMC_CAUSAL)
3941
0
          av1_build_obmc_inter_predictors_sb(cm, xd);
3942
3943
128k
        av1_subtract_plane(x, bsize, 0);
3944
128k
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
3945
128k
            !xd->lossless[mbmi->segment_id]) {
3946
128k
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3947
128k
                                              INT64_MAX);
3948
128k
          assert(rd_stats_y.rate != INT_MAX);
3949
18.4E
        } else {
3950
18.4E
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3951
18.4E
                                            INT64_MAX);
3952
18.4E
          memset(mbmi->inter_tx_size, mbmi->tx_size,
3953
18.4E
                 sizeof(mbmi->inter_tx_size));
3954
18.4E
        }
3955
572k
      } else {
3956
572k
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3957
572k
                                          INT64_MAX);
3958
572k
      }
3959
3960
701k
      if (num_planes > 1) {
3961
280k
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
3962
420k
      } else {
3963
420k
        av1_init_rd_stats(&rd_stats_uv);
3964
420k
      }
3965
3966
701k
      const int comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
3967
3968
701k
      const ModeCosts *mode_costs = &x->mode_costs;
3969
701k
      if (is_inter_mode(mbmi->mode) &&
3970
128k
          (!cpi->oxcf.algo_cfg.sharpness || !comp_pred) &&
3971
128k
          RDCOST(x->rdmult,
3972
128k
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
3973
128k
                     rd_stats_uv.rate,
3974
128k
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
3975
128k
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
3976
701k
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
3977
8.31k
        skip_blk = 1;
3978
8.31k
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
3979
8.31k
        rd_stats_uv.rate = 0;
3980
8.31k
        rd_stats_y.dist = rd_stats_y.sse;
3981
8.31k
        rd_stats_uv.dist = rd_stats_uv.sse;
3982
692k
      } else {
3983
692k
        skip_blk = 0;
3984
692k
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
3985
692k
      }
3986
701k
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
3987
701k
                      winner_rate_y - winner_rate_uv;
3988
701k
      int64_t this_rd =
3989
701k
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
3990
701k
      if (best_rd > this_rd) {
3991
582k
        *best_mbmode = *mbmi;
3992
582k
        *best_mode_index = winner_mode_index;
3993
582k
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3994
582k
        rd_cost->rate = this_rate;
3995
582k
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
3996
582k
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
3997
582k
        rd_cost->rdcost = this_rd;
3998
582k
        best_rd = this_rd;
3999
582k
        *best_skip2 = skip_blk;
4000
582k
      }
4001
701k
    }
4002
745k
  }
4003
745k
}
4004
4005
/*!\cond */
4006
typedef struct {
4007
  // Mask for each reference frame, specifying which prediction modes to NOT try
4008
  // during search.
4009
  uint32_t pred_modes[REF_FRAMES];
4010
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
4011
  // reference frames (i, j).
4012
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
4013
  // (NONE_FRAME).
4014
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
4015
} mode_skip_mask_t;
4016
/*!\endcond */
4017
4018
// Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
4019
static inline void disable_reference(
4020
4.94M
    MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
4021
49.4M
  for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
4022
44.4M
    ref_combo[ref][ref2 + 1] = true;
4023
44.4M
  }
4024
4.94M
}
4025
4026
// Update 'ref_combo' mask to disable all inter references except ALTREF.
4027
static inline void disable_inter_references_except_altref(
4028
33.1k
    bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
4029
33.1k
  disable_reference(LAST_FRAME, ref_combo);
4030
33.1k
  disable_reference(LAST2_FRAME, ref_combo);
4031
33.1k
  disable_reference(LAST3_FRAME, ref_combo);
4032
33.1k
  disable_reference(GOLDEN_FRAME, ref_combo);
4033
33.1k
  disable_reference(BWDREF_FRAME, ref_combo);
4034
33.1k
  disable_reference(ALTREF2_FRAME, ref_combo);
4035
33.1k
}
4036
4037
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
4038
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
4039
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
4040
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
4041
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
4042
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
4043
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
4044
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
4045
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
4046
};
4047
4048
// Selects which reference frame combinations default_skip_mask() leaves
// enabled.
typedef enum {
  REF_SET_FULL,     // Nothing is masked by default.
  REF_SET_REDUCED,  // Only the pairs in reduced_ref_combos stay enabled.
  REF_SET_REALTIME  // Only the pairs in real_time_ref_combos stay enabled.
} REF_SET;
4049
4050
895k
// Initializes 'mask' to the default for the given reference set.
//
// REF_SET_FULL: the whole mask is cleared, so no mode and no reference
// combination is skipped.
// Any other set: all prediction modes stay available, every reference
// combination is disabled first, and then only the combinations listed in the
// table for that set (reduced_ref_combos or real_time_ref_combos) are
// re-enabled.
static inline void default_skip_mask(mode_skip_mask_t *mask, REF_SET ref_set) {
  if (ref_set == REF_SET_FULL) {
    // Everything available by default.
    memset(mask, 0, sizeof(*mask));
  } else {
    // All modes available by default.
    memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
    // All references disabled first.
    for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
      for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
        mask->ref_combo[ref1][ref2 + 1] = true;
      }
    }
    // Initialized so that the (unexpected) default case below never leaves
    // an indeterminate pointer or count behind.
    const MV_REFERENCE_FRAME(*ref_set_combos)[2] = NULL;
    int num_ref_combos = 0;

    // Then enable reduced set of references explicitly.
    switch (ref_set) {
      case REF_SET_REDUCED:
        ref_set_combos = reduced_ref_combos;
        // The cast covers the whole quotient, not just the numerator.
        num_ref_combos =
            (int)(sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]));
        break;
      case REF_SET_REALTIME:
        ref_set_combos = real_time_ref_combos;
        num_ref_combos = (int)(sizeof(real_time_ref_combos) /
                               sizeof(real_time_ref_combos[0]));
        break;
      default: assert(0); num_ref_combos = 0;
    }

    for (int i = 0; i < num_ref_combos; ++i) {
      const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
      // Second index is offset by one to make room for NONE_FRAME.
      mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
    }
  }
}
4087
4088
static inline void init_mode_skip_mask(mode_skip_mask_t *mask,
4089
                                       const AV1_COMP *cpi, MACROBLOCK *x,
4090
896k
                                       BLOCK_SIZE bsize) {
4091
896k
  const AV1_COMMON *const cm = &cpi->common;
4092
896k
  const struct segmentation *const seg = &cm->seg;
4093
896k
  MACROBLOCKD *const xd = &x->e_mbd;
4094
896k
  MB_MODE_INFO *const mbmi = xd->mi[0];
4095
896k
  unsigned char segment_id = mbmi->segment_id;
4096
896k
  const SPEED_FEATURES *const sf = &cpi->sf;
4097
896k
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
4098
896k
  REF_SET ref_set = REF_SET_FULL;
4099
4100
896k
  if (sf->rt_sf.use_real_time_ref_set)
4101
0
    ref_set = REF_SET_REALTIME;
4102
896k
  else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
4103
0
    ref_set = REF_SET_REDUCED;
4104
4105
896k
  default_skip_mask(mask, ref_set);
4106
4107
896k
  int min_pred_mv_sad = INT_MAX;
4108
896k
  MV_REFERENCE_FRAME ref_frame;
4109
896k
  if (ref_set == REF_SET_REALTIME) {
4110
    // For real-time encoding, we only look at a subset of ref frames. So the
4111
    // threshold for pruning should be computed from this subset as well.
4112
0
    const int num_rt_refs =
4113
0
        sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
4114
0
    for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
4115
0
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
4116
0
      if (ref != INTRA_FRAME) {
4117
0
        min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
4118
0
      }
4119
0
    }
4120
896k
  } else {
4121
7.16M
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
4122
6.26M
      min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
4123
896k
  }
4124
4125
7.15M
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4126
6.25M
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
4127
      // Skip checking missing reference in both single and compound reference
4128
      // modes.
4129
4.70M
      disable_reference(ref_frame, mask->ref_combo);
4130
4.70M
    } else {
4131
      // Skip fixed mv modes for poor references
4132
1.54M
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
4133
74.4k
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
4134
74.4k
      }
4135
1.54M
    }
4136
6.25M
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
4137
0
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
4138
      // Reference not used for the segment.
4139
0
      disable_reference(ref_frame, mask->ref_combo);
4140
0
    }
4141
6.25M
  }
4142
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
4143
  // is disabled for this segment. This is to prevent the possibility that we
4144
  // end up unable to pick any mode.
4145
896k
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
4146
    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
4147
    // unless ARNR filtering is enabled in which case we want
4148
    // an unfiltered alternative. We allow near/nearest as well
4149
    // because they may result in zero-zero MVs but be cheaper.
4150
896k
    if (cpi->rc.is_src_frame_alt_ref &&
4151
33.1k
        (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
4152
0
      disable_inter_references_except_altref(mask->ref_combo);
4153
4154
0
      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
4155
0
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
4156
0
      int_mv near_mv, nearest_mv, global_mv;
4157
0
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
4158
0
                  &x->mbmi_ext);
4159
0
      get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
4160
0
      get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
4161
4162
0
      if (near_mv.as_int != global_mv.as_int)
4163
0
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
4164
0
      if (nearest_mv.as_int != global_mv.as_int)
4165
0
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
4166
0
    }
4167
896k
  }
4168
4169
896k
  if (cpi->rc.is_src_frame_alt_ref) {
4170
33.1k
    if (inter_sf->alt_ref_search_fp &&
4171
33.1k
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
4172
33.1k
      mask->pred_modes[ALTREF_FRAME] = 0;
4173
33.1k
      disable_inter_references_except_altref(mask->ref_combo);
4174
33.1k
      disable_reference(INTRA_FRAME, mask->ref_combo);
4175
33.1k
    }
4176
33.1k
  }
4177
4178
896k
  if (inter_sf->alt_ref_search_fp) {
4179
896k
    if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
4180
53.0k
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
4181
      // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
4182
      // those are past frames
4183
53.0k
      MV_REFERENCE_FRAME start_frame =
4184
53.0k
          inter_sf->alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
4185
212k
      for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
4186
159k
        if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
4187
159k
            0) {
4188
          // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
4189
          // to the relative dist of LAST_FRAME.
4190
0
          if (abs(cpi->ref_frame_dist_info
4191
0
                      .ref_relative_dist[ref_frame - LAST_FRAME] -
4192
0
                  cpi->ref_frame_dist_info
4193
0
                      .ref_relative_dist[LAST_FRAME - LAST_FRAME]) > 4) {
4194
0
            continue;
4195
0
          }
4196
0
          if (x->pred_mv_sad[ref_frame] > sad_thresh)
4197
0
            mask->pred_modes[ref_frame] |= INTER_ALL;
4198
0
        }
4199
159k
      }
4200
53.0k
    }
4201
896k
  }
4202
4203
896k
  if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
4204
0
    if (x->best_pred_mv_sad[0] < INT_MAX) {
4205
0
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
4206
0
      const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };
4207
4208
      // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
4209
0
      for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
4210
0
        ref_frame = prune_ref_list[ref_idx];
4211
0
        if (x->pred_mv_sad[ref_frame] > sad_thresh)
4212
0
          mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
4213
0
      }
4214
0
    }
4215
0
  }
4216
4217
896k
  if (bsize > sf->part_sf.max_intra_bsize) {
4218
4.97k
    disable_reference(INTRA_FRAME, mask->ref_combo);
4219
4.97k
  }
4220
4221
896k
  if (!cpi->oxcf.tool_cfg.enable_global_motion) {
4222
0
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4223
0
      mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
4224
0
      mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
4225
0
    }
4226
0
  }
4227
4228
896k
  mask->pred_modes[INTRA_FRAME] |=
4229
896k
      ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
4230
4231
  // Prune reference frames which are not the closest to the current
4232
  // frame and with large pred_mv_sad.
4233
896k
  if (inter_sf->prune_single_ref) {
4234
842k
    assert(inter_sf->prune_single_ref > 0 && inter_sf->prune_single_ref < 5);
4235
842k
    const double prune_thresh = (inter_sf->prune_single_ref <= 3) ? 1.20 : 1.05;
4236
4237
6.72M
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4238
5.88M
      const RefFrameDistanceInfo *const ref_frame_dist_info =
4239
5.88M
          &cpi->ref_frame_dist_info;
4240
5.88M
      const int is_closest_ref =
4241
5.88M
          (ref_frame == ref_frame_dist_info->nearest_past_ref) ||
4242
5.04M
          (ref_frame == ref_frame_dist_info->nearest_future_ref);
4243
5.88M
      const int ref_idx = ref_frame - LAST_FRAME;
4244
4245
5.88M
      if (!(cpi->keep_single_ref_frame_mask & (1 << ref_idx) ||
4246
5.88M
            is_closest_ref)) {
4247
4.94M
        const int dir =
4248
4.94M
            (ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] < 0)
4249
4.94M
                ? 0
4250
4.94M
                : 1;
4251
4.94M
        if (x->best_pred_mv_sad[dir] < INT_MAX &&
4252
1.11M
            x->pred_mv_sad[ref_frame] > prune_thresh * x->best_pred_mv_sad[dir])
4253
789k
          mask->pred_modes[ref_frame] |= INTER_SINGLE_ALL;
4254
4.94M
      }
4255
5.88M
    }
4256
842k
  }
4257
896k
}
4258
4259
static inline void init_neighbor_pred_buf(const OBMCBuffer *const obmc_buffer,
4260
                                          HandleInterModeArgs *const args,
4261
896k
                                          int is_hbd) {
4262
896k
  if (is_hbd) {
4263
107k
    const int len = sizeof(uint16_t);
4264
107k
    args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
4265
107k
    args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
4266
107k
                                                 (MAX_SB_SQUARE >> 1) * len);
4267
107k
    args->above_pred_buf[2] =
4268
107k
        CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
4269
107k
    args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
4270
107k
    args->left_pred_buf[1] =
4271
107k
        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
4272
107k
    args->left_pred_buf[2] =
4273
107k
        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
4274
788k
  } else {
4275
788k
    args->above_pred_buf[0] = obmc_buffer->above_pred;
4276
788k
    args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
4277
788k
    args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
4278
788k
    args->left_pred_buf[0] = obmc_buffer->left_pred;
4279
788k
    args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
4280
788k
    args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
4281
788k
  }
4282
896k
}
4283
4284
static inline int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
4285
11.5M
                                  MV_REFERENCE_FRAME ref_frame) {
4286
11.5M
  const AV1_COMMON *const cm = &cpi->common;
4287
11.5M
  MV_REFERENCE_FRAME rf[2];
4288
11.5M
  av1_set_ref_frame(rf, ref_frame);
4289
4290
11.5M
  if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
4291
4292
6.48M
  if (prune_ref_by_selective_ref_frame(cpi, x, rf,
4293
6.48M
                                       cm->cur_frame->ref_display_order_hint)) {
4294
501k
    return 1;
4295
501k
  }
4296
4297
5.98M
  return 0;
4298
6.48M
}
4299
4300
static inline int is_ref_frame_used_by_compound_ref(int ref_frame,
4301
28.3k
                                                    int skip_ref_frame_mask) {
4302
576k
  for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
4303
551k
    if (!(skip_ref_frame_mask & (1 << r))) {
4304
2.89k
      const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
4305
2.89k
      if (rf[0] == ref_frame || rf[1] == ref_frame) {
4306
2.71k
        return 1;
4307
2.71k
      }
4308
2.89k
    }
4309
551k
  }
4310
25.6k
  return 0;
4311
28.3k
}
4312
4313
static inline int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
4314
104k
                                             const MB_MODE_INFO *mi_cache) {
4315
104k
  if (!mi_cache) {
4316
104k
    return 0;
4317
104k
  }
4318
4319
12
  if (ref_frame < REF_FRAMES) {
4320
0
    return (ref_frame == mi_cache->ref_frame[0] ||
4321
0
            ref_frame == mi_cache->ref_frame[1]);
4322
0
  }
4323
4324
  // if we are here, then the current mode is compound.
4325
12
  MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
4326
12
  return ref_frame == cached_ref_type;
4327
12
}
4328
4329
// Please add/modify parameter setting in this function, making it consistent
4330
// and easy to read and maintain.
4331
static inline void set_params_rd_pick_inter_mode(
4332
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
4333
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
4334
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
4335
896k
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
4336
896k
  const AV1_COMMON *const cm = &cpi->common;
4337
896k
  MACROBLOCKD *const xd = &x->e_mbd;
4338
896k
  MB_MODE_INFO *const mbmi = xd->mi[0];
4339
896k
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
4340
896k
  unsigned char segment_id = mbmi->segment_id;
4341
4342
896k
  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
4343
896k
  av1_collect_neighbors_ref_counts(xd);
4344
896k
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
4345
896k
                           ref_costs_comp);
4346
4347
896k
  const int mi_row = xd->mi_row;
4348
896k
  const int mi_col = xd->mi_col;
4349
896k
  x->best_pred_mv_sad[0] = INT_MAX;
4350
896k
  x->best_pred_mv_sad[1] = INT_MAX;
4351
4352
7.16M
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
4353
6.27M
       ++ref_frame) {
4354
6.27M
    x->pred_mv_sad[ref_frame] = INT_MAX;
4355
6.27M
    mbmi_ext->mode_context[ref_frame] = 0;
4356
6.27M
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
4357
6.27M
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
4358
      // Skip the ref frame if the mask says skip and the ref is not used by
4359
      // compound ref.
4360
1.55M
      if (skip_ref_frame_mask & (1 << ref_frame) &&
4361
17.2k
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
4362
16.7k
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4363
16.7k
        continue;
4364
16.7k
      }
4365
1.55M
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
4366
1.54M
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
4367
1.54M
    }
4368
6.25M
    if (cpi->sf.inter_sf.alt_ref_search_fp ||
4369
0
        cpi->sf.inter_sf.prune_single_ref ||
4370
6.25M
        cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
4371
      // Store the best pred_mv_sad across all past frames
4372
6.25M
      if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
4373
6.25M
          0)
4374
1.40M
        x->best_pred_mv_sad[0] =
4375
1.40M
            AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
4376
4.84M
      else
4377
        // Store the best pred_mv_sad across all future frames
4378
4.84M
        x->best_pred_mv_sad[1] =
4379
4.84M
            AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
4380
6.25M
    }
4381
6.25M
  }
4382
4383
896k
  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
4384
    // No second reference on RT ref set, so no need to initialize
4385
896k
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
4386
19.6M
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
4387
18.7M
      mbmi_ext->mode_context[ref_frame] = 0;
4388
18.7M
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
4389
18.7M
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
4390
18.7M
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
4391
17.8M
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
4392
17.8M
        continue;
4393
17.8M
      }
4394
4395
904k
      if (skip_ref_frame_mask & (1 << ref_frame) &&
4396
29.2k
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4397
29.1k
        continue;
4398
29.1k
      }
4399
      // Ref mv list population is not required, when compound references are
4400
      // pruned.
4401
874k
      if (prune_ref_frame(cpi, x, ref_frame)) continue;
4402
4403
102k
      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
4404
102k
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
4405
102k
                       mbmi_ext->mode_context);
4406
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
4407
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
4408
102k
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
4409
102k
    }
4410
896k
  }
4411
4412
896k
  av1_count_overlappable_neighbors(cm, xd);
4413
896k
  const FRAME_UPDATE_TYPE update_type =
4414
896k
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
4415
896k
  int use_actual_frame_probs = 1;
4416
896k
  int prune_obmc;
4417
#if CONFIG_FPMT_TEST
4418
  use_actual_frame_probs =
4419
      (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
4420
  if (!use_actual_frame_probs) {
4421
    prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
4422
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
4423
  }
4424
#endif
4425
896k
  if (use_actual_frame_probs) {
4426
896k
    prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
4427
896k
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
4428
896k
  }
4429
896k
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
4430
0
    if (check_num_overlappable_neighbors(mbmi) &&
4431
0
        is_motion_variation_allowed_bsize(bsize)) {
4432
0
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4433
0
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4434
0
                                       MAX_SB_SIZE >> 1 };
4435
0
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4436
0
                                        MAX_SB_SIZE >> 1 };
4437
0
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4438
0
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
4439
0
                                          dst_width1, dst_height1,
4440
0
                                          args->above_pred_stride);
4441
0
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
4442
0
                                         dst_width2, dst_height2,
4443
0
                                         args->left_pred_stride);
4444
0
      const int num_planes = av1_num_planes(cm);
4445
0
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
4446
0
                           mi_col, 0, num_planes);
4447
0
      calc_target_weighted_pred(
4448
0
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
4449
0
          args->left_pred_buf[0], args->left_pred_stride[0]);
4450
0
    }
4451
0
  }
4452
4453
896k
  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);
4454
4455
  // Set params for mode evaluation
4456
896k
  set_mode_eval_params(cpi, x, MODE_EVAL);
4457
4458
896k
  x->comp_rd_stats_idx = 0;
4459
4460
8.06M
  for (int idx = 0; idx < REF_FRAMES; idx++) {
4461
7.16M
    args->best_single_sse_in_refs[idx] = INT32_MAX;
4462
7.16M
  }
4463
896k
}
4464
4465
static inline void init_single_inter_mode_search_state(
4466
895k
    InterModeSearchState *search_state) {
4467
2.68M
  for (int dir = 0; dir < 2; ++dir) {
4468
8.96M
    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4469
35.8M
      for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
4470
28.6M
        SingleInterModeState *state;
4471
4472
28.6M
        state = &search_state->single_state[dir][mode][ref_frame];
4473
28.6M
        state->ref_frame = NONE_FRAME;
4474
28.6M
        state->rd = INT64_MAX;
4475
4476
28.6M
        state = &search_state->single_state_modelled[dir][mode][ref_frame];
4477
28.6M
        state->ref_frame = NONE_FRAME;
4478
28.6M
        state->rd = INT64_MAX;
4479
4480
28.6M
        search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
4481
28.6M
      }
4482
7.17M
    }
4483
1.79M
  }
4484
4485
8.06M
  for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4486
7.17M
    search_state->best_single_rd[ref_frame] = INT64_MAX;
4487
7.17M
    search_state->best_single_mode[ref_frame] = PRED_MODE_INVALID;
4488
7.17M
  }
4489
895k
  av1_zero(search_state->single_state_cnt);
4490
895k
  av1_zero(search_state->single_state_modelled_cnt);
4491
895k
}
4492
4493
static inline void init_inter_mode_search_state(
4494
    InterModeSearchState *search_state, const AV1_COMP *cpi,
4495
896k
    const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
4496
896k
  init_intra_mode_search_state(&search_state->intra_search_state);
4497
896k
  av1_invalid_rd_stats(&search_state->best_y_rdcost);
4498
4499
896k
  search_state->best_rd = best_rd_so_far;
4500
896k
  search_state->best_skip_rd[0] = INT64_MAX;
4501
896k
  search_state->best_skip_rd[1] = INT64_MAX;
4502
4503
896k
  av1_zero(search_state->best_mbmode);
4504
4505
896k
  search_state->best_rate_y = INT_MAX;
4506
4507
896k
  search_state->best_rate_uv = INT_MAX;
4508
4509
896k
  search_state->best_mode_skippable = 0;
4510
4511
896k
  search_state->best_skip2 = 0;
4512
4513
896k
  search_state->best_mode_index = THR_INVALID;
4514
4515
896k
  const MACROBLOCKD *const xd = &x->e_mbd;
4516
896k
  const MB_MODE_INFO *const mbmi = xd->mi[0];
4517
896k
  const unsigned char segment_id = mbmi->segment_id;
4518
4519
896k
  search_state->num_available_refs = 0;
4520
896k
  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
4521
896k
  memset(search_state->dist_order_refs, -1,
4522
896k
         sizeof(search_state->dist_order_refs));
4523
4524
7.16M
  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
4525
6.27M
    search_state->mode_threshold[i] = 0;
4526
896k
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
4527
19.6M
  for (int i = LAST_NEW_MV_INDEX + 1; i < SINGLE_REF_MODE_END; ++i)
4528
18.7M
    search_state->mode_threshold[i] =
4529
18.7M
        ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4530
18.7M
        RD_THRESH_FAC_FRAC_BITS;
4531
4532
896k
  search_state->best_intra_rd = INT64_MAX;
4533
4534
896k
  search_state->best_pred_sse = UINT_MAX;
4535
4536
896k
  av1_zero(search_state->single_newmv);
4537
896k
  av1_zero(search_state->single_newmv_rate);
4538
896k
  av1_zero(search_state->single_newmv_valid);
4539
4.47M
  for (int i = SINGLE_INTER_MODE_START; i < SINGLE_INTER_MODE_END; ++i) {
4540
14.3M
    for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4541
96.5M
      for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4542
85.7M
        search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4543
85.7M
        search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4544
85.7M
      }
4545
10.7M
    }
4546
3.57M
  }
4547
4548
3.58M
  for (int i = 0; i < REFERENCE_MODES; ++i) {
4549
2.68M
    search_state->best_pred_rd[i] = INT64_MAX;
4550
2.68M
  }
4551
4552
896k
  if (cpi->common.current_frame.reference_mode != SINGLE_REFERENCE) {
4553
115M
    for (int i = SINGLE_REF_MODE_END; i < THR_INTER_MODE_END; ++i)
4554
114M
      search_state->mode_threshold[i] =
4555
114M
          ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4556
114M
          RD_THRESH_FAC_FRAC_BITS;
4557
4558
8.04M
    for (int i = COMP_INTER_MODE_START; i < COMP_INTER_MODE_END; ++i) {
4559
28.5M
      for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4560
192M
        for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4561
171M
          search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4562
171M
          search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4563
171M
        }
4564
21.4M
      }
4565
7.14M
    }
4566
4567
896k
    init_single_inter_mode_search_state(search_state);
4568
896k
  }
4569
896k
}
4570
4571
static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
4572
                           const MV_REFERENCE_FRAME *ref_frame,
4573
46.4M
                           const PREDICTION_MODE this_mode) {
4574
46.4M
  if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
4575
4.32M
    return true;
4576
4.32M
  }
4577
4578
42.1M
  return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
4579
46.4M
}
4580
4581
static AOM_FORCE_INLINE int inter_mode_compatible_skip(
4582
    const AV1_COMP *cpi, const MACROBLOCK *x, BLOCK_SIZE bsize,
4583
132M
    PREDICTION_MODE curr_mode, const MV_REFERENCE_FRAME *ref_frames) {
4584
132M
  const int comp_pred = ref_frames[1] > INTRA_FRAME;
4585
132M
  if (comp_pred) {
4586
107M
    if (!is_comp_ref_allowed(bsize)) return 1;
4587
107M
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]])) {
4588
86.2M
      return 1;
4589
86.2M
    }
4590
4591
21.3M
    const AV1_COMMON *const cm = &cpi->common;
4592
21.3M
    if (frame_is_intra_only(cm)) return 1;
4593
4594
21.3M
    const CurrentFrame *const current_frame = &cm->current_frame;
4595
21.3M
    if (current_frame->reference_mode == SINGLE_REFERENCE) return 1;
4596
4597
21.3M
    const struct segmentation *const seg = &cm->seg;
4598
21.3M
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
4599
    // Do not allow compound prediction if the segment level reference frame
4600
    // feature is in use as in this case there can only be one reference.
4601
21.3M
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
4602
21.3M
  }
4603
4604
46.4M
  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
4605
    // Mode must be compatible
4606
0
    if (!is_interintra_allowed_bsize(bsize)) return 1;
4607
0
    if (!is_interintra_allowed_mode(curr_mode)) return 1;
4608
0
  }
4609
4610
46.1M
  return 0;
4611
46.1M
}
4612
4613
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
4614
9.74k
                                        BLOCK_SIZE bsize, int mib_size) {
4615
9.74k
  const int sb_size_mask = mib_size - 1;
4616
9.74k
  const MACROBLOCKD *const xd = &x->e_mbd;
4617
9.74k
  const int mi_row = xd->mi_row;
4618
9.74k
  const int mi_col = xd->mi_col;
4619
9.74k
  const int mi_row_in_sb = mi_row & sb_size_mask;
4620
9.74k
  const int mi_col_in_sb = mi_col & sb_size_mask;
4621
9.74k
  const int mi_w = mi_size_wide[bsize];
4622
9.74k
  const int mi_h = mi_size_high[bsize];
4623
9.74k
  int picked_ref_frames_mask = 0;
4624
58.5k
  for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_h; ++i) {
4625
271k
    for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_w; ++j) {
4626
222k
      picked_ref_frames_mask |= x->picked_ref_frames_mask[i * 32 + j];
4627
222k
    }
4628
48.7k
  }
4629
9.74k
  return picked_ref_frames_mask;
4630
9.74k
}
4631
4632
// Check if reference frame pair of the current block matches with the given
4633
// block.
4634
static inline int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4635
1.71M
                                       const MV_REFERENCE_FRAME *ref_frames) {
4636
1.71M
  return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4637
322k
          (ref_frames[1] == mbmi->ref_frame[1]));
4638
1.71M
}
4639
4640
// Case 1: return 0, means don't skip this mode
4641
// Case 2: return 1, means skip this mode completely
4642
// Case 3: return 2, means skip compound only, but still try single motion modes
4643
static AOM_FORCE_INLINE int inter_mode_search_order_independent_skip(
4644
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
4645
    InterModeSearchState *search_state, int skip_ref_frame_mask,
4646
46.4M
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
4647
46.4M
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
4648
35.7M
    return 1;
4649
35.7M
  }
4650
4651
10.6M
  const int ref_type = av1_ref_frame_type(ref_frame);
4652
10.6M
  if (!cpi->sf.rt_sf.use_real_time_ref_set)
4653
10.7M
    if (prune_ref_frame(cpi, x, ref_type)) return 1;
4654
4655
  // This is only used in motion vector unit test.
4656
5.85M
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
4657
0
      ref_frame[0] == INTRA_FRAME)
4658
0
    return 1;
4659
4660
5.85M
  const AV1_COMMON *const cm = &cpi->common;
4661
5.85M
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
4662
61.4k
    return 1;
4663
61.4k
  }
4664
4665
  // Reuse the prediction mode in cache
4666
5.78M
  if (x->use_mb_mode_cache) {
4667
0
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
4668
0
    const PREDICTION_MODE cached_mode = cached_mi->mode;
4669
0
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
4670
0
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;
4671
4672
    // If the cached mode is intra, then we just need to match the mode.
4673
0
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
4674
0
      return 1;
4675
0
    }
4676
4677
    // If the cached mode is single inter mode, then we match the mode and
4678
    // reference frame.
4679
0
    if (cached_mode_is_single) {
4680
0
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
4681
0
        return 1;
4682
0
      }
4683
0
    } else {
4684
      // If the cached mode is compound, then we need to consider several cases.
4685
0
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
4686
0
      if (mode_is_single) {
4687
        // If the mode is single, we know the modes can't match. But we might
4688
        // still want to search it if compound mode depends on the current mode.
4689
0
        int skip_motion_mode_only = 0;
4690
0
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
4691
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
4692
0
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
4693
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
4694
0
        } else if (cached_mode == NEW_NEWMV) {
4695
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
4696
0
                                   ref_frame[0] == cached_frame[1]);
4697
0
        }
4698
4699
0
        return 1 + skip_motion_mode_only;
4700
0
      } else {
4701
        // If both modes are compound, then everything must match.
4702
0
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
4703
0
            ref_frame[1] != cached_frame[1]) {
4704
0
          return 1;
4705
0
        }
4706
0
      }
4707
0
    }
4708
0
  }
4709
4710
5.78M
  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
4711
  // If no valid mode has been found so far in PARTITION_NONE when finding a
4712
  // valid partition is required, do not skip mode.
4713
5.78M
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
4714
662k
      x->must_find_valid_partition)
4715
0
    return 0;
4716
4717
5.78M
  const SPEED_FEATURES *const sf = &cpi->sf;
4718
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
4719
  // frames
4720
5.78M
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
4721
5.81M
      (mode == NEAR_NEARMV || mode == NEARMV)) {
4722
1.33M
    const MACROBLOCKD *const xd = &x->e_mbd;
4723
1.33M
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
4724
1.06M
        xd->up_available) {
4725
858k
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
4726
858k
                                                    { 1, 1, 0 },
4727
858k
                                                    { 2, 1, 0 } };
4728
858k
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;
4729
4730
858k
      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
4731
858k
             qindex_sub_range < 3);
4732
858k
      const int num_ref_frame_pair_match_thresh =
4733
858k
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
4734
858k
                    [qindex_sub_range];
4735
4736
858k
      assert(num_ref_frame_pair_match_thresh <= 2 &&
4737
858k
             num_ref_frame_pair_match_thresh >= 0);
4738
858k
      int num_ref_frame_pair_match = 0;
4739
4740
858k
      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
4741
858k
      num_ref_frame_pair_match +=
4742
858k
          match_ref_frame_pair(xd->above_mbmi, ref_frame);
4743
4744
      // Pruning based on ref frame pair match with neighbors.
4745
858k
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
4746
858k
    }
4747
1.33M
  }
4748
4749
5.26M
  int skip_motion_mode = 0;
4750
5.26M
  if (mbmi->partition != PARTITION_NONE) {
4751
58.2k
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
4752
58.2k
    if (ref_type <= ALTREF_FRAME && skip_ref) {
4753
      // Since the compound ref modes depends on the motion estimation result of
4754
      // two single ref modes (best mv of single ref modes as the start point),
4755
      // if current single ref mode is marked skip, we need to check if it will
4756
      // be used in compound ref modes.
4757
11.1k
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
4758
        // Found a not skipped compound ref mode which contains current
4759
        // single ref. So this single ref can't be skipped completely
4760
        // Just skip its motion mode search, still try its simple
4761
        // transition mode.
4762
2.17k
        skip_motion_mode = 1;
4763
2.17k
        skip_ref = 0;
4764
2.17k
      }
4765
11.1k
    }
4766
    // If we are reusing the prediction from cache, and the current frame is
4767
    // required by the cache, then we cannot prune it.
4768
58.2k
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
4769
0
      skip_ref = 0;
4770
      // If the cache only needs the current reference type for compound
4771
      // prediction, then we can skip motion mode search.
4772
0
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
4773
0
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
4774
0
    }
4775
58.2k
    if (skip_ref) return 1;
4776
58.2k
  }
4777
4778
5.25M
  if (ref_frame[0] == INTRA_FRAME) {
4779
0
    if (mode != DC_PRED) {
4780
      // Disable intra modes other than DC_PRED for blocks with low variance
4781
      // Threshold for intra skipping based on source variance
4782
      // TODO(debargha): Specialize the threshold for super block sizes
4783
0
      const unsigned int skip_intra_var_thresh = 64;
4784
0
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
4785
0
          x->source_variance < skip_intra_var_thresh)
4786
0
        return 1;
4787
0
    }
4788
0
  }
4789
4790
5.25M
  if (skip_motion_mode) return 2;
4791
4792
5.25M
  return 0;
4793
5.25M
}
4794
4795
static inline void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4796
                             const MV_REFERENCE_FRAME *ref_frames,
4797
47.0M
                             const AV1_COMMON *cm) {
4798
47.0M
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4799
47.0M
  mbmi->ref_mv_idx = 0;
4800
47.0M
  mbmi->mode = curr_mode;
4801
47.0M
  mbmi->uv_mode = UV_DC_PRED;
4802
47.0M
  mbmi->ref_frame[0] = ref_frames[0];
4803
47.0M
  mbmi->ref_frame[1] = ref_frames[1];
4804
47.0M
  pmi->palette_size[0] = 0;
4805
47.0M
  pmi->palette_size[1] = 0;
4806
47.0M
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
4807
47.0M
  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4808
47.0M
  mbmi->motion_mode = SIMPLE_TRANSLATION;
4809
47.0M
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4810
47.0M
  set_default_interp_filters(mbmi, cm->features.interp_filter);
4811
47.0M
}
4812
4813
static inline void collect_single_states(MACROBLOCK *x,
4814
                                         InterModeSearchState *search_state,
4815
144k
                                         const MB_MODE_INFO *const mbmi) {
4816
144k
  int i, j;
4817
144k
  const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
4818
144k
  const PREDICTION_MODE this_mode = mbmi->mode;
4819
18.4E
  const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
4820
144k
  const int mode_offset = INTER_OFFSET(this_mode);
4821
144k
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
4822
4823
  // Simple rd
4824
144k
  int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
4825
173k
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4826
28.8k
    const int64_t rd =
4827
28.8k
        search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
4828
28.8k
    if (rd < simple_rd) simple_rd = rd;
4829
28.8k
  }
4830
4831
  // Insertion sort of single_state
4832
144k
  const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
4833
144k
  SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
4834
144k
  i = search_state->single_state_cnt[dir][mode_offset];
4835
144k
  for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
4836
0
    state_s[j] = state_s[j - 1];
4837
144k
  state_s[j] = this_state_s;
4838
144k
  search_state->single_state_cnt[dir][mode_offset]++;
4839
4840
  // Modelled rd
4841
144k
  int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
4842
173k
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4843
28.8k
    const int64_t rd =
4844
28.8k
        search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
4845
28.8k
    if (rd < modelled_rd) modelled_rd = rd;
4846
28.8k
  }
4847
4848
  // Insertion sort of single_state_modelled
4849
144k
  const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
4850
144k
  SingleInterModeState *state_m =
4851
144k
      search_state->single_state_modelled[dir][mode_offset];
4852
144k
  i = search_state->single_state_modelled_cnt[dir][mode_offset];
4853
144k
  for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
4854
0
    state_m[j] = state_m[j - 1];
4855
144k
  state_m[j] = this_state_m;
4856
144k
  search_state->single_state_modelled_cnt[dir][mode_offset]++;
4857
144k
}
4858
4859
static inline void analyze_single_states(const AV1_COMP *cpi,
4860
0
                                         InterModeSearchState *search_state) {
4861
0
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
4862
0
  assert(prune_level >= 1);
4863
0
  int i, j, dir, mode;
4864
4865
0
  for (dir = 0; dir < 2; ++dir) {
4866
0
    int64_t best_rd;
4867
0
    SingleInterModeState(*state)[FWD_REFS];
4868
0
    const int prune_factor = prune_level >= 2 ? 6 : 5;
4869
4870
    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
4871
    // reference frames for all the modes (NEARESTMV and NEARMV may not
4872
    // have same motion vectors). Always keep the best of each mode
4873
    // because it might form the best possible combination with other mode.
4874
0
    state = search_state->single_state[dir];
4875
0
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
4876
0
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
4877
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4878
0
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
4879
0
        if (state[mode][i].rd != INT64_MAX &&
4880
0
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
4881
0
          state[mode][i].valid = 0;
4882
0
        }
4883
0
      }
4884
0
    }
4885
4886
0
    state = search_state->single_state_modelled[dir];
4887
0
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
4888
0
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
4889
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4890
0
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
4891
0
        if (state[mode][i].rd != INT64_MAX &&
4892
0
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
4893
0
          state[mode][i].valid = 0;
4894
0
        }
4895
0
      }
4896
0
    }
4897
0
  }
4898
4899
  // Ordering by simple rd first, then by modelled rd
4900
0
  for (dir = 0; dir < 2; ++dir) {
4901
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4902
0
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
4903
0
      const int state_cnt_m =
4904
0
          search_state->single_state_modelled_cnt[dir][mode];
4905
0
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
4906
0
      SingleInterModeState *state_m =
4907
0
          search_state->single_state_modelled[dir][mode];
4908
0
      int count = 0;
4909
0
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
4910
0
      for (i = 0; i < state_cnt_s; ++i) {
4911
0
        if (state_s[i].rd == INT64_MAX) break;
4912
0
        if (state_s[i].valid) {
4913
0
          search_state->single_rd_order[dir][mode][count++] =
4914
0
              state_s[i].ref_frame;
4915
0
        }
4916
0
      }
4917
0
      if (count >= max_candidates) continue;
4918
4919
0
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
4920
0
        if (state_m[i].rd == INT64_MAX) break;
4921
0
        if (!state_m[i].valid) continue;
4922
0
        const int ref_frame = state_m[i].ref_frame;
4923
0
        int match = 0;
4924
        // Check if existing already
4925
0
        for (j = 0; j < count; ++j) {
4926
0
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
4927
0
            match = 1;
4928
0
            break;
4929
0
          }
4930
0
        }
4931
0
        if (match) continue;
4932
        // Check if this ref_frame is removed in simple rd
4933
0
        int valid = 1;
4934
0
        for (j = 0; j < state_cnt_s; ++j) {
4935
0
          if (ref_frame == state_s[j].ref_frame) {
4936
0
            valid = state_s[j].valid;
4937
0
            break;
4938
0
          }
4939
0
        }
4940
0
        if (valid) {
4941
0
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
4942
0
        }
4943
0
      }
4944
0
    }
4945
0
  }
4946
0
}
4947
4948
static int compound_skip_get_candidates(
4949
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
4950
0
    const int dir, const PREDICTION_MODE mode) {
4951
0
  const int mode_offset = INTER_OFFSET(mode);
4952
0
  const SingleInterModeState *state =
4953
0
      search_state->single_state[dir][mode_offset];
4954
0
  const SingleInterModeState *state_modelled =
4955
0
      search_state->single_state_modelled[dir][mode_offset];
4956
4957
0
  int max_candidates = 0;
4958
0
  for (int i = 0; i < FWD_REFS; ++i) {
4959
0
    if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4960
0
    max_candidates++;
4961
0
  }
4962
4963
0
  int candidates = max_candidates;
4964
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4965
0
    candidates = AOMMIN(2, max_candidates);
4966
0
  }
4967
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4968
0
    if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4969
0
        state[0].ref_frame == state_modelled[0].ref_frame)
4970
0
      candidates = 1;
4971
0
    if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4972
0
  }
4973
4974
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4975
    // Limit the number of candidates to 1 in each direction for compound
4976
    // prediction
4977
0
    candidates = AOMMIN(1, candidates);
4978
0
  }
4979
0
  return candidates;
4980
0
}
4981
4982
static AOM_FORCE_INLINE int compound_skip_by_single_states(
4983
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
4984
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
4985
0
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
4986
0
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
4987
0
  const int mode[2] = { compound_ref0_mode(this_mode),
4988
0
                        compound_ref1_mode(this_mode) };
4989
0
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
4990
0
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
4991
0
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
4992
0
  int ref_searched[2] = { 0, 0 };
4993
0
  int ref_mv_match[2] = { 1, 1 };
4994
0
  int i, j;
4995
4996
0
  for (i = 0; i < 2; ++i) {
4997
0
    const SingleInterModeState *state =
4998
0
        search_state->single_state[mode_dir[i]][mode_offset[i]];
4999
0
    const int state_cnt =
5000
0
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
5001
0
    for (j = 0; j < state_cnt; ++j) {
5002
0
      if (state[j].ref_frame == refs[i]) {
5003
0
        ref_searched[i] = 1;
5004
0
        break;
5005
0
      }
5006
0
    }
5007
0
  }
5008
5009
0
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
5010
0
  for (i = 0; i < 2; ++i) {
5011
0
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
5012
0
      continue;
5013
0
    }
5014
0
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
5015
0
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
5016
0
      int_mv single_mv;
5017
0
      int_mv comp_mv;
5018
0
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
5019
0
                  &x->mbmi_ext);
5020
0
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
5021
0
      if (single_mv.as_int != comp_mv.as_int) {
5022
0
        ref_mv_match[i] = 0;
5023
0
        break;
5024
0
      }
5025
0
    }
5026
0
  }
5027
5028
0
  for (i = 0; i < 2; ++i) {
5029
0
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
5030
0
    const int candidates =
5031
0
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
5032
0
    const MV_REFERENCE_FRAME *ref_order =
5033
0
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
5034
0
    int match = 0;
5035
0
    for (j = 0; j < candidates; ++j) {
5036
0
      if (refs[i] == ref_order[j]) {
5037
0
        match = 1;
5038
0
        break;
5039
0
      }
5040
0
    }
5041
0
    if (!match) return 1;
5042
0
  }
5043
5044
0
  return 0;
5045
0
}
5046
5047
// Check if ref frames of current block matches with given block.
5048
static inline void match_ref_frame(const MB_MODE_INFO *const mbmi,
5049
                                   const MV_REFERENCE_FRAME *ref_frames,
5050
0
                                   int *const is_ref_match) {
5051
0
  if (is_inter_block(mbmi)) {
5052
0
    is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
5053
0
    is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
5054
0
    if (has_second_ref(mbmi)) {
5055
0
      is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
5056
0
      is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
5057
0
    }
5058
0
  }
5059
0
}
5060
5061
// Prune compound mode using ref frames of neighbor blocks.
5062
static inline int compound_skip_using_neighbor_refs(
5063
    MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
5064
572k
    const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
5065
  // Exclude non-extended compound modes from pruning
5066
572k
  if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
5067
411k
      this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
5068
366k
    return 0;
5069
5070
205k
  if (prune_ext_comp_using_neighbors >= 3) return 1;
5071
5072
18.4E
  int is_ref_match[2] = { 0 };  // 0 - match for forward refs
5073
                                // 1 - match for backward refs
5074
  // Check if ref frames of this block matches with left neighbor.
5075
18.4E
  if (xd->left_available)
5076
0
    match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
5077
5078
  // Check if ref frames of this block matches with above neighbor.
5079
18.4E
  if (xd->up_available)
5080
0
    match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
5081
5082
  // Combine ref frame match with neighbors in forward and backward refs.
5083
18.4E
  const int track_ref_match = is_ref_match[0] + is_ref_match[1];
5084
5085
  // Pruning based on ref frame match with neighbors.
5086
18.4E
  if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
5087
18.4E
  return 1;
5088
18.4E
}
5089
5090
// Update best single mode for the given reference frame based on simple rd.
5091
static inline void update_best_single_mode(InterModeSearchState *search_state,
5092
                                           const PREDICTION_MODE this_mode,
5093
                                           const MV_REFERENCE_FRAME ref_frame,
5094
4.28M
                                           int64_t this_rd) {
5095
4.28M
  if (this_rd < search_state->best_single_rd[ref_frame]) {
5096
1.79M
    search_state->best_single_rd[ref_frame] = this_rd;
5097
1.79M
    search_state->best_single_mode[ref_frame] = this_mode;
5098
1.79M
  }
5099
4.28M
}
5100
5101
// Prune compound mode using best single mode for the same reference.
5102
static inline int skip_compound_using_best_single_mode_ref(
5103
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
5104
    const PREDICTION_MODE *best_single_mode,
5105
366k
    int prune_comp_using_best_single_mode_ref) {
5106
  // Exclude non-extended compound modes from pruning
5107
366k
  if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
5108
205k
      this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
5109
366k
    return 0;
5110
5111
366k
  assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
5112
25
  const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
5113
  // Get ref frame direction corresponding to NEWMV
5114
  // 0 - NEWMV corresponding to forward direction
5115
  // 1 - NEWMV corresponding to backward direction
5116
25
  const int newmv_dir = comp_mode_ref0 != NEWMV;
5117
5118
  // Avoid pruning the compound mode when ref frame corresponding to NEWMV
5119
  // have NEWMV as single mode winner.
5120
  // Example: For an extended-compound mode,
5121
  // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
5122
  // - Ref frame corresponding to NEWMV is ALTREF_FRAME
5123
  // - Avoid pruning this mode, if best single mode corresponding to ref frame
5124
  //   ALTREF_FRAME is NEWMV
5125
25
  const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
5126
25
  if (single_mode == NEWMV) return 0;
5127
5128
  // Avoid pruning the compound mode when best single mode is not available
5129
25
  if (prune_comp_using_best_single_mode_ref == 1)
5130
0
    if (single_mode == MB_MODE_COUNT) return 0;
5131
25
  return 1;
5132
25
}
5133
5134
1.03M
// qsort()-compatible comparator for int64_t elements in ascending order.
// Returns -1 when *a < *b, 0 when they are equal and 1 when *a > *b.
// The values are compared directly rather than subtracted, so the result is
// correct over the whole int64_t range.
static int compare_int64(const void *a, const void *b) {
  // Read through const-qualified pointers: the comparator must not modify
  // the elements and must not cast away the qualifier of its arguments.
  const int64_t a64 = *(const int64_t *)a;
  const int64_t b64 = *(const int64_t *)b;
  if (a64 < b64) return -1;
  if (a64 > b64) return 1;
  return 0;
}
5145
5146
static inline void update_search_state(
5147
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
5148
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
5149
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
5150
2.55M
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
5151
2.55M
  const MACROBLOCKD *xd = &x->e_mbd;
5152
2.55M
  const MB_MODE_INFO *mbmi = xd->mi[0];
5153
2.55M
  const int skip_ctx = av1_get_skip_txfm_context(xd);
5154
2.55M
  const int skip_txfm =
5155
2.55M
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
5156
5157
2.55M
  search_state->best_rd = new_best_rd_stats->rdcost;
5158
2.55M
  search_state->best_mode_index = new_best_mode;
5159
2.55M
  *best_rd_stats_dst = *new_best_rd_stats;
5160
2.55M
  search_state->best_mbmode = *mbmi;
5161
2.55M
  search_state->best_skip2 = skip_txfm;
5162
2.55M
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
5163
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
5164
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
5165
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
5166
  // These two values will be updated when av1_txfm_search is called.
5167
2.55M
  if (txfm_search_done) {
5168
2.46M
    search_state->best_rate_y =
5169
2.46M
        new_best_rd_stats_y->rate +
5170
2.46M
        x->mode_costs.skip_txfm_cost[skip_ctx]
5171
2.46M
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
5172
2.46M
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
5173
2.46M
  }
5174
2.55M
  search_state->best_y_rdcost = *new_best_rd_stats_y;
5175
2.55M
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
5176
2.55M
}
5177
5178
// Find the best RD for a reference frame (among single reference modes)
5179
// and store +10% of it in the 0-th element in ref_frame_rd.
5180
104k
static inline void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
5181
104k
  assert(ref_frame_rd[0] == INT64_MAX);
5182
104k
  int64_t ref_copy[REF_FRAMES - 1];
5183
104k
  memcpy(ref_copy, ref_frame_rd + 1,
5184
104k
         sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
5185
104k
  qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
5186
5187
104k
  int64_t cutoff = ref_copy[0];
5188
  // The cut-off is within 10% of the best.
5189
104k
  if (cutoff != INT64_MAX) {
5190
104k
    assert(cutoff < INT64_MAX / 200);
5191
104k
    cutoff = (110 * cutoff) / 100;
5192
104k
  }
5193
104k
  ref_frame_rd[0] = cutoff;
5194
104k
}
5195
5196
// Check if either frame is within the cutoff.
5197
static inline bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
5198
                                        MV_REFERENCE_FRAME frame1,
5199
789k
                                        MV_REFERENCE_FRAME frame2) {
5200
789k
  assert(frame2 > 0);
5201
789k
  return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
5202
96.0k
         ref_frame_rd[frame2] <= ref_frame_rd[0];
5203
789k
}
5204
5205
static inline void evaluate_motion_mode_for_winner_candidates(
5206
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
5207
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
5208
    PICK_MODE_CONTEXT *const ctx,
5209
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
5210
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
5211
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
5212
843k
    InterModeSearchState *const search_state, int64_t *yrd) {
5213
843k
  const AV1_COMMON *const cm = &cpi->common;
5214
843k
  const int num_planes = av1_num_planes(cm);
5215
843k
  MACROBLOCKD *const xd = &x->e_mbd;
5216
843k
  MB_MODE_INFO *const mbmi = xd->mi[0];
5217
843k
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
5218
843k
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;
5219
5220
2.52M
  for (int cand = 0; cand < num_best_cand; cand++) {
5221
1.68M
    RD_STATS rd_stats;
5222
1.68M
    RD_STATS rd_stats_y;
5223
1.68M
    RD_STATS rd_stats_uv;
5224
1.68M
    av1_init_rd_stats(&rd_stats);
5225
1.68M
    av1_init_rd_stats(&rd_stats_y);
5226
1.68M
    av1_init_rd_stats(&rd_stats_uv);
5227
1.68M
    int rate_mv;
5228
5229
1.68M
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
5230
1.68M
    args->skip_motion_mode =
5231
1.68M
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
5232
1.68M
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
5233
1.68M
    rd_stats.rate =
5234
1.68M
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;
5235
5236
    // Continue if the best candidate is compound.
5237
1.68M
    if (!is_inter_singleref_mode(mbmi->mode)) continue;
5238
5239
1.64M
    x->txfm_search_info.skip_txfm = 0;
5240
1.64M
    struct macroblockd_plane *pd = xd->plane;
5241
1.64M
    const BUFFER_SET orig_dst = {
5242
1.64M
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
5243
1.64M
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
5244
1.64M
    };
5245
5246
1.64M
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5247
    // Initialize motion mode to simple translation
5248
    // Calculation of switchable rate depends on it.
5249
1.64M
    mbmi->motion_mode = 0;
5250
1.64M
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
5251
4.62M
    for (int i = 0; i < num_planes; i++) {
5252
2.97M
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
5253
2.97M
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
5254
2.97M
    }
5255
5256
1.64M
    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
5257
1.64M
                           search_state->best_skip_rd[1] };
5258
1.64M
    int64_t this_yrd = INT64_MAX;
5259
1.64M
    int64_t ret_value = motion_mode_rd(
5260
1.64M
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
5261
1.64M
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
5262
1.64M
        do_tx_search, inter_modes_info, 1, &this_yrd);
5263
5264
1.64M
    if (ret_value != INT64_MAX) {
5265
92.6k
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
5266
92.6k
      const THR_MODES mode_enum = get_prediction_mode_idx(
5267
92.6k
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5268
      // Collect mode stats for multiwinner mode processing
5269
92.6k
      store_winner_mode_stats(
5270
92.6k
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
5271
92.6k
          mode_enum, NULL, bsize, rd_stats.rdcost,
5272
92.6k
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
5273
5274
92.6k
      int64_t best_scaled_rd = search_state->best_rd;
5275
92.6k
      int64_t this_scaled_rd = rd_stats.rdcost;
5276
92.6k
      if (search_state->best_mode_index != THR_INVALID)
5277
92.5k
        increase_motion_mode_rd(&search_state->best_mbmode, mbmi,
5278
92.5k
                                &best_scaled_rd, &this_scaled_rd,
5279
92.5k
                                cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct,
5280
92.5k
                                cpi->sf.inter_sf.bias_obmc_mode_rd_scale_pct);
5281
5282
92.6k
      if (this_scaled_rd < best_scaled_rd) {
5283
84.1k
        *yrd = this_yrd;
5284
84.1k
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
5285
84.1k
                            &rd_stats_uv, mode_enum, x, do_tx_search);
5286
84.1k
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
5287
84.1k
      }
5288
92.6k
    }
5289
1.64M
  }
5290
843k
}
5291
5292
/*!\cond */
5293
// Arguments for speed feature pruning of inter mode search
// Bundles the state that skip_inter_mode() reads and updates while it
// decides whether a candidate inter mode can be skipped.
5294
typedef struct {
5295
  // Output: written by skip_inter_mode() for modes that pass the
  // order-independent skip check; set to 1 when that check returned 2.
  int *skip_motion_mode;
5296
  // Forwarded to inter_mode_search_order_independent_skip().
  mode_skip_mask_t *mode_skip_mask;
5297
  // State of the inter mode search (best rd, thresholds, single-ref stats).
  InterModeSearchState *search_state;
5298
  // Forwarded to inter_mode_search_order_independent_skip().
  int skip_ref_frame_mask;
5299
  // Set to 1 once analyze_single_states() has been run, so that it is run
  // only once, when the first compound mode is reached.
  int reach_first_comp_mode;
5300
  // Multiplier applied to the mode threshold when the best mode so far is
  // skippable; the product is shifted right by MODE_THRESH_QBITS.
  int mode_thresh_mul_fact;
5301
  // Compared against NUM_SINGLE_REF_MODES to decide when the single
  // reference rd values are complete enough for find_top_ref().
  int num_single_modes_processed;
5302
  // Set to 1 once find_top_ref() has stored the cut-off in ref_frame_rd[0].
  int prune_cpd_using_sr_stats_ready;
5303
} InterModeSFArgs;
5304
/*!\endcond */
5305
5306
static AOM_FORCE_INLINE int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x,
5307
                                            const BLOCK_SIZE bsize,
5308
                                            int64_t *ref_frame_rd, int midx,
5309
                                            InterModeSFArgs *args,
5310
139M
                                            int is_low_temp_var) {
5311
139M
  const SPEED_FEATURES *const sf = &cpi->sf;
5312
139M
  MACROBLOCKD *const xd = &x->e_mbd;
5313
  // Get the actual prediction mode we are trying in this iteration
5314
139M
  const THR_MODES mode_enum = av1_default_mode_order[midx];
5315
139M
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
5316
139M
  const PREDICTION_MODE this_mode = mode_def->mode;
5317
139M
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
5318
139M
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
5319
139M
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
5320
139M
  const int comp_pred = second_ref_frame > INTRA_FRAME;
5321
5322
139M
  if (ref_frame == INTRA_FRAME) return 1;
5323
5324
139M
  const FRAME_UPDATE_TYPE update_type =
5325
139M
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
5326
139M
  if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
5327
8.26M
      comp_pred) {
5328
6.78M
    return 1;
5329
6.78M
  }
5330
5331
  // This is for real time encoding.
5332
132M
  if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
5333
0
      this_mode != NEARESTMV)
5334
0
    return 1;
5335
5336
  // Check if this mode should be skipped because it is incompatible with the
5337
  // current frame
5338
132M
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
5339
86.2M
    return 1;
5340
46.1M
  const int ret = inter_mode_search_order_independent_skip(
5341
46.1M
      cpi, x, args->mode_skip_mask, args->search_state,
5342
46.1M
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
5343
46.1M
  if (ret == 1) return 1;
5344
5.00M
  *(args->skip_motion_mode) = (ret == 2);
5345
5346
  // We've reached the first compound prediction mode, get stats from the
5347
  // single reference predictors to help with pruning.
5348
  // Disable this pruning logic if interpolation filter search was skipped for
5349
  // single prediction modes as it can result in aggressive pruning of compound
5350
  // prediction modes due to the absence of modelled_rd populated by
5351
  // av1_interpolation_filter_search().
5352
  // TODO(Remya): Check the impact of the sf
5353
  // 'prune_comp_search_by_single_result' if compound prediction modes are
5354
  // enabled in future for REALTIME encode.
5355
5.00M
  if (!sf->interp_sf.skip_interp_filter_search &&
5356
144k
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
5357
0
      args->reach_first_comp_mode == 0) {
5358
0
    analyze_single_states(cpi, args->search_state);
5359
0
    args->reach_first_comp_mode = 1;
5360
0
  }
5361
5362
  // Prune aggressively when best mode is skippable.
5363
5.00M
  int mul_fact = args->search_state->best_mode_skippable
5364
5.00M
                     ? args->mode_thresh_mul_fact
5365
5.00M
                     : (1 << MODE_THRESH_QBITS);
5366
5.00M
  int64_t mode_threshold =
5367
5.00M
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
5368
5.00M
      MODE_THRESH_QBITS;
5369
5370
5.00M
  if (args->search_state->best_rd < mode_threshold) return 1;
5371
5372
  // Skip this compound mode based on the RD results from the single prediction
5373
  // modes
5374
4.97M
  if (!sf->interp_sf.skip_interp_filter_search &&
5375
144k
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
5376
0
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
5377
0
                                       ref_frame, second_ref_frame, x))
5378
0
      return 1;
5379
0
  }
5380
5381
5.24M
  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
5382
    // After we done with single reference modes, find the 2nd best RD
5383
    // for a reference frame. Only search compound modes that have a reference
5384
    // frame at least as good as the 2nd best.
5385
789k
    if (!args->prune_cpd_using_sr_stats_ready &&
5386
104k
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
5387
104k
      find_top_ref(ref_frame_rd);
5388
104k
      args->prune_cpd_using_sr_stats_ready = 1;
5389
104k
    }
5390
789k
    if (args->prune_cpd_using_sr_stats_ready &&
5391
789k
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
5392
11.4k
      return 1;
5393
789k
  }
5394
5395
  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
5396
4.96M
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
5397
5.23M
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
5398
205k
    return 1;
5399
205k
  }
5400
5401
5.03M
  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
5402
572k
    if (compound_skip_using_neighbor_refs(
5403
572k
            xd, this_mode, ref_frames,
5404
572k
            sf->inter_sf.prune_ext_comp_using_neighbors))
5405
205k
      return 1;
5406
572k
  }
5407
5408
4.82M
  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
5409
366k
    if (skip_compound_using_best_single_mode_ref(
5410
366k
            this_mode, ref_frames, args->search_state->best_single_mode,
5411
366k
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
5412
0
      return 1;
5413
366k
  }
5414
5415
4.68M
  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
5416
4.31M
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
5417
4.31M
    if (skip_nearest_near_mv_using_refmv_weight(
5418
4.31M
            x, this_mode, ref_frame_type,
5419
4.31M
            args->search_state->best_mbmode.mode)) {
5420
      // Ensure the mode is pruned only when the current block has obtained a
5421
      // valid inter mode.
5422
181k
      assert(is_inter_mode(args->search_state->best_mbmode.mode));
5423
181k
      return 1;
5424
181k
    }
5425
4.31M
  }
5426
5427
4.36M
  if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
5428
0
      ref_frame == GOLDEN_FRAME && !comp_pred) {
5429
0
    const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
5430
0
    if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
5431
0
        args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
5432
0
      if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
5433
0
        return 1;
5434
0
    }
5435
0
  }
5436
5437
4.36M
  return 0;
5438
4.36M
}
5439
5440
static void record_best_compound(REFERENCE_MODE reference_mode,
5441
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
5442
                                 InterModeSearchState *search_state,
5443
1.86M
                                 int compmode_cost) {
5444
1.86M
  int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
5445
5446
1.86M
  if (reference_mode == REFERENCE_MODE_SELECT) {
5447
1.86M
    single_rate = rd_stats->rate - compmode_cost;
5448
1.86M
    hybrid_rate = rd_stats->rate;
5449
1.86M
  } else {
5450
36
    single_rate = rd_stats->rate;
5451
36
    hybrid_rate = rd_stats->rate + compmode_cost;
5452
36
  }
5453
5454
1.86M
  single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
5455
1.86M
  hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);
5456
5457
1.86M
  if (!comp_pred) {
5458
1.82M
    if (single_rd < search_state->best_pred_rd[SINGLE_REFERENCE])
5459
1.77M
      search_state->best_pred_rd[SINGLE_REFERENCE] = single_rd;
5460
1.82M
  } else {
5461
38.9k
    if (single_rd < search_state->best_pred_rd[COMPOUND_REFERENCE])
5462
38.2k
      search_state->best_pred_rd[COMPOUND_REFERENCE] = single_rd;
5463
38.9k
  }
5464
1.86M
  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
5465
1.81M
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
5466
1.86M
}
5467
5468
// Does a transform search over a list of the best inter mode candidates.
5469
// This is called if the original mode search computed an RD estimate
5470
// for the transform search rather than doing a full search.
5471
static void tx_search_best_inter_candidates(
5472
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
5473
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
5474
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
5475
    InterModeSearchState *search_state, RD_STATS *rd_cost,
5476
38.3k
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
5477
38.3k
  AV1_COMMON *const cm = &cpi->common;
5478
38.3k
  MACROBLOCKD *const xd = &x->e_mbd;
5479
38.3k
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5480
38.3k
  const ModeCosts *mode_costs = &x->mode_costs;
5481
38.3k
  const int num_planes = av1_num_planes(cm);
5482
38.3k
  const int skip_ctx = av1_get_skip_txfm_context(xd);
5483
38.3k
  MB_MODE_INFO *const mbmi = xd->mi[0];
5484
38.3k
  InterModesInfo *inter_modes_info = x->inter_modes_info;
5485
38.3k
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
5486
38.3k
  search_state->best_rd = best_rd_so_far;
5487
38.3k
  search_state->best_mode_index = THR_INVALID;
5488
  // Initialize best mode stats for winner mode processing
5489
38.3k
  x->winner_mode_count = 0;
5490
38.3k
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5491
38.3k
                          NULL, bsize, best_rd_so_far,
5492
38.3k
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
5493
38.3k
  inter_modes_info->num =
5494
38.3k
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
5495
38.3k
          ? inter_modes_info->num
5496
38.3k
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
5497
38.3k
  const int64_t top_est_rd =
5498
38.3k
      inter_modes_info->num > 0
5499
38.3k
          ? inter_modes_info
5500
38.2k
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
5501
38.3k
          : INT64_MAX;
5502
38.3k
  *yrd = INT64_MAX;
5503
38.3k
  int64_t best_rd_in_this_partition = INT64_MAX;
5504
38.3k
  int num_inter_mode_cands = inter_modes_info->num;
5505
38.3k
  int newmv_mode_evaled = 0;
5506
38.3k
  int max_allowed_cands = INT_MAX;
5507
38.3k
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
5508
    // The bound on the no. of inter mode candidates, beyond which the
5509
    // candidates are limited if a newmv mode got evaluated, is set as
5510
    // max_allowed_cands + 1.
5511
34.1k
    const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
5512
34.1k
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
5513
34.1k
    max_allowed_cands =
5514
34.1k
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
5515
34.1k
  }
5516
5517
38.3k
  int num_mode_thresh = INT_MAX;
5518
38.3k
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
5519
    // Bound the no. of transform searches per prediction mode beyond a
5520
    // threshold.
5521
36.5k
    const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
5522
36.5k
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
5523
36.5k
    num_mode_thresh =
5524
36.5k
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
5525
36.5k
  }
5526
5527
38.3k
  int num_tx_cands = 0;
5528
38.3k
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
5529
  // Iterate over best inter mode candidates and perform tx search
5530
177k
  for (int j = 0; j < num_inter_mode_cands; ++j) {
5531
139k
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
5532
139k
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
5533
139k
    const PREDICTION_MODE prediction_mode = mbmi->mode;
5534
139k
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
5535
139k
    if (curr_est_rd * 0.80 > top_est_rd) break;
5536
5537
139k
    if (num_tx_cands > num_mode_thresh) {
5538
17.4k
      if ((prediction_mode != NEARESTMV &&
5539
9.04k
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
5540
10.7k
          (prediction_mode == NEARESTMV &&
5541
8.37k
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
5542
7.89k
        continue;
5543
17.4k
    }
5544
5545
131k
    txfm_info->skip_txfm = 0;
5546
131k
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5547
5548
    // Select prediction reference frames.
5549
131k
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
5550
365k
    for (int i = 0; i < num_planes; i++) {
5551
234k
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
5552
234k
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
5553
234k
    }
5554
5555
131k
    bool is_predictor_built = false;
5556
5557
    // Initialize RD stats
5558
131k
    RD_STATS rd_stats;
5559
131k
    RD_STATS rd_stats_y;
5560
131k
    RD_STATS rd_stats_uv;
5561
131k
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
5562
131k
    int64_t skip_rd = INT64_MAX;
5563
131k
    const int txfm_rd_gate_level = get_txfm_rd_gate_level(
5564
131k
        cm->seq_params->enable_masked_compound,
5565
131k
        cpi->sf.inter_sf.txfm_rd_gate_level, bsize, TX_SEARCH_DEFAULT,
5566
131k
        /*eval_motion_mode=*/0);
5567
131k
    if (txfm_rd_gate_level) {
5568
      // Check if the mode is good enough based on skip RD
5569
127k
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
5570
127k
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
5571
127k
      int eval_txfm = check_txfm_eval(x, bsize, search_state->best_skip_rd[0],
5572
127k
                                      skip_rd, txfm_rd_gate_level, 0);
5573
127k
      if (!eval_txfm) continue;
5574
127k
    }
5575
5576
    // Build the prediction for this mode
5577
100k
    if (!is_predictor_built) {
5578
100k
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
5579
100k
                                    av1_num_planes(cm) - 1);
5580
100k
    }
5581
100k
    if (mbmi->motion_mode == OBMC_CAUSAL) {
5582
0
      av1_build_obmc_inter_predictors_sb(cm, xd);
5583
0
    }
5584
5585
100k
    num_tx_cands++;
5586
100k
    if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
5587
100k
    num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
5588
100k
    int64_t this_yrd = INT64_MAX;
5589
    // Do the transform search
5590
100k
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
5591
100k
                         mode_rate, search_state->best_rd)) {
5592
53.3k
      continue;
5593
53.3k
    } else {
5594
47.5k
      const int y_rate =
5595
47.5k
          rd_stats.skip_txfm
5596
47.5k
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
5597
47.5k
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
5598
47.5k
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);
5599
5600
47.5k
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
5601
33.3k
        inter_mode_data_push(
5602
33.3k
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
5603
33.3k
            rd_stats_y.rate + rd_stats_uv.rate +
5604
33.3k
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
5605
33.3k
      }
5606
47.5k
    }
5607
5608
47.5k
    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
5609
5610
47.5k
    const THR_MODES mode_enum = get_prediction_mode_idx(
5611
47.5k
        prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5612
5613
    // Collect mode stats for multiwinner mode processing
5614
47.5k
    const int txfm_search_done = 1;
5615
47.5k
    store_winner_mode_stats(
5616
47.5k
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
5617
47.5k
        NULL, bsize, rd_stats.rdcost,
5618
47.5k
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
5619
5620
47.5k
    int64_t best_scaled_rd = search_state->best_rd;
5621
47.5k
    int64_t this_scaled_rd = rd_stats.rdcost;
5622
47.5k
    increase_motion_mode_rd(&search_state->best_mbmode, mbmi, &best_scaled_rd,
5623
47.5k
                            &this_scaled_rd,
5624
47.5k
                            cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct,
5625
47.5k
                            cpi->sf.inter_sf.bias_obmc_mode_rd_scale_pct);
5626
47.5k
    if (this_scaled_rd < best_rd_in_this_partition) {
5627
47.4k
      best_rd_in_this_partition = rd_stats.rdcost;
5628
47.4k
      *yrd = this_yrd;
5629
47.4k
    }
5630
5631
47.5k
    if (this_scaled_rd < best_scaled_rd) {
5632
47.4k
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
5633
47.4k
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
5634
47.4k
      search_state->best_skip_rd[0] = skip_rd;
5635
      // Limit the total number of modes to be evaluated if the first is valid
5636
      // and transform skip or compound
5637
47.4k
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
5638
45.3k
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
5639
          // Evaluate more candidates at high quantizers where occurrence of
5640
          // transform skip is high.
5641
2.69k
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
5642
2.69k
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
5643
2.69k
          num_inter_mode_cands =
5644
2.69k
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
5645
42.6k
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
5646
1.34k
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
5647
          // Evaluate more candidates at low quantizers where occurrence of
5648
          // single reference mode is high.
5649
1.34k
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
5650
1.34k
                                                { 10, 7, 5, 3 } };
5651
1.34k
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
5652
1.34k
          num_inter_mode_cands = AOMMIN(
5653
1.34k
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
5654
1.34k
        }
5655
45.3k
      }
5656
47.4k
    }
5657
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
5658
    // a newmv mode was evaluated already.
5659
47.5k
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
5660
47.5k
  }
5661
38.3k
}
5662
5663
// Indicates number of winner simple translation modes to be used
5664
// NOTE(review): presumably indexed by a speed-feature level in [0, 2], with
// entry 0 meaning that no winner candidates are kept -- confirm against the
// code that reads this table.
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
5665
5666
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
5667
// speed feature. This list consists of modes that have only searched
5668
// SIMPLE_TRANSLATION. The final list will be used to search other motion
5669
// modes after the initial RD search.
5670
static void handle_winner_cand(
5671
    MB_MODE_INFO *const mbmi,
5672
    motion_mode_best_st_candidate *best_motion_mode_cands,
5673
    int max_winner_motion_mode_cand, int64_t this_rd,
5674
1.76M
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
5675
  // Number of current motion mode candidates in list
5676
1.76M
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
5677
1.76M
  int valid_motion_mode_cand_loc = num_motion_mode_cand;
5678
5679
  // find the best location to insert new motion mode candidate
5680
1.85M
  for (int j = 0; j < num_motion_mode_cand; j++) {
5681
984k
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
5682
899k
      valid_motion_mode_cand_loc = j;
5683
899k
      break;
5684
899k
    }
5685
984k
  }
5686
5687
  // Insert motion mode if location is found
5688
1.76M
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
5689
1.75M
    if (num_motion_mode_cand > 0 &&
5690
920k
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
5691
906k
      memmove(
5692
906k
          &best_motion_mode_cands
5693
906k
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
5694
906k
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
5695
906k
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
5696
906k
           valid_motion_mode_cand_loc) *
5697
906k
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
5698
1.75M
    motion_mode_cand->mbmi = *mbmi;
5699
1.75M
    motion_mode_cand->rd_cost = this_rd;
5700
1.75M
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
5701
1.75M
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
5702
1.75M
        *motion_mode_cand;
5703
1.75M
    best_motion_mode_cands->num_motion_mode_cand =
5704
1.75M
        AOMMIN(max_winner_motion_mode_cand,
5705
1.75M
               best_motion_mode_cands->num_motion_mode_cand + 1);
5706
1.75M
  }
5707
1.76M
}
5708
5709
/*!\brief Search intra modes in interframes
5710
 *
5711
 * \ingroup intra_mode_search
5712
 *
5713
 * This function searches for the best intra mode when the current frame is an
5714
 * interframe. This function however does *not* handle luma palette mode.
5715
 * Palette mode is currently handled by \ref av1_search_palette_mode.
5716
 *
5717
 * This function will first iterate through the luma mode candidates to find the
5718
 * best luma intra mode. Once the best luma mode it's found, it will then search
5719
 * for the best chroma mode. Because palette mode is currently not handled by
5720
 * here, a cache of uv mode is stored in
5721
 * InterModeSearchState::intra_search_state so it can be reused later by \ref
5722
 * av1_search_palette_mode.
5723
 *
5724
 * \param[in,out] search_state      Struct keep track of the prediction mode
5725
 *                                  search state in interframe.
5726
 *
5727
 * \param[in]     cpi               Top-level encoder structure.
5728
 * \param[in,out] x                 Pointer to struct holding all the data for
5729
 *                                  the current prediction block.
5730
 * \param[out]    rd_cost           Stores the best rd_cost among all the
5731
 *                                  prediction modes searched.
5732
 * \param[in]     bsize             Current block size.
5733
 * \param[in,out] ctx               Structure to hold the number of 4x4 blks to
5734
 *                                  copy the tx_type and txfm_skip arrays.
5735
 *                                  for only the Y plane.
5736
 * \param[in]     sf_args           Stores the list of intra mode candidates
5737
 *                                  to be searched.
5738
 * \param[in]     intra_ref_frame_cost  The entropy cost for signaling that the
5739
 *                                      current ref frame is an intra frame.
5740
 * \param[in]     yrd_threshold     The rdcost threshold for luma intra mode to
5741
 *                                  terminate chroma intra mode search.
5742
 *
5743
 * \remark If a new best mode is found, search_state and rd_costs are updated
5744
 * correspondingly. While x is also modified, it is only used as a temporary
5745
 * buffer, and the final decisions are stored in search_state.
5746
 */
5747
static inline void search_intra_modes_in_interframe(
5748
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
5749
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5750
    const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
5751
896k
    int64_t yrd_threshold) {
5752
896k
  const AV1_COMMON *const cm = &cpi->common;
5753
896k
  const SPEED_FEATURES *const sf = &cpi->sf;
5754
896k
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
5755
896k
  MACROBLOCKD *const xd = &x->e_mbd;
5756
896k
  MB_MODE_INFO *const mbmi = xd->mi[0];
5757
896k
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;
5758
5759
896k
  int is_best_y_mode_intra = 0;
5760
896k
  RD_STATS best_intra_rd_stats_y;
5761
896k
  int64_t best_rd_y = INT64_MAX;
5762
896k
  int best_mode_cost_y = -1;
5763
896k
  MB_MODE_INFO best_mbmi = *xd->mi[0];
5764
896k
  THR_MODES best_mode_enum = THR_INVALID;
5765
896k
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
5766
896k
  const int num_4x4 = bsize_to_num_blk(bsize);
5767
5768
  // Performs luma search
5769
896k
  int64_t best_model_rd = INT64_MAX;
5770
896k
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
5771
4.48M
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
5772
3.58M
    top_intra_model_rd[i] = INT64_MAX;
5773
3.58M
  }
5774
5775
896k
  if (cpi->oxcf.algo_cfg.sharpness) {
5776
0
    int bh = mi_size_high[bsize];
5777
0
    int bw = mi_size_wide[bsize];
5778
0
    if (bh > 4 || bw > 4) return;
5779
0
  }
5780
5781
896k
  mbmi->skip_txfm = 0;
5782
5783
46.2M
  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
5784
45.4M
    if (sf->intra_sf.skip_intra_in_interframe &&
5785
45.4M
        search_state->intra_search_state.skip_intra_modes)
5786
153k
      break;
5787
45.3M
    set_y_mode_and_delta_angle(
5788
45.3M
        mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
5789
45.3M
    assert(mbmi->mode < INTRA_MODE_END);
5790
5791
    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
5792
45.3M
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
5793
695k
      continue;
5794
5795
44.6M
    const THR_MODES mode_enum =
5796
44.6M
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
5797
44.6M
    if ((!intra_mode_cfg->enable_smooth_intra ||
5798
44.6M
         cpi->sf.intra_sf.disable_smooth_intra) &&
5799
44.6M
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
5800
43.1M
         mbmi->mode == SMOOTH_V_PRED))
5801
2.19M
      continue;
5802
42.4M
    if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
5803
0
      continue;
5804
42.4M
    if (av1_is_directional_mode(mbmi->mode) &&
5805
40.9M
        !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
5806
0
        mbmi->angle_delta[PLANE_TYPE_Y] != 0)
5807
0
      continue;
5808
42.4M
    const PREDICTION_MODE this_mode = mbmi->mode;
5809
5810
42.4M
    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
5811
42.4M
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
5812
42.4M
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
5813
42.4M
    x->txfm_search_info.skip_txfm = 0;
5814
5815
42.4M
    if (this_mode != DC_PRED) {
5816
      // Only search the oblique modes if the best so far is
5817
      // one of the neighboring directional modes
5818
41.6M
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
5819
0
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
5820
0
        if (search_state->best_mode_index != THR_INVALID &&
5821
0
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
5822
0
          continue;
5823
0
      }
5824
41.6M
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
5825
0
        if (conditional_skipintra(
5826
0
                this_mode, search_state->intra_search_state.best_intra_mode))
5827
0
          continue;
5828
0
      }
5829
41.6M
    }
5830
5831
42.4M
    RD_STATS intra_rd_stats_y;
5832
42.4M
    int mode_cost_y;
5833
42.4M
    int64_t intra_rd_y = INT64_MAX;
5834
42.4M
    const int is_luma_result_valid = av1_handle_intra_y_mode(
5835
42.4M
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
5836
42.4M
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
5837
42.4M
        &best_model_rd, top_intra_model_rd);
5838
5839
42.4M
    if (intra_rd_y < INT64_MAX) {
5840
1.44M
      adjust_cost(cpi, x, &intra_rd_y, /*is_inter_pred=*/false);
5841
1.44M
    }
5842
5843
42.4M
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
5844
933k
      is_best_y_mode_intra = 1;
5845
933k
      if (intra_rd_y < best_rd_y) {
5846
629k
        best_intra_rd_stats_y = intra_rd_stats_y;
5847
629k
        best_mode_cost_y = mode_cost_y;
5848
629k
        best_rd_y = intra_rd_y;
5849
629k
        best_mbmi = *mbmi;
5850
629k
        best_mode_enum = mode_enum;
5851
629k
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
5852
629k
      }
5853
933k
    }
5854
42.4M
  }
5855
5856
896k
  if (!is_best_y_mode_intra) {
5857
280k
    return;
5858
280k
  }
5859
5860
896k
  assert(best_rd_y < INT64_MAX);
5861
5862
  // Restores the best luma mode
5863
616k
  *mbmi = best_mbmi;
5864
616k
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);
5865
5866
  // Performs chroma search
5867
616k
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
5868
616k
  av1_init_rd_stats(&intra_rd_stats);
5869
616k
  av1_init_rd_stats(&intra_rd_stats_uv);
5870
616k
  const int num_planes = av1_num_planes(cm);
5871
616k
  if (num_planes > 1) {
5872
260k
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
5873
260k
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
5874
260k
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);
5875
5876
260k
    if (!intra_uv_mode_valid) {
5877
282
      return;
5878
282
    }
5879
260k
  }
5880
5881
  // Merge the luma and chroma rd stats
5882
616k
  assert(best_mode_cost_y >= 0);
5883
616k
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
5884
616k
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
5885
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
5886
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
5887
    // (prediction granularity), so we account for it in the full rate,
5888
    // not the tokenonly rate.
5889
582k
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
5890
582k
  }
5891
5892
616k
  const ModeCosts *mode_costs = &x->mode_costs;
5893
616k
  const PREDICTION_MODE mode = mbmi->mode;
5894
616k
  if (num_planes > 1 && xd->is_chroma_ref) {
5895
260k
    const int uv_mode_cost =
5896
260k
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
5897
260k
    intra_rd_stats.rate +=
5898
260k
        intra_rd_stats_uv.rate +
5899
260k
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
5900
260k
  }
5901
5902
  // Intra block is always coded as non-skip
5903
616k
  intra_rd_stats.skip_txfm = 0;
5904
616k
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
5905
  // Add in the cost of the no skip flag.
5906
616k
  const int skip_ctx = av1_get_skip_txfm_context(xd);
5907
616k
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
5908
  // Calculate the final RD estimate for this mode.
5909
616k
  const int64_t this_rd =
5910
616k
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
5911
  // Keep record of best intra rd
5912
616k
  if (this_rd < search_state->best_intra_rd) {
5913
616k
    search_state->best_intra_rd = this_rd;
5914
616k
    intra_search_state->best_intra_mode = mode;
5915
616k
  }
5916
5917
2.46M
  for (int i = 0; i < REFERENCE_MODES; ++i) {
5918
1.84M
    search_state->best_pred_rd[i] =
5919
1.84M
        AOMMIN(search_state->best_pred_rd[i], this_rd);
5920
1.84M
  }
5921
5922
616k
  intra_rd_stats.rdcost = this_rd;
5923
5924
616k
  adjust_rdcost(cpi, x, &intra_rd_stats, /*is_inter_pred=*/false);
5925
5926
  // Collect mode stats for multiwinner mode processing
5927
616k
  const int txfm_search_done = 1;
5928
616k
  store_winner_mode_stats(
5929
616k
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
5930
616k
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
5931
616k
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
5932
616k
  if (intra_rd_stats.rdcost < search_state->best_rd) {
5933
606k
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
5934
606k
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
5935
606k
                        best_mode_enum, x, txfm_search_done);
5936
606k
  }
5937
616k
}
5938
5939
// Initialize the table that stores best RD Costs of transform no-split.
5940
static inline void init_top_tx_no_split_rd_for_inter_modes(
5941
896k
    MACROBLOCK *x, int prune_inter_tx_split_rd_eval_lvl) {
5942
896k
  if (!prune_inter_tx_split_rd_eval_lvl) return;
5943
5944
4.47M
  for (int i = 0; i < MAX_TX_BLOCKS_IN_MAX_SB; i++) {
5945
17.9M
    for (int j = 0; j < TOP_INTER_TX_NO_SPLIT_COUNT; j++) {
5946
14.3M
      x->top_inter_tx_no_split_rd[i][j] = INT64_MAX;
5947
14.3M
    }
5948
3.58M
  }
5949
896k
}
5950
5951
#if !CONFIG_REALTIME_ONLY
5952
// Prepare inter_cost and intra_cost from TPL stats, which are used as ML
5953
// features in intra mode pruning.
5954
static inline void calculate_cost_from_tpl_data(const AV1_COMP *cpi,
5955
                                                MACROBLOCK *x, BLOCK_SIZE bsize,
5956
                                                int mi_row, int mi_col,
5957
                                                int64_t *inter_cost,
5958
896k
                                                int64_t *intra_cost) {
5959
896k
  const AV1_COMMON *const cm = &cpi->common;
5960
  // Only consider full SB.
5961
896k
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
5962
896k
  const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
5963
896k
  const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5964
896k
                  (block_size_high[sb_size] / tpl_bsize_1d);
5965
896k
  SuperBlockEnc *sb_enc = &x->sb_enc;
5966
896k
  if (sb_enc->tpl_data_count == len) {
5967
186k
    const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5968
186k
    const int tpl_stride = sb_enc->tpl_stride;
5969
186k
    const int tplw = mi_size_wide[tpl_bsize];
5970
186k
    const int tplh = mi_size_high[tpl_bsize];
5971
186k
    const int nw = mi_size_wide[bsize] / tplw;
5972
186k
    const int nh = mi_size_high[bsize] / tplh;
5973
186k
    if (nw >= 1 && nh >= 1) {
5974
1.26k
      const int of_h = mi_row % mi_size_high[sb_size];
5975
1.26k
      const int of_w = mi_col % mi_size_wide[sb_size];
5976
1.26k
      const int start = of_h / tplh * tpl_stride + of_w / tplw;
5977
5978
4.11k
      for (int k = 0; k < nh; k++) {
5979
10.8k
        for (int l = 0; l < nw; l++) {
5980
8.03k
          *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5981
8.03k
          *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5982
8.03k
        }
5983
2.85k
      }
5984
1.26k
      *inter_cost /= nw * nh;
5985
1.26k
      *intra_cost /= nw * nh;
5986
1.26k
    }
5987
186k
  }
5988
896k
}
5989
#endif  // !CONFIG_REALTIME_ONLY
5990
5991
// When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
5992
// intra mode search.
5993
static inline void skip_intra_modes_in_interframe(
5994
    AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
5995
    InterModeSearchState *search_state, const SPEED_FEATURES *const sf,
5996
897k
    int64_t inter_cost, int64_t intra_cost) {
5997
897k
  MACROBLOCKD *const xd = &x->e_mbd;
5998
897k
  const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME;
5999
897k
  if (sf->rt_sf.prune_intra_mode_based_on_mv_range &&
6000
0
      bsize > sf->part_sf.max_intra_bsize && !comp_pred) {
6001
0
    const MV best_mv = search_state->best_mbmode.mv[0].as_mv;
6002
0
    const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range;
6003
0
    if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh &&
6004
0
        x->source_variance > 128) {
6005
0
      search_state->intra_search_state.skip_intra_modes = 1;
6006
0
      return;
6007
0
    }
6008
0
  }
6009
6010
897k
  const unsigned int src_var_thresh_intra_skip = 1;
6011
897k
  const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
6012
897k
  if (!(skip_intra_in_interframe &&
6013
897k
        (x->source_variance > src_var_thresh_intra_skip)))
6014
2.34k
    return;
6015
6016
  // Prune intra search based on best inter mode being transfrom skip.
6017
894k
  if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
6018
152k
    const int qindex_thresh[2] = { 200, MAXQ };
6019
152k
    const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
6020
152k
    if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
6021
114k
        (x->qindex <= qindex_thresh[ind])) {
6022
114k
      search_state->intra_search_state.skip_intra_modes = 1;
6023
114k
      return;
6024
114k
    } else if ((skip_intra_in_interframe >= 4) &&
6025
38.2k
               (inter_cost < 0 || intra_cost < 0)) {
6026
37.9k
      search_state->intra_search_state.skip_intra_modes = 1;
6027
37.9k
      return;
6028
37.9k
    }
6029
152k
  }
6030
  // Use ML model to prune intra search.
6031
742k
  if (inter_cost >= 0 && intra_cost >= 0) {
6032
1.05k
    const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
6033
1.05k
                                     ? &av1_intrap_nn_config
6034
1.05k
                                     : &av1_intrap_hd_nn_config;
6035
1.05k
    float nn_features[6];
6036
1.05k
    float scores[2] = { 0.0f };
6037
6038
1.05k
    nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
6039
1.05k
    nn_features[1] = (float)mi_size_wide_log2[bsize];
6040
1.05k
    nn_features[2] = (float)mi_size_high_log2[bsize];
6041
1.05k
    nn_features[3] = (float)intra_cost;
6042
1.05k
    nn_features[4] = (float)inter_cost;
6043
1.05k
    const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
6044
1.05k
    const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
6045
1.05k
    nn_features[5] = (float)(ac_q_max / ac_q);
6046
6047
1.05k
    av1_nn_predict(nn_features, nn_config, 1, scores);
6048
6049
    // For two parameters, the max prob returned from av1_nn_softmax equals
6050
    // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
6051
    // calling of av1_nn_softmax.
6052
1.05k
    const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
6053
1.05k
    assert(skip_intra_in_interframe <= 5);
6054
1.05k
    if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
6055
0
      search_state->intra_search_state.skip_intra_modes = 1;
6056
0
    }
6057
1.05k
  }
6058
742k
}
6059
6060
static inline bool skip_interp_filter_search(const AV1_COMP *cpi,
6061
4.64M
                                             int is_single_pred) {
6062
4.64M
  const MODE encoding_mode = cpi->oxcf.mode;
6063
4.64M
  if (encoding_mode == REALTIME) {
6064
0
    return (cpi->common.current_frame.reference_mode == SINGLE_REFERENCE &&
6065
0
            (cpi->sf.interp_sf.skip_interp_filter_search ||
6066
0
             cpi->sf.winner_mode_sf.winner_mode_ifs));
6067
4.64M
  } else if (encoding_mode == GOOD) {
6068
    // Skip interpolation filter search for single prediction modes.
6069
4.64M
    return (cpi->sf.interp_sf.skip_interp_filter_search && is_single_pred);
6070
4.64M
  }
6071
161
  return false;
6072
4.64M
}
6073
6074
static inline int get_block_temp_var(const AV1_COMP *cpi, const MACROBLOCK *x,
6075
896k
                                     BLOCK_SIZE bsize) {
6076
896k
  const AV1_COMMON *const cm = &cpi->common;
6077
896k
  const SPEED_FEATURES *const sf = &cpi->sf;
6078
6079
896k
  if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION ||
6080
0
      !sf->rt_sf.short_circuit_low_temp_var ||
6081
896k
      !sf->rt_sf.prune_inter_modes_using_temp_var) {
6082
896k
    return 0;
6083
896k
  }
6084
6085
18.4E
  const int mi_row = x->e_mbd.mi_row;
6086
18.4E
  const int mi_col = x->e_mbd.mi_col;
6087
18.4E
  int is_low_temp_var = 0;
6088
6089
18.4E
  if (cm->seq_params->sb_size == BLOCK_64X64)
6090
0
    is_low_temp_var = av1_get_force_skip_low_temp_var_small_sb(
6091
0
        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
6092
18.4E
  else
6093
18.4E
    is_low_temp_var = av1_get_force_skip_low_temp_var(
6094
18.4E
        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
6095
6096
18.4E
  return is_low_temp_var;
6097
896k
}
6098
6099
// TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
6100
void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
6101
                            struct macroblock *x, struct RD_STATS *rd_cost,
6102
                            BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
6103
896k
                            int64_t best_rd_so_far) {
6104
896k
  AV1_COMMON *const cm = &cpi->common;
6105
896k
  const FeatureFlags *const features = &cm->features;
6106
896k
  const int num_planes = av1_num_planes(cm);
6107
896k
  const SPEED_FEATURES *const sf = &cpi->sf;
6108
896k
  MACROBLOCKD *const xd = &x->e_mbd;
6109
896k
  MB_MODE_INFO *const mbmi = xd->mi[0];
6110
896k
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
6111
896k
  int i;
6112
896k
  const ModeCosts *mode_costs = &x->mode_costs;
6113
896k
  const int *comp_inter_cost =
6114
896k
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
6115
6116
896k
  InterModeSearchState search_state;
6117
896k
  init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
6118
896k
  INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
6119
896k
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
6120
896k
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
6121
896k
  };
6122
6123
896k
  init_top_tx_no_split_rd_for_inter_modes(
6124
896k
      x, sf->tx_sf.prune_inter_tx_split_rd_eval_lvl);
6125
6126
896k
  HandleInterModeArgs args = { { NULL },
6127
896k
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
6128
896k
                               { NULL },
6129
896k
                               { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
6130
896k
                                 MAX_SB_SIZE >> 1 },
6131
896k
                               NULL,
6132
896k
                               NULL,
6133
896k
                               NULL,
6134
896k
                               search_state.modelled_rd,
6135
896k
                               INT_MAX,
6136
896k
                               INT_MAX,
6137
896k
                               search_state.simple_rd,
6138
896k
                               0,
6139
896k
                               false,
6140
896k
                               interintra_modes,
6141
896k
                               { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
6142
896k
                               { { 0, 0 } },
6143
896k
                               { 0 },
6144
896k
                               0,
6145
896k
                               0,
6146
896k
                               -1,
6147
896k
                               -1,
6148
896k
                               -1,
6149
896k
                               { 0 },
6150
896k
                               { 0 },
6151
896k
                               UINT_MAX };
6152
  // Currently, is_low_temp_var is used in real time encoding.
6153
896k
  const int is_low_temp_var = get_block_temp_var(cpi, x, bsize);
6154
6155
26.8M
  for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
6156
  // Indicates the appropriate number of simple translation winner modes for
6157
  // exhaustive motion mode evaluation
6158
896k
  const int max_winner_motion_mode_cand =
6159
896k
      num_winner_motion_modes[sf->winner_mode_sf.motion_mode_for_winner_cand];
6160
896k
  assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
6161
896k
  motion_mode_candidate motion_mode_cand;
6162
896k
  motion_mode_best_st_candidate best_motion_mode_cands;
6163
  // Initializing the number of motion mode candidates to zero.
6164
896k
  best_motion_mode_cands.num_motion_mode_cand = 0;
6165
9.86M
  for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
6166
8.96M
    best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
6167
6168
8.06M
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
6169
6170
896k
  av1_invalid_rd_stats(rd_cost);
6171
6172
8.06M
  for (i = 0; i < REF_FRAMES; ++i) {
6173
7.16M
    x->warp_sample_info[i].num = -1;
6174
7.16M
  }
6175
6176
  // Ref frames that are selected by square partition blocks.
6177
896k
  int picked_ref_frames_mask = 0;
6178
896k
  if (sf->inter_sf.prune_ref_frame_for_rect_partitions &&
6179
895k
      mbmi->partition != PARTITION_NONE) {
6180
    // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
6181
    // partition blocks. prune_ref_frame_for_rect_partitions >=2
6182
    // implies prune for vert, horiz and extended partition blocks.
6183
10.5k
    if ((mbmi->partition != PARTITION_VERT &&
6184
648
         mbmi->partition != PARTITION_HORZ) ||
6185
10.5k
        sf->inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
6186
9.74k
      picked_ref_frames_mask =
6187
9.74k
          fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
6188
9.74k
    }
6189
10.5k
  }
6190
6191
#if CONFIG_COLLECT_COMPONENT_TIMING
6192
  start_timing(cpi, set_params_rd_pick_inter_mode_time);
6193
#endif
6194
  // Skip ref frames that never selected by square blocks.
6195
896k
  const int skip_ref_frame_mask =
6196
896k
      picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
6197
896k
  mode_skip_mask_t mode_skip_mask;
6198
896k
  unsigned int ref_costs_single[REF_FRAMES];
6199
896k
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
6200
896k
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
6201
  // init params, set frame modes, speed features
6202
896k
  set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
6203
896k
                                skip_ref_frame_mask, ref_costs_single,
6204
896k
                                ref_costs_comp, yv12_mb);
6205
#if CONFIG_COLLECT_COMPONENT_TIMING
6206
  end_timing(cpi, set_params_rd_pick_inter_mode_time);
6207
#endif
6208
6209
896k
  int64_t best_est_rd = INT64_MAX;
6210
896k
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
6211
  // If do_tx_search is 0, only estimated RD should be computed.
6212
  // If do_tx_search is 1, all modes have TX search performed.
6213
896k
  const int do_tx_search =
6214
896k
      !((sf->inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
6215
869k
        (sf->inter_sf.inter_mode_rd_model_estimation == 2 &&
6216
685k
         num_pels_log2_lookup[bsize] > 8));
6217
896k
  InterModesInfo *inter_modes_info = x->inter_modes_info;
6218
896k
  inter_modes_info->num = 0;
6219
6220
  // Temporary buffers used by handle_inter_mode().
6221
896k
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
6222
6223
  // The best RD found for the reference frame, among single reference modes.
6224
  // Note that the 0-th element will contain a cut-off that is later used
6225
  // to determine if we should skip a compound mode.
6226
896k
  int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
6227
896k
                                       INT64_MAX, INT64_MAX, INT64_MAX,
6228
896k
                                       INT64_MAX, INT64_MAX };
6229
6230
  // Prepared stats used later to check if we could skip intra mode eval.
6231
896k
  int64_t inter_cost = -1;
6232
896k
  int64_t intra_cost = -1;
6233
  // Need to tweak the threshold for hdres speed 0 & 1.
6234
896k
  const int mi_row = xd->mi_row;
6235
896k
  const int mi_col = xd->mi_col;
6236
6237
  // Obtain the relevant tpl stats for pruning inter modes
6238
896k
  PruneInfoFromTpl inter_cost_info_from_tpl;
6239
896k
#if !CONFIG_REALTIME_ONLY
6240
896k
  if (sf->inter_sf.prune_inter_modes_based_on_tpl) {
6241
    // x->tpl_keep_ref_frame[id] = 1 => no pruning in
6242
    // prune_ref_by_selective_ref_frame()
6243
    // x->tpl_keep_ref_frame[id] = 0  => ref frame can be pruned in
6244
    // prune_ref_by_selective_ref_frame()
6245
    // Populating valid_refs[idx] = 1 ensures that
6246
    // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
6247
    // pruned ref frame.
6248
894k
    int valid_refs[INTER_REFS_PER_FRAME];
6249
7.16M
    for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
6250
6.26M
      const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
6251
6.26M
      valid_refs[frame - 1] =
6252
6.26M
          x->tpl_keep_ref_frame[frame] ||
6253
6.05M
          !prune_ref_by_selective_ref_frame(
6254
6.05M
              cpi, x, refs, cm->cur_frame->ref_display_order_hint);
6255
6.26M
    }
6256
894k
    av1_zero(inter_cost_info_from_tpl);
6257
894k
    get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
6258
894k
                              &inter_cost_info_from_tpl);
6259
894k
  }
6260
6261
896k
  const int do_pruning =
6262
896k
      (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
6263
896k
  if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
6264
896k
      cpi->oxcf.algo_cfg.enable_tpl_model)
6265
896k
    calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
6266
896k
                                 &intra_cost);
6267
896k
#endif  // !CONFIG_REALTIME_ONLY
6268
6269
  // Initialize best mode stats for winner mode processing.
6270
896k
  const int max_winner_mode_count =
6271
896k
      winner_mode_count_allowed[sf->winner_mode_sf.multi_winner_mode_type];
6272
896k
  zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
6273
896k
  x->winner_mode_count = 0;
6274
896k
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
6275
896k
                          NULL, bsize, best_rd_so_far,
6276
896k
                          sf->winner_mode_sf.multi_winner_mode_type, 0);
6277
6278
896k
  int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
6279
896k
  if (sf->inter_sf.prune_inter_modes_if_skippable) {
6280
    // Higher multiplication factor values for lower quantizers.
6281
896k
    mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
6282
896k
  }
6283
6284
  // Initialize arguments for mode loop speed features
6285
896k
  InterModeSFArgs sf_args = { &args.skip_motion_mode,
6286
896k
                              &mode_skip_mask,
6287
896k
                              &search_state,
6288
896k
                              skip_ref_frame_mask,
6289
896k
                              0,
6290
896k
                              mode_thresh_mul_fact,
6291
896k
                              0,
6292
896k
                              0 };
6293
896k
  int64_t best_inter_yrd = INT64_MAX;
6294
6295
  // This is the main loop of this function. It loops over all possible inter
6296
  // modes and calls handle_inter_mode() to compute the RD for each.
6297
  // Here midx is just an iterator index that should not be used by itself
6298
  // except to keep track of the number of modes searched. It should be used
6299
  // with av1_default_mode_order to get the enum that defines the mode, which
6300
  // can be used with av1_mode_defs to get the prediction mode and the ref
6301
  // frames.
6302
  // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
6303
  // good speedup for real time case. If we decide to use compound mode in real
6304
  // time, maybe we can modify av1_default_mode_order table.
6305
896k
  THR_MODES mode_start = THR_INTER_MODE_START;
6306
896k
  THR_MODES mode_end = THR_INTER_MODE_END;
6307
896k
  const CurrentFrame *const current_frame = &cm->current_frame;
6308
896k
  if (current_frame->reference_mode == SINGLE_REFERENCE) {
6309
0
    mode_start = SINGLE_REF_MODE_START;
6310
0
    mode_end = SINGLE_REF_MODE_END;
6311
0
  }
6312
896k
  init_comp_avg_est_rd(x, sf->inter_sf.skip_cmp_using_top_cmp_avg_est_rd_lvl);
6313
140M
  for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
6314
    // Get the actual prediction mode we are trying in this iteration
6315
139M
    const THR_MODES mode_enum = av1_default_mode_order[midx];
6316
139M
    const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
6317
139M
    const PREDICTION_MODE this_mode = mode_def->mode;
6318
139M
    const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
6319
6320
139M
    const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
6321
139M
    const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
6322
139M
    const int is_single_pred =
6323
139M
        ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
6324
139M
    const int comp_pred = second_ref_frame > INTRA_FRAME;
6325
6326
139M
    txfm_info->skip_txfm = 0;
6327
139M
    sf_args.num_single_modes_processed += is_single_pred;
6328
#if CONFIG_COLLECT_COMPONENT_TIMING
6329
    start_timing(cpi, skip_inter_mode_time);
6330
#endif
6331
    // Apply speed features to decide if this inter mode can be skipped
6332
139M
    const int is_skip_inter_mode = skip_inter_mode(
6333
139M
        cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var);
6334
#if CONFIG_COLLECT_COMPONENT_TIMING
6335
    end_timing(cpi, skip_inter_mode_time);
6336
#endif
6337
139M
    if (is_skip_inter_mode) continue;
6338
6339
4.59M
    init_mbmi(mbmi, this_mode, ref_frames, cm);
6340
4.59M
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
6341
6342
    // Select prediction reference frames.
6343
13.2M
    for (i = 0; i < num_planes; i++) {
6344
8.68M
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
6345
8.68M
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
6346
8.68M
    }
6347
6348
4.59M
    mbmi->angle_delta[PLANE_TYPE_Y] = 0;
6349
4.59M
    mbmi->angle_delta[PLANE_TYPE_UV] = 0;
6350
4.59M
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
6351
4.59M
    mbmi->ref_mv_idx = 0;
6352
6353
4.59M
    const int64_t ref_best_rd = search_state.best_rd;
6354
4.59M
    RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
6355
4.59M
    av1_init_rd_stats(&rd_stats);
6356
6357
4.59M
    const int ref_frame_cost = comp_pred
6358
4.59M
                                   ? ref_costs_comp[ref_frame][second_ref_frame]
6359
4.59M
                                   : ref_costs_single[ref_frame];
6360
4.59M
    const int compmode_cost =
6361
18.4E
        is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
6362
4.59M
    const int real_compmode_cost =
6363
4.59M
        cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
6364
4.64M
            ? compmode_cost
6365
18.4E
            : 0;
6366
    // Point to variables that are maintained between loop iterations
6367
4.59M
    args.single_newmv = search_state.single_newmv;
6368
4.59M
    args.single_newmv_rate = search_state.single_newmv_rate;
6369
4.59M
    args.single_newmv_valid = search_state.single_newmv_valid;
6370
4.59M
    args.single_comp_cost = real_compmode_cost;
6371
4.59M
    args.ref_frame_cost = ref_frame_cost;
6372
4.59M
    args.best_pred_sse = search_state.best_pred_sse;
6373
4.59M
    args.skip_ifs = skip_interp_filter_search(cpi, is_single_pred);
6374
4.59M
    int64_t skip_rd[2] = { search_state.best_skip_rd[0],
6375
4.59M
                           search_state.best_skip_rd[1] };
6376
4.59M
    int64_t this_yrd = INT64_MAX;
6377
#if CONFIG_COLLECT_COMPONENT_TIMING
6378
    start_timing(cpi, handle_inter_mode_time);
6379
#endif
6380
4.59M
    int64_t this_rd = handle_inter_mode(
6381
4.59M
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
6382
4.59M
        ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
6383
4.59M
        inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
6384
4.59M
        &this_yrd);
6385
#if CONFIG_COLLECT_COMPONENT_TIMING
6386
    end_timing(cpi, handle_inter_mode_time);
6387
#endif
6388
4.64M
    if (current_frame->reference_mode != SINGLE_REFERENCE) {
6389
4.64M
      if (!args.skip_ifs &&
6390
510k
          sf->inter_sf.prune_comp_search_by_single_result > 0 &&
6391
510k
          is_inter_singleref_mode(this_mode)) {
6392
144k
        collect_single_states(x, &search_state, mbmi);
6393
144k
      }
6394
6395
4.64M
      if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
6396
4.64M
          is_inter_singleref_mode(this_mode))
6397
4.28M
        update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
6398
4.64M
    }
6399
6400
4.59M
    if (this_rd == INT64_MAX) continue;
6401
6402
1.81M
    if (mbmi->skip_txfm) {
6403
241k
      rd_stats_y.rate = 0;
6404
241k
      rd_stats_uv.rate = 0;
6405
241k
    }
6406
6407
1.86M
    if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
6408
1.82M
        this_rd < ref_frame_rd[ref_frame]) {
6409
1.79M
      ref_frame_rd[ref_frame] = this_rd;
6410
1.79M
    }
6411
6412
1.81M
    adjust_cost(cpi, x, &this_rd, /*is_inter_pred=*/true);
6413
1.81M
    adjust_rdcost(cpi, x, &rd_stats, /*is_inter_pred=*/true);
6414
6415
    // Did this mode help, i.e., is it the new best mode
6416
1.81M
    if (this_rd < search_state.best_rd) {
6417
1.81M
      assert(IMPLIES(comp_pred,
6418
1.81M
                     cm->current_frame.reference_mode != SINGLE_REFERENCE));
6419
1.81M
      search_state.best_pred_sse = x->pred_sse[ref_frame];
6420
1.81M
      best_inter_yrd = this_yrd;
6421
1.81M
      update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
6422
1.81M
                          &rd_stats_uv, mode_enum, x, do_tx_search);
6423
1.81M
      if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
6424
      // skip_rd[0] is the best total rd for a skip mode so far.
6425
      // skip_rd[1] is the best total rd for a skip mode so far in luma.
6426
      // When do_tx_search = 1, both skip_rd[0] and skip_rd[1] are updated.
6427
      // When do_tx_search = 0, skip_rd[1] is updated.
6428
1.81M
      search_state.best_skip_rd[1] = skip_rd[1];
6429
1.81M
    }
6430
1.81M
    if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6431
      // Add this mode to motion mode candidate list for motion mode search
6432
      // if using motion_mode_for_winner_cand speed feature
6433
1.76M
      handle_winner_cand(mbmi, &best_motion_mode_cands,
6434
1.76M
                         max_winner_motion_mode_cand, this_rd,
6435
1.76M
                         &motion_mode_cand, args.skip_motion_mode);
6436
1.76M
    }
6437
6438
    /* keep record of best compound/single-only prediction */
6439
1.81M
    record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
6440
1.81M
                         x->rdmult, &search_state, compmode_cost);
6441
1.81M
  }
6442
6443
#if CONFIG_COLLECT_COMPONENT_TIMING
6444
  start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6445
#endif
6446
896k
  if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6447
    // For the single ref winner candidates, evaluate other motion modes (non
6448
    // simple translation).
6449
844k
    evaluate_motion_mode_for_winner_candidates(
6450
844k
        cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
6451
844k
        &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
6452
844k
        &search_state, &best_inter_yrd);
6453
844k
  }
6454
#if CONFIG_COLLECT_COMPONENT_TIMING
6455
  end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6456
#endif
6457
6458
#if CONFIG_COLLECT_COMPONENT_TIMING
6459
  start_timing(cpi, do_tx_search_time);
6460
#endif
6461
896k
  if (do_tx_search != 1) {
6462
    // A full tx search has not yet been done, do tx search for
6463
    // top mode candidates
6464
38.3k
    tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
6465
38.3k
                                    yv12_mb, mi_row, mi_col, &search_state,
6466
38.3k
                                    rd_cost, ctx, &best_inter_yrd);
6467
38.3k
  }
6468
#if CONFIG_COLLECT_COMPONENT_TIMING
6469
  end_timing(cpi, do_tx_search_time);
6470
#endif
6471
6472
#if CONFIG_COLLECT_COMPONENT_TIMING
6473
  start_timing(cpi, handle_intra_mode_time);
6474
#endif
6475
  // Gate intra mode evaluation if best of inter is skip except when source
6476
  // variance is extremely low and also based on max intra bsize.
6477
896k
  skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost,
6478
896k
                                 intra_cost);
6479
6480
896k
  const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
6481
896k
  search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
6482
896k
                                   &sf_args, intra_ref_frame_cost,
6483
896k
                                   best_inter_yrd);
6484
#if CONFIG_COLLECT_COMPONENT_TIMING
6485
  end_timing(cpi, handle_intra_mode_time);
6486
#endif
6487
6488
#if CONFIG_COLLECT_COMPONENT_TIMING
6489
  start_timing(cpi, refine_winner_mode_tx_time);
6490
#endif
6491
896k
  int winner_mode_count =
6492
896k
      sf->winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
6493
  // In effect only when fast tx search speed features are enabled.
6494
896k
  refine_winner_mode_tx(
6495
896k
      cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
6496
896k
      &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
6497
896k
      search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
6498
#if CONFIG_COLLECT_COMPONENT_TIMING
6499
  end_timing(cpi, refine_winner_mode_tx_time);
6500
#endif
6501
6502
  // Initialize default mode evaluation params
6503
896k
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);
6504
6505
  // Only try palette mode when the best mode so far is an intra mode.
6506
896k
  const int try_palette =
6507
896k
      cpi->oxcf.tool_cfg.enable_palette &&
6508
896k
      av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
6509
0
      !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
6510
896k
  RD_STATS this_rd_cost;
6511
896k
  int this_skippable = 0;
6512
896k
  if (try_palette) {
6513
#if CONFIG_COLLECT_COMPONENT_TIMING
6514
    start_timing(cpi, av1_search_palette_mode_time);
6515
#endif
6516
0
    this_skippable = av1_search_palette_mode(
6517
0
        &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
6518
0
        ctx, &this_rd_cost, search_state.best_rd);
6519
#if CONFIG_COLLECT_COMPONENT_TIMING
6520
    end_timing(cpi, av1_search_palette_mode_time);
6521
#endif
6522
0
    if (this_rd_cost.rdcost < search_state.best_rd) {
6523
0
      search_state.best_mode_index = THR_DC;
6524
0
      mbmi->mv[0].as_int = 0;
6525
0
      rd_cost->rate = this_rd_cost.rate;
6526
0
      rd_cost->dist = this_rd_cost.dist;
6527
0
      rd_cost->rdcost = this_rd_cost.rdcost;
6528
0
      search_state.best_rd = rd_cost->rdcost;
6529
0
      search_state.best_mbmode = *mbmi;
6530
0
      search_state.best_skip2 = 0;
6531
0
      search_state.best_mode_skippable = this_skippable;
6532
0
      av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
6533
0
    }
6534
0
  }
6535
6536
896k
  search_state.best_mbmode.skip_mode = 0;
6537
896k
  if (cm->current_frame.skip_mode_info.skip_mode_flag &&
6538
197k
      cpi->oxcf.algo_cfg.sharpness != 3 && is_comp_ref_allowed(bsize)) {
6539
197k
    const struct segmentation *const seg = &cm->seg;
6540
197k
    unsigned char segment_id = mbmi->segment_id;
6541
197k
    if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
6542
197k
      rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
6543
197k
    }
6544
197k
  }
6545
6546
  // Make sure that the ref_mv_idx is only nonzero when we're
6547
  // using a mode which can support ref_mv_idx
6548
896k
  if (search_state.best_mbmode.ref_mv_idx != 0 &&
6549
31.2k
      !(search_state.best_mbmode.mode == NEWMV ||
6550
8.88k
        search_state.best_mbmode.mode == NEW_NEWMV ||
6551
8.64k
        have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
6552
0
    search_state.best_mbmode.ref_mv_idx = 0;
6553
0
  }
6554
6555
896k
  if (search_state.best_mode_index == THR_INVALID ||
6556
890k
      search_state.best_rd >= best_rd_so_far) {
6557
7.23k
    rd_cost->rate = INT_MAX;
6558
7.23k
    rd_cost->rdcost = INT64_MAX;
6559
7.23k
    return;
6560
7.23k
  }
6561
6562
889k
  const InterpFilter interp_filter = features->interp_filter;
6563
889k
  assert((interp_filter == SWITCHABLE) ||
6564
889k
         (interp_filter ==
6565
889k
          search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
6566
889k
         !is_inter_block(&search_state.best_mbmode));
6567
889k
  assert((interp_filter == SWITCHABLE) ||
6568
889k
         (interp_filter ==
6569
889k
          search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
6570
889k
         !is_inter_block(&search_state.best_mbmode));
6571
6572
889k
  if (!cpi->rc.is_src_frame_alt_ref && sf->inter_sf.adaptive_rd_thresh) {
6573
858k
    av1_update_rd_thresh_fact(
6574
858k
        cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
6575
858k
        search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
6576
858k
  }
6577
6578
  // macroblock modes
6579
889k
  *mbmi = search_state.best_mbmode;
6580
889k
  txfm_info->skip_txfm |= search_state.best_skip2;
6581
6582
  // Note: this section is needed since the mode may have been forced to
6583
  // GLOBALMV by the all-zero mode handling of ref-mv.
6584
889k
  if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
6585
    // Correct the interp filters for GLOBALMV
6586
1.49k
    if (is_nontrans_global_motion(xd, xd->mi[0])) {
6587
1.49k
      int_interpfilters filters =
6588
1.49k
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
6589
1.49k
      assert(mbmi->interp_filters.as_int == filters.as_int);
6590
1.49k
      (void)filters;
6591
1.49k
    }
6592
1.49k
  }
6593
6594
889k
  txfm_info->skip_txfm |= search_state.best_mode_skippable;
6595
6596
889k
  assert(search_state.best_mode_index != THR_INVALID);
6597
6598
#if CONFIG_INTERNAL_STATS
6599
  store_coding_context(x, ctx, search_state.best_mode_index,
6600
                       search_state.best_mode_skippable);
6601
#else
6602
889k
  store_coding_context(x, ctx, search_state.best_mode_skippable);
6603
889k
#endif  // CONFIG_INTERNAL_STATS
6604
6605
889k
  if (mbmi->palette_mode_info.palette_size[1] > 0) {
6606
0
    assert(try_palette);
6607
0
    av1_restore_uv_color_map(cpi, x);
6608
0
  }
6609
889k
}
6610
6611
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
6612
                                        TileDataEnc *tile_data, MACROBLOCK *x,
6613
                                        int mi_row, int mi_col,
6614
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
6615
                                        PICK_MODE_CONTEXT *ctx,
6616
0
                                        int64_t best_rd_so_far) {
6617
0
  const AV1_COMMON *const cm = &cpi->common;
6618
0
  const FeatureFlags *const features = &cm->features;
6619
0
  MACROBLOCKD *const xd = &x->e_mbd;
6620
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
6621
0
  unsigned char segment_id = mbmi->segment_id;
6622
0
  const int comp_pred = 0;
6623
0
  int i;
6624
0
  unsigned int ref_costs_single[REF_FRAMES];
6625
0
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
6626
0
  const ModeCosts *mode_costs = &x->mode_costs;
6627
0
  const int *comp_inter_cost =
6628
0
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
6629
0
  InterpFilter best_filter = SWITCHABLE;
6630
0
  int64_t this_rd = INT64_MAX;
6631
0
  int rate2 = 0;
6632
0
  const int64_t distortion2 = 0;
6633
0
  (void)mi_row;
6634
0
  (void)mi_col;
6635
0
  (void)tile_data;
6636
6637
0
  av1_collect_neighbors_ref_counts(xd);
6638
6639
0
  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
6640
0
                           ref_costs_comp);
6641
6642
0
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
6643
0
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;
6644
6645
0
  rd_cost->rate = INT_MAX;
6646
6647
0
  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
6648
6649
0
  mbmi->palette_mode_info.palette_size[0] = 0;
6650
0
  mbmi->palette_mode_info.palette_size[1] = 0;
6651
0
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
6652
0
  mbmi->mode = GLOBALMV;
6653
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
6654
0
  mbmi->uv_mode = UV_DC_PRED;
6655
0
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
6656
0
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
6657
0
  else
6658
0
    mbmi->ref_frame[0] = LAST_FRAME;
6659
0
  mbmi->ref_frame[1] = NONE_FRAME;
6660
0
  mbmi->mv[0].as_int =
6661
0
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
6662
0
                           features->allow_high_precision_mv, bsize, mi_col,
6663
0
                           mi_row, features->cur_frame_force_integer_mv)
6664
0
          .as_int;
6665
0
  mbmi->tx_size = max_txsize_lookup[bsize];
6666
0
  x->txfm_search_info.skip_txfm = 1;
6667
6668
0
  mbmi->ref_mv_idx = 0;
6669
6670
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
6671
0
  av1_count_overlappable_neighbors(cm, xd);
6672
0
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
6673
0
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
6674
0
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
6675
    // Select the samples according to motion vector difference
6676
0
    if (mbmi->num_proj_ref > 1) {
6677
0
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
6678
0
                                             mbmi->num_proj_ref, bsize);
6679
0
    }
6680
0
  }
6681
6682
0
  const InterpFilter interp_filter = features->interp_filter;
6683
0
  set_default_interp_filters(mbmi, interp_filter);
6684
6685
0
  if (interp_filter != SWITCHABLE) {
6686
0
    best_filter = interp_filter;
6687
0
  } else {
6688
0
    best_filter = EIGHTTAP_REGULAR;
6689
0
    if (av1_is_interp_needed(xd)) {
6690
0
      int rs;
6691
0
      int best_rs = INT_MAX;
6692
0
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
6693
0
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
6694
0
        rs = av1_get_switchable_rate(x, xd, interp_filter,
6695
0
                                     cm->seq_params->enable_dual_filter);
6696
0
        if (rs < best_rs) {
6697
0
          best_rs = rs;
6698
0
          best_filter = mbmi->interp_filters.as_filters.y_filter;
6699
0
        }
6700
0
      }
6701
0
    }
6702
0
  }
6703
  // Set the appropriate filter
6704
0
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
6705
0
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
6706
0
                                   cm->seq_params->enable_dual_filter);
6707
6708
0
  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
6709
0
    rate2 += comp_inter_cost[comp_pred];
6710
6711
  // Estimate the reference frame signaling cost and add it
6712
  // to the rolling cost variable.
6713
0
  rate2 += ref_costs_single[LAST_FRAME];
6714
0
  this_rd = RDCOST(x->rdmult, rate2, distortion2);
6715
6716
0
  rd_cost->rate = rate2;
6717
0
  rd_cost->dist = distortion2;
6718
0
  rd_cost->rdcost = this_rd;
6719
6720
0
  if (this_rd >= best_rd_so_far) {
6721
0
    rd_cost->rate = INT_MAX;
6722
0
    rd_cost->rdcost = INT64_MAX;
6723
0
    return;
6724
0
  }
6725
6726
0
  assert((interp_filter == SWITCHABLE) ||
6727
0
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));
6728
6729
0
  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
6730
0
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
6731
0
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
6732
0
                              THR_GLOBALMV, THR_INTER_MODE_START,
6733
0
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
6734
0
  }
6735
6736
#if CONFIG_INTERNAL_STATS
6737
  store_coding_context(x, ctx, THR_GLOBALMV, 0);
6738
#else
6739
0
  store_coding_context(x, ctx, 0);
6740
0
#endif  // CONFIG_INTERNAL_STATS
6741
0
}
6742
6743
/*!\cond */
6744
// Context handed (through a void pointer) to the
// calc_target_weighted_pred_above() / calc_target_weighted_pred_left()
// neighbour callbacks.
struct calc_target_weighted_pred_ctxt {
6745
  // Supplies the 'wsrc' and 'mask' arrays that the callbacks write into.
  const OBMCBuffer *obmc_buffer;
6746
  // Neighbour prediction samples (above or left, depending on the caller).
  // Reinterpreted with CONVERT_TO_SHORTPTR() when the current buffer is
  // high bitdepth.
  const uint8_t *tmp;
6747
  // Step, in samples, used to advance 'tmp' from one row to the next.
  int tmp_stride;
6748
  // Number of rows (above) or columns (left) that get blended; also the
  // argument passed to av1_get_obmc_mask().
  int overlap;
6749
};
6750
/*!\endcond */
6751
6752
static inline void calc_target_weighted_pred_above(
6753
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6754
0
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6755
0
  (void)nb_mi;
6756
0
  (void)num_planes;
6757
0
  (void)rel_mi_row;
6758
0
  (void)dir;
6759
6760
0
  struct calc_target_weighted_pred_ctxt *ctxt =
6761
0
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6762
6763
0
  const int bw = xd->width << MI_SIZE_LOG2;
6764
0
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6765
6766
0
  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
6767
0
  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
6768
0
  const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
6769
0
  const int is_hbd = is_cur_buf_hbd(xd);
6770
6771
0
  if (!is_hbd) {
6772
0
    for (int row = 0; row < ctxt->overlap; ++row) {
6773
0
      const uint8_t m0 = mask1d[row];
6774
0
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6775
0
      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6776
0
        wsrc[col] = m1 * tmp[col];
6777
0
        mask[col] = m0;
6778
0
      }
6779
0
      wsrc += bw;
6780
0
      mask += bw;
6781
0
      tmp += ctxt->tmp_stride;
6782
0
    }
6783
0
  } else {
6784
0
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6785
6786
0
    for (int row = 0; row < ctxt->overlap; ++row) {
6787
0
      const uint8_t m0 = mask1d[row];
6788
0
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6789
0
      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6790
0
        wsrc[col] = m1 * tmp16[col];
6791
0
        mask[col] = m0;
6792
0
      }
6793
0
      wsrc += bw;
6794
0
      mask += bw;
6795
0
      tmp16 += ctxt->tmp_stride;
6796
0
    }
6797
0
  }
6798
0
}
6799
6800
static inline void calc_target_weighted_pred_left(
6801
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6802
0
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6803
0
  (void)nb_mi;
6804
0
  (void)num_planes;
6805
0
  (void)rel_mi_col;
6806
0
  (void)dir;
6807
6808
0
  struct calc_target_weighted_pred_ctxt *ctxt =
6809
0
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6810
6811
0
  const int bw = xd->width << MI_SIZE_LOG2;
6812
0
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6813
6814
0
  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
6815
0
  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
6816
0
  const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
6817
0
  const int is_hbd = is_cur_buf_hbd(xd);
6818
6819
0
  if (!is_hbd) {
6820
0
    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6821
0
      for (int col = 0; col < ctxt->overlap; ++col) {
6822
0
        const uint8_t m0 = mask1d[col];
6823
0
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6824
0
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6825
0
                    (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6826
0
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6827
0
      }
6828
0
      wsrc += bw;
6829
0
      mask += bw;
6830
0
      tmp += ctxt->tmp_stride;
6831
0
    }
6832
0
  } else {
6833
0
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6834
6835
0
    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6836
0
      for (int col = 0; col < ctxt->overlap; ++col) {
6837
0
        const uint8_t m0 = mask1d[col];
6838
0
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6839
0
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6840
0
                    (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6841
0
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6842
0
      }
6843
0
      wsrc += bw;
6844
0
      mask += bw;
6845
0
      tmp16 += ctxt->tmp_stride;
6846
0
    }
6847
0
  }
6848
0
}
6849
6850
// This function has a structure similar to av1_build_obmc_inter_prediction
6851
//
6852
// The OBMC predictor is computed as:
6853
//
6854
//  PObmc(x,y) =
6855
//    AOM_BLEND_A64(Mh(x),
6856
//                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
6857
//                  PLeft(x, y))
6858
//
6859
// Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
6860
// rounding, this can be written as:
6861
//
6862
//  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
6863
//    Mh(x) * Mv(y) * P(x,y) +
6864
//      Mh(x) * Cv(y) * Pabove(x,y) +
6865
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6866
//
6867
// Where :
6868
//
6869
//  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
6870
//  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
6871
//
6872
// This function computes 'wsrc' and 'mask' as:
6873
//
6874
//  wsrc(x, y) =
6875
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
6876
//      Mh(x) * Cv(y) * Pabove(x,y) -
6877
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6878
//
6879
//  mask(x, y) = Mh(x) * Mv(y)
6880
//
6881
// These can then be used to efficiently approximate the error for any
6882
// predictor P in the context of the provided neighbouring predictors by
6883
// computing:
6884
//
6885
//  error(x, y) =
6886
//    (wsrc(x, y) - mask(x, y) * P(x, y)) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
6887
//
6888
static inline void calc_target_weighted_pred(
6889
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
6890
    const uint8_t *above, int above_stride, const uint8_t *left,
6891
0
    int left_stride) {
6892
0
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
6893
0
  const int bw = xd->width << MI_SIZE_LOG2;
6894
0
  const int bh = xd->height << MI_SIZE_LOG2;
6895
0
  const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
6896
0
  int32_t *mask_buf = obmc_buffer->mask;
6897
0
  int32_t *wsrc_buf = obmc_buffer->wsrc;
6898
6899
0
  const int is_hbd = is_cur_buf_hbd(xd);
6900
0
  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
6901
6902
  // plane 0 should not be sub-sampled
6903
0
  assert(xd->plane[0].subsampling_x == 0);
6904
0
  assert(xd->plane[0].subsampling_y == 0);
6905
6906
0
  av1_zero_array(wsrc_buf, bw * bh);
6907
0
  for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
6908
6909
  // handle above row
6910
0
  if (xd->up_available) {
6911
0
    const int overlap =
6912
0
        AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
6913
0
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
6914
0
                                                   above_stride, overlap };
6915
0
    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
6916
0
                                  max_neighbor_obmc[mi_size_wide_log2[bsize]],
6917
0
                                  calc_target_weighted_pred_above, &ctxt);
6918
0
  }
6919
6920
0
  for (int i = 0; i < bw * bh; ++i) {
6921
0
    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6922
0
    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6923
0
  }
6924
6925
  // handle left column
6926
0
  if (xd->left_available) {
6927
0
    const int overlap =
6928
0
        AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
6929
0
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
6930
0
                                                   left_stride, overlap };
6931
0
    foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
6932
0
                                 max_neighbor_obmc[mi_size_high_log2[bsize]],
6933
0
                                 calc_target_weighted_pred_left, &ctxt);
6934
0
  }
6935
6936
0
  if (!is_hbd) {
6937
0
    const uint8_t *src = x->plane[0].src.buf;
6938
6939
0
    for (int row = 0; row < bh; ++row) {
6940
0
      for (int col = 0; col < bw; ++col) {
6941
0
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6942
0
      }
6943
0
      wsrc_buf += bw;
6944
0
      src += x->plane[0].src.stride;
6945
0
    }
6946
0
  } else {
6947
0
    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
6948
6949
0
    for (int row = 0; row < bh; ++row) {
6950
0
      for (int col = 0; col < bw; ++col) {
6951
0
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6952
0
      }
6953
0
      wsrc_buf += bw;
6954
0
      src += x->plane[0].src.stride;
6955
0
    }
6956
0
  }
6957
0
}