Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/encoder/rdopt.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <math.h>
14
#include <stdbool.h>
15
#include <stdint.h>
16
#include <string.h>
17
18
#include "config/aom_config.h"
19
#include "config/aom_dsp_rtcd.h"
20
#include "config/av1_rtcd.h"
21
22
#include "aom_dsp/aom_dsp_common.h"
23
#include "aom_dsp/blend.h"
24
#include "aom_mem/aom_mem.h"
25
#include "aom_ports/aom_timer.h"
26
#include "aom_ports/mem.h"
27
28
#include "av1/common/av1_common_int.h"
29
#include "av1/common/cfl.h"
30
#include "av1/common/blockd.h"
31
#include "av1/common/common.h"
32
#include "av1/common/common_data.h"
33
#include "av1/common/entropy.h"
34
#include "av1/common/entropymode.h"
35
#include "av1/common/enums.h"
36
#include "av1/common/idct.h"
37
#include "av1/common/mvref_common.h"
38
#include "av1/common/obmc.h"
39
#include "av1/common/pred_common.h"
40
#include "av1/common/quant_common.h"
41
#include "av1/common/reconinter.h"
42
#include "av1/common/reconintra.h"
43
#include "av1/common/scan.h"
44
#include "av1/common/seg_common.h"
45
#include "av1/common/txb_common.h"
46
#include "av1/common/warped_motion.h"
47
48
#include "av1/encoder/aq_variance.h"
49
#include "av1/encoder/av1_quantize.h"
50
#include "av1/encoder/block.h"
51
#include "av1/encoder/cost.h"
52
#include "av1/encoder/compound_type.h"
53
#include "av1/encoder/encodemb.h"
54
#include "av1/encoder/encodemv.h"
55
#include "av1/encoder/encoder.h"
56
#include "av1/encoder/encodetxb.h"
57
#include "av1/encoder/hybrid_fwd_txfm.h"
58
#include "av1/encoder/interp_search.h"
59
#include "av1/encoder/intra_mode_search.h"
60
#include "av1/encoder/intra_mode_search_utils.h"
61
#include "av1/encoder/mcomp.h"
62
#include "av1/encoder/ml.h"
63
#include "av1/encoder/mode_prune_model_weights.h"
64
#include "av1/encoder/model_rd.h"
65
#include "av1/encoder/motion_search_facade.h"
66
#include "av1/encoder/palette.h"
67
#include "av1/encoder/pustats.h"
68
#include "av1/encoder/random.h"
69
#include "av1/encoder/ratectrl.h"
70
#include "av1/encoder/rd.h"
71
#include "av1/encoder/rdopt.h"
72
#include "av1/encoder/reconinter_enc.h"
73
#include "av1/encoder/tokenize.h"
74
#include "av1/encoder/tpl_model.h"
75
#include "av1/encoder/tx_search.h"
76
#include "av1/encoder/var_based_part.h"
77
78
0
#define LAST_NEW_MV_INDEX 6
79
80
// Mode_threshold multiplication factor table for prune_inter_modes_if_skippable
81
// The values are kept in Q12 format and equation used to derive is
82
// (2.5 - ((float)x->qindex / MAXQ) * 1.5)
83
0
#define MODE_THRESH_QBITS 12
84
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
85
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
86
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
87
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
88
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
89
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
90
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
91
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
92
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
93
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
94
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
95
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
96
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
97
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
98
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
99
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
100
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
101
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
102
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
103
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
104
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
105
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
106
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
107
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
108
  4144,  4120,  4096
109
};
110
111
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
112
  THR_NEARESTMV,
113
  THR_NEARESTL2,
114
  THR_NEARESTL3,
115
  THR_NEARESTB,
116
  THR_NEARESTA2,
117
  THR_NEARESTA,
118
  THR_NEARESTG,
119
120
  THR_NEWMV,
121
  THR_NEWL2,
122
  THR_NEWL3,
123
  THR_NEWB,
124
  THR_NEWA2,
125
  THR_NEWA,
126
  THR_NEWG,
127
128
  THR_NEARMV,
129
  THR_NEARL2,
130
  THR_NEARL3,
131
  THR_NEARB,
132
  THR_NEARA2,
133
  THR_NEARA,
134
  THR_NEARG,
135
136
  THR_GLOBALMV,
137
  THR_GLOBALL2,
138
  THR_GLOBALL3,
139
  THR_GLOBALB,
140
  THR_GLOBALA2,
141
  THR_GLOBALA,
142
  THR_GLOBALG,
143
144
  THR_COMP_NEAREST_NEARESTLA,
145
  THR_COMP_NEAREST_NEARESTL2A,
146
  THR_COMP_NEAREST_NEARESTL3A,
147
  THR_COMP_NEAREST_NEARESTGA,
148
  THR_COMP_NEAREST_NEARESTLB,
149
  THR_COMP_NEAREST_NEARESTL2B,
150
  THR_COMP_NEAREST_NEARESTL3B,
151
  THR_COMP_NEAREST_NEARESTGB,
152
  THR_COMP_NEAREST_NEARESTLA2,
153
  THR_COMP_NEAREST_NEARESTL2A2,
154
  THR_COMP_NEAREST_NEARESTL3A2,
155
  THR_COMP_NEAREST_NEARESTGA2,
156
  THR_COMP_NEAREST_NEARESTLL2,
157
  THR_COMP_NEAREST_NEARESTLL3,
158
  THR_COMP_NEAREST_NEARESTLG,
159
  THR_COMP_NEAREST_NEARESTBA,
160
161
  THR_COMP_NEAR_NEARLB,
162
  THR_COMP_NEW_NEWLB,
163
  THR_COMP_NEW_NEARESTLB,
164
  THR_COMP_NEAREST_NEWLB,
165
  THR_COMP_NEW_NEARLB,
166
  THR_COMP_NEAR_NEWLB,
167
  THR_COMP_GLOBAL_GLOBALLB,
168
169
  THR_COMP_NEAR_NEARLA,
170
  THR_COMP_NEW_NEWLA,
171
  THR_COMP_NEW_NEARESTLA,
172
  THR_COMP_NEAREST_NEWLA,
173
  THR_COMP_NEW_NEARLA,
174
  THR_COMP_NEAR_NEWLA,
175
  THR_COMP_GLOBAL_GLOBALLA,
176
177
  THR_COMP_NEAR_NEARL2A,
178
  THR_COMP_NEW_NEWL2A,
179
  THR_COMP_NEW_NEARESTL2A,
180
  THR_COMP_NEAREST_NEWL2A,
181
  THR_COMP_NEW_NEARL2A,
182
  THR_COMP_NEAR_NEWL2A,
183
  THR_COMP_GLOBAL_GLOBALL2A,
184
185
  THR_COMP_NEAR_NEARL3A,
186
  THR_COMP_NEW_NEWL3A,
187
  THR_COMP_NEW_NEARESTL3A,
188
  THR_COMP_NEAREST_NEWL3A,
189
  THR_COMP_NEW_NEARL3A,
190
  THR_COMP_NEAR_NEWL3A,
191
  THR_COMP_GLOBAL_GLOBALL3A,
192
193
  THR_COMP_NEAR_NEARGA,
194
  THR_COMP_NEW_NEWGA,
195
  THR_COMP_NEW_NEARESTGA,
196
  THR_COMP_NEAREST_NEWGA,
197
  THR_COMP_NEW_NEARGA,
198
  THR_COMP_NEAR_NEWGA,
199
  THR_COMP_GLOBAL_GLOBALGA,
200
201
  THR_COMP_NEAR_NEARL2B,
202
  THR_COMP_NEW_NEWL2B,
203
  THR_COMP_NEW_NEARESTL2B,
204
  THR_COMP_NEAREST_NEWL2B,
205
  THR_COMP_NEW_NEARL2B,
206
  THR_COMP_NEAR_NEWL2B,
207
  THR_COMP_GLOBAL_GLOBALL2B,
208
209
  THR_COMP_NEAR_NEARL3B,
210
  THR_COMP_NEW_NEWL3B,
211
  THR_COMP_NEW_NEARESTL3B,
212
  THR_COMP_NEAREST_NEWL3B,
213
  THR_COMP_NEW_NEARL3B,
214
  THR_COMP_NEAR_NEWL3B,
215
  THR_COMP_GLOBAL_GLOBALL3B,
216
217
  THR_COMP_NEAR_NEARGB,
218
  THR_COMP_NEW_NEWGB,
219
  THR_COMP_NEW_NEARESTGB,
220
  THR_COMP_NEAREST_NEWGB,
221
  THR_COMP_NEW_NEARGB,
222
  THR_COMP_NEAR_NEWGB,
223
  THR_COMP_GLOBAL_GLOBALGB,
224
225
  THR_COMP_NEAR_NEARLA2,
226
  THR_COMP_NEW_NEWLA2,
227
  THR_COMP_NEW_NEARESTLA2,
228
  THR_COMP_NEAREST_NEWLA2,
229
  THR_COMP_NEW_NEARLA2,
230
  THR_COMP_NEAR_NEWLA2,
231
  THR_COMP_GLOBAL_GLOBALLA2,
232
233
  THR_COMP_NEAR_NEARL2A2,
234
  THR_COMP_NEW_NEWL2A2,
235
  THR_COMP_NEW_NEARESTL2A2,
236
  THR_COMP_NEAREST_NEWL2A2,
237
  THR_COMP_NEW_NEARL2A2,
238
  THR_COMP_NEAR_NEWL2A2,
239
  THR_COMP_GLOBAL_GLOBALL2A2,
240
241
  THR_COMP_NEAR_NEARL3A2,
242
  THR_COMP_NEW_NEWL3A2,
243
  THR_COMP_NEW_NEARESTL3A2,
244
  THR_COMP_NEAREST_NEWL3A2,
245
  THR_COMP_NEW_NEARL3A2,
246
  THR_COMP_NEAR_NEWL3A2,
247
  THR_COMP_GLOBAL_GLOBALL3A2,
248
249
  THR_COMP_NEAR_NEARGA2,
250
  THR_COMP_NEW_NEWGA2,
251
  THR_COMP_NEW_NEARESTGA2,
252
  THR_COMP_NEAREST_NEWGA2,
253
  THR_COMP_NEW_NEARGA2,
254
  THR_COMP_NEAR_NEWGA2,
255
  THR_COMP_GLOBAL_GLOBALGA2,
256
257
  THR_COMP_NEAR_NEARLL2,
258
  THR_COMP_NEW_NEWLL2,
259
  THR_COMP_NEW_NEARESTLL2,
260
  THR_COMP_NEAREST_NEWLL2,
261
  THR_COMP_NEW_NEARLL2,
262
  THR_COMP_NEAR_NEWLL2,
263
  THR_COMP_GLOBAL_GLOBALLL2,
264
265
  THR_COMP_NEAR_NEARLL3,
266
  THR_COMP_NEW_NEWLL3,
267
  THR_COMP_NEW_NEARESTLL3,
268
  THR_COMP_NEAREST_NEWLL3,
269
  THR_COMP_NEW_NEARLL3,
270
  THR_COMP_NEAR_NEWLL3,
271
  THR_COMP_GLOBAL_GLOBALLL3,
272
273
  THR_COMP_NEAR_NEARLG,
274
  THR_COMP_NEW_NEWLG,
275
  THR_COMP_NEW_NEARESTLG,
276
  THR_COMP_NEAREST_NEWLG,
277
  THR_COMP_NEW_NEARLG,
278
  THR_COMP_NEAR_NEWLG,
279
  THR_COMP_GLOBAL_GLOBALLG,
280
281
  THR_COMP_NEAR_NEARBA,
282
  THR_COMP_NEW_NEWBA,
283
  THR_COMP_NEW_NEARESTBA,
284
  THR_COMP_NEAREST_NEWBA,
285
  THR_COMP_NEW_NEARBA,
286
  THR_COMP_NEAR_NEWBA,
287
  THR_COMP_GLOBAL_GLOBALBA,
288
289
  THR_DC,
290
  THR_PAETH,
291
  THR_SMOOTH,
292
  THR_SMOOTH_V,
293
  THR_SMOOTH_H,
294
  THR_H_PRED,
295
  THR_V_PRED,
296
  THR_D135_PRED,
297
  THR_D203_PRED,
298
  THR_D157_PRED,
299
  THR_D67_PRED,
300
  THR_D113_PRED,
301
  THR_D45_PRED,
302
};
303
304
/*!\cond */
305
typedef struct SingleInterModeState {
306
  int64_t rd;
307
  MV_REFERENCE_FRAME ref_frame;
308
  int valid;
309
} SingleInterModeState;
310
311
typedef struct InterModeSearchState {
312
  int64_t best_rd;
313
  int64_t best_skip_rd[2];
314
  MB_MODE_INFO best_mbmode;
315
  int best_rate_y;
316
  int best_rate_uv;
317
  int best_mode_skippable;
318
  int best_skip2;
319
  THR_MODES best_mode_index;
320
  int num_available_refs;
321
  int64_t dist_refs[REF_FRAMES];
322
  int dist_order_refs[REF_FRAMES];
323
  int64_t mode_threshold[MAX_MODES];
324
  int64_t best_intra_rd;
325
  unsigned int best_pred_sse;
326
327
  /*!
328
   * \brief Keep track of best intra rd for use in compound mode.
329
   */
330
  int64_t best_pred_rd[REFERENCE_MODES];
331
  // Save a set of single_newmv for each checked ref_mv.
332
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
333
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
334
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
335
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
336
  // The rd of simple translation in single inter modes
337
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
338
  int64_t best_single_rd[REF_FRAMES];
339
  PREDICTION_MODE best_single_mode[REF_FRAMES];
340
341
  // Single search results by [directions][modes][reference frames]
342
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
343
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
344
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
345
                                            [FWD_REFS];
346
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
347
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
348
  IntraModeSearchState intra_search_state;
349
  RD_STATS best_y_rdcost;
350
} InterModeSearchState;
351
/*!\endcond */
352
353
0
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
354
0
  for (int i = 0; i < BLOCK_SIZES_ALL; ++i) {
355
0
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[i];
356
0
    md->ready = 0;
357
0
    md->num = 0;
358
0
    md->dist_sum = 0;
359
0
    md->ld_sum = 0;
360
0
    md->sse_sum = 0;
361
0
    md->sse_sse_sum = 0;
362
0
    md->sse_ld_sum = 0;
363
0
  }
364
0
}
365
366
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
367
                             int64_t sse, int *est_residue_cost,
368
0
                             int64_t *est_dist) {
369
0
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
370
0
  if (md->ready) {
371
0
    if (sse < md->dist_mean) {
372
0
      *est_residue_cost = 0;
373
0
      *est_dist = sse;
374
0
    } else {
375
0
      *est_dist = (int64_t)round(md->dist_mean);
376
0
      const double est_ld = md->a * sse + md->b;
377
      // Clamp estimated rate cost by INT_MAX / 2.
378
      // TODO(angiebird@google.com): find better solution than clamping.
379
0
      if (fabs(est_ld) < 1e-2) {
380
0
        *est_residue_cost = INT_MAX / 2;
381
0
      } else {
382
0
        double est_residue_cost_dbl = ((sse - md->dist_mean) / est_ld);
383
0
        if (est_residue_cost_dbl < 0) {
384
0
          *est_residue_cost = 0;
385
0
        } else {
386
0
          *est_residue_cost =
387
0
              (int)AOMMIN((int64_t)round(est_residue_cost_dbl), INT_MAX / 2);
388
0
        }
389
0
      }
390
0
      if (*est_residue_cost <= 0) {
391
0
        *est_residue_cost = 0;
392
0
        *est_dist = sse;
393
0
      }
394
0
    }
395
0
    return 1;
396
0
  }
397
0
  return 0;
398
0
}
399
400
0
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
401
0
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
402
0
    const int block_idx = inter_mode_data_block_idx(bsize);
403
0
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
404
0
    if (block_idx == -1) continue;
405
0
    if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64)) {
406
0
      continue;
407
0
    } else {
408
0
      if (md->ready == 0) {
409
0
        md->dist_mean = md->dist_sum / md->num;
410
0
        md->ld_mean = md->ld_sum / md->num;
411
0
        md->sse_mean = md->sse_sum / md->num;
412
0
        md->sse_sse_mean = md->sse_sse_sum / md->num;
413
0
        md->sse_ld_mean = md->sse_ld_sum / md->num;
414
0
      } else {
415
0
        const double factor = 3;
416
0
        md->dist_mean =
417
0
            (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
418
0
        md->ld_mean =
419
0
            (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
420
0
        md->sse_mean =
421
0
            (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
422
0
        md->sse_sse_mean =
423
0
            (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
424
0
            (factor + 1);
425
0
        md->sse_ld_mean =
426
0
            (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
427
0
            (factor + 1);
428
0
      }
429
430
0
      const double my = md->ld_mean;
431
0
      const double mx = md->sse_mean;
432
0
      const double dx = sqrt(md->sse_sse_mean);
433
0
      const double dxy = md->sse_ld_mean;
434
435
0
      md->a = (dxy - mx * my) / (dx * dx - mx * mx);
436
0
      md->b = my - md->a * mx;
437
0
      md->ready = 1;
438
439
0
      md->num = 0;
440
0
      md->dist_sum = 0;
441
0
      md->ld_sum = 0;
442
0
      md->sse_sum = 0;
443
0
      md->sse_sse_sum = 0;
444
0
      md->sse_ld_sum = 0;
445
0
    }
446
0
    (void)rdmult;
447
0
  }
448
0
}
449
450
static inline void inter_mode_data_push(TileDataEnc *tile_data,
451
                                        BLOCK_SIZE bsize, int64_t sse,
452
0
                                        int64_t dist, int residue_cost) {
453
0
  if (residue_cost == 0 || sse == dist) return;
454
0
  const int block_idx = inter_mode_data_block_idx(bsize);
455
0
  if (block_idx == -1) return;
456
0
  InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
457
0
  if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
458
0
    const double ld = (sse - dist) * 1. / residue_cost;
459
0
    ++rd_model->num;
460
0
    rd_model->dist_sum += dist;
461
0
    rd_model->ld_sum += ld;
462
0
    rd_model->sse_sum += sse;
463
0
    rd_model->sse_sse_sum += (double)sse * (double)sse;
464
0
    rd_model->sse_ld_sum += sse * ld;
465
0
  }
466
0
}
467
468
static inline void inter_modes_info_push(InterModesInfo *inter_modes_info,
469
                                         int mode_rate, int64_t sse, int64_t rd,
470
                                         RD_STATS *rd_cost, RD_STATS *rd_cost_y,
471
                                         RD_STATS *rd_cost_uv,
472
0
                                         const MB_MODE_INFO *mbmi) {
473
0
  const int num = inter_modes_info->num;
474
0
  assert(num < MAX_INTER_MODES);
475
0
  inter_modes_info->mbmi_arr[num] = *mbmi;
476
0
  inter_modes_info->mode_rate_arr[num] = mode_rate;
477
0
  inter_modes_info->sse_arr[num] = sse;
478
0
  inter_modes_info->est_rd_arr[num] = rd;
479
0
  inter_modes_info->rd_cost_arr[num] = *rd_cost;
480
0
  inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
481
0
  inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
482
0
  ++inter_modes_info->num;
483
0
}
484
485
0
static int compare_rd_idx_pair(const void *a, const void *b) {
486
0
  if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
487
    // To avoid inconsistency in qsort() ordering when two elements are equal,
488
    // using idx as tie breaker. Refer aomedia:2928
489
0
    if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
490
0
      return 0;
491
0
    else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
492
0
      return 1;
493
0
    else
494
0
      return -1;
495
0
  } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
496
0
    return 1;
497
0
  } else {
498
0
    return -1;
499
0
  }
500
0
}
501
502
static inline void inter_modes_info_sort(const InterModesInfo *inter_modes_info,
503
0
                                         RdIdxPair *rd_idx_pair_arr) {
504
0
  if (inter_modes_info->num == 0) {
505
0
    return;
506
0
  }
507
0
  for (int i = 0; i < inter_modes_info->num; ++i) {
508
0
    rd_idx_pair_arr[i].idx = i;
509
0
    rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
510
0
  }
511
0
  qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
512
0
        compare_rd_idx_pair);
513
0
}
514
515
// Initialize estimated RD Cost records of compound average.
516
static inline void init_comp_avg_est_rd(
517
0
    struct macroblock *x, bool skip_comp_eval_using_top_comp_avg_est_rd) {
518
0
  if (!skip_comp_eval_using_top_comp_avg_est_rd) return;
519
520
0
  for (int j = 0; j < TOP_COMP_AVG_EST_RD_COUNT; j++) {
521
0
    x->top_comp_avg_est_rd[j] = INT64_MAX;
522
0
  }
523
0
}
524
525
// Similar to get_horver_correlation, but also takes into account first
526
// row/column, when computing horizontal/vertical correlation.
527
void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
528
                                       int width, int height, float *hcorr,
529
0
                                       float *vcorr) {
530
  // The following notation is used:
531
  // x - current pixel
532
  // y - left neighbor pixel
533
  // z - top neighbor pixel
534
0
  int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
535
0
  int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
536
0
  int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
537
538
  // First, process horizontal correlation on just the first row
539
0
  x_sum += diff[0];
540
0
  x2_sum += diff[0] * diff[0];
541
0
  x_firstrow += diff[0];
542
0
  x2_firstrow += diff[0] * diff[0];
543
0
  for (int j = 1; j < width; ++j) {
544
0
    const int16_t x = diff[j];
545
0
    const int16_t y = diff[j - 1];
546
0
    x_sum += x;
547
0
    x_firstrow += x;
548
0
    x2_sum += x * x;
549
0
    x2_firstrow += x * x;
550
0
    xy_sum += x * y;
551
0
  }
552
553
  // Process vertical correlation in the first column
554
0
  x_firstcol += diff[0];
555
0
  x2_firstcol += diff[0] * diff[0];
556
0
  for (int i = 1; i < height; ++i) {
557
0
    const int16_t x = diff[i * stride];
558
0
    const int16_t z = diff[(i - 1) * stride];
559
0
    x_sum += x;
560
0
    x_firstcol += x;
561
0
    x2_sum += x * x;
562
0
    x2_firstcol += x * x;
563
0
    xz_sum += x * z;
564
0
  }
565
566
  // Now process horiz and vert correlation through the rest unit
567
0
  for (int i = 1; i < height; ++i) {
568
0
    for (int j = 1; j < width; ++j) {
569
0
      const int16_t x = diff[i * stride + j];
570
0
      const int16_t y = diff[i * stride + j - 1];
571
0
      const int16_t z = diff[(i - 1) * stride + j];
572
0
      x_sum += x;
573
0
      x2_sum += x * x;
574
0
      xy_sum += x * y;
575
0
      xz_sum += x * z;
576
0
    }
577
0
  }
578
579
0
  for (int j = 0; j < width; ++j) {
580
0
    x_finalrow += diff[(height - 1) * stride + j];
581
0
    x2_finalrow +=
582
0
        diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
583
0
  }
584
0
  for (int i = 0; i < height; ++i) {
585
0
    x_finalcol += diff[i * stride + width - 1];
586
0
    x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
587
0
  }
588
589
0
  int64_t xhor_sum = x_sum - x_finalcol;
590
0
  int64_t xver_sum = x_sum - x_finalrow;
591
0
  int64_t y_sum = x_sum - x_firstcol;
592
0
  int64_t z_sum = x_sum - x_firstrow;
593
0
  int64_t x2hor_sum = x2_sum - x2_finalcol;
594
0
  int64_t x2ver_sum = x2_sum - x2_finalrow;
595
0
  int64_t y2_sum = x2_sum - x2_firstcol;
596
0
  int64_t z2_sum = x2_sum - x2_firstrow;
597
598
0
  const float num_hor = (float)(height * (width - 1));
599
0
  const float num_ver = (float)((height - 1) * width);
600
601
0
  const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
602
0
  const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
603
604
0
  const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
605
0
  const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
606
607
0
  const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
608
0
  const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
609
610
0
  if (xhor_var_n > 0 && y_var_n > 0) {
611
0
    *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
612
0
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
613
0
  } else {
614
0
    *hcorr = 1.0;
615
0
  }
616
0
  if (xver_var_n > 0 && z_var_n > 0) {
617
0
    *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
618
0
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
619
0
  } else {
620
0
    *vcorr = 1.0;
621
0
  }
622
0
}
623
624
static void get_variance_stats_hbd(const MACROBLOCK *x, int64_t *src_var,
625
0
                                   int64_t *rec_var) {
626
0
  const MACROBLOCKD *xd = &x->e_mbd;
627
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
628
0
  const struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
629
0
  const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
630
631
0
  BLOCK_SIZE bsize = mbmi->bsize;
632
0
  int bw = block_size_wide[bsize];
633
0
  int bh = block_size_high[bsize];
634
635
0
  static const int gau_filter[3][3] = {
636
0
    { 1, 2, 1 },
637
0
    { 2, 4, 2 },
638
0
    { 1, 2, 1 },
639
0
  };
640
641
0
  DECLARE_ALIGNED(16, uint16_t, dclevel[(MAX_SB_SIZE + 2) * (MAX_SB_SIZE + 2)]);
642
643
0
  uint16_t *pred_ptr = &dclevel[bw + 1];
644
0
  int pred_stride = xd->plane[0].dst.stride;
645
646
0
  for (int idy = -1; idy < bh + 1; ++idy) {
647
0
    for (int idx = -1; idx < bw + 1; ++idx) {
648
0
      int offset_idy = idy;
649
0
      int offset_idx = idx;
650
0
      if (idy == -1) offset_idy = 0;
651
0
      if (idy == bh) offset_idy = bh - 1;
652
0
      if (idx == -1) offset_idx = 0;
653
0
      if (idx == bw) offset_idx = bw - 1;
654
655
0
      int offset = offset_idy * pred_stride + offset_idx;
656
0
      pred_ptr[idy * bw + idx] = CONVERT_TO_SHORTPTR(pd->dst.buf)[offset];
657
0
    }
658
0
  }
659
660
0
  *rec_var = 0;
661
0
  for (int idy = 0; idy < bh; ++idy) {
662
0
    for (int idx = 0; idx < bw; ++idx) {
663
0
      int sum = 0;
664
0
      for (int iy = 0; iy < 3; ++iy)
665
0
        for (int ix = 0; ix < 3; ++ix)
666
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
667
0
                 gau_filter[iy][ix];
668
669
0
      sum = sum >> 4;
670
671
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
672
0
      *rec_var += diff * diff;
673
0
    }
674
0
  }
675
0
  *rec_var <<= 4;
676
677
0
  int src_stride = p->src.stride;
678
0
  for (int idy = -1; idy < bh + 1; ++idy) {
679
0
    for (int idx = -1; idx < bw + 1; ++idx) {
680
0
      int offset_idy = idy;
681
0
      int offset_idx = idx;
682
0
      if (idy == -1) offset_idy = 0;
683
0
      if (idy == bh) offset_idy = bh - 1;
684
0
      if (idx == -1) offset_idx = 0;
685
0
      if (idx == bw) offset_idx = bw - 1;
686
687
0
      int offset = offset_idy * src_stride + offset_idx;
688
0
      pred_ptr[idy * bw + idx] = CONVERT_TO_SHORTPTR(p->src.buf)[offset];
689
0
    }
690
0
  }
691
692
0
  *src_var = 0;
693
0
  for (int idy = 0; idy < bh; ++idy) {
694
0
    for (int idx = 0; idx < bw; ++idx) {
695
0
      int sum = 0;
696
0
      for (int iy = 0; iy < 3; ++iy)
697
0
        for (int ix = 0; ix < 3; ++ix)
698
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
699
0
                 gau_filter[iy][ix];
700
701
0
      sum = sum >> 4;
702
703
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
704
0
      *src_var += diff * diff;
705
0
    }
706
0
  }
707
0
  *src_var <<= 4;
708
0
}
709
710
static void get_variance_stats(const MACROBLOCK *x, int64_t *src_var,
711
0
                               int64_t *rec_var) {
712
0
  const MACROBLOCKD *xd = &x->e_mbd;
713
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
714
0
  const struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
715
0
  const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
716
717
0
  BLOCK_SIZE bsize = mbmi->bsize;
718
0
  int bw = block_size_wide[bsize];
719
0
  int bh = block_size_high[bsize];
720
721
0
  static const int gau_filter[3][3] = {
722
0
    { 1, 2, 1 },
723
0
    { 2, 4, 2 },
724
0
    { 1, 2, 1 },
725
0
  };
726
727
0
  DECLARE_ALIGNED(16, uint8_t, dclevel[(MAX_SB_SIZE + 2) * (MAX_SB_SIZE + 2)]);
728
729
0
  uint8_t *pred_ptr = &dclevel[bw + 1];
730
0
  int pred_stride = xd->plane[0].dst.stride;
731
732
0
  for (int idy = -1; idy < bh + 1; ++idy) {
733
0
    for (int idx = -1; idx < bw + 1; ++idx) {
734
0
      int offset_idy = idy;
735
0
      int offset_idx = idx;
736
0
      if (idy == -1) offset_idy = 0;
737
0
      if (idy == bh) offset_idy = bh - 1;
738
0
      if (idx == -1) offset_idx = 0;
739
0
      if (idx == bw) offset_idx = bw - 1;
740
741
0
      int offset = offset_idy * pred_stride + offset_idx;
742
0
      pred_ptr[idy * bw + idx] = pd->dst.buf[offset];
743
0
    }
744
0
  }
745
746
0
  *rec_var = 0;
747
0
  for (int idy = 0; idy < bh; ++idy) {
748
0
    for (int idx = 0; idx < bw; ++idx) {
749
0
      int sum = 0;
750
0
      for (int iy = 0; iy < 3; ++iy)
751
0
        for (int ix = 0; ix < 3; ++ix)
752
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
753
0
                 gau_filter[iy][ix];
754
755
0
      sum = sum >> 4;
756
757
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
758
0
      *rec_var += diff * diff;
759
0
    }
760
0
  }
761
0
  *rec_var <<= 4;
762
763
0
  int src_stride = p->src.stride;
764
0
  for (int idy = -1; idy < bh + 1; ++idy) {
765
0
    for (int idx = -1; idx < bw + 1; ++idx) {
766
0
      int offset_idy = idy;
767
0
      int offset_idx = idx;
768
0
      if (idy == -1) offset_idy = 0;
769
0
      if (idy == bh) offset_idy = bh - 1;
770
0
      if (idx == -1) offset_idx = 0;
771
0
      if (idx == bw) offset_idx = bw - 1;
772
773
0
      int offset = offset_idy * src_stride + offset_idx;
774
0
      pred_ptr[idy * bw + idx] = p->src.buf[offset];
775
0
    }
776
0
  }
777
778
0
  *src_var = 0;
779
0
  for (int idy = 0; idy < bh; ++idy) {
780
0
    for (int idx = 0; idx < bw; ++idx) {
781
0
      int sum = 0;
782
0
      for (int iy = 0; iy < 3; ++iy)
783
0
        for (int ix = 0; ix < 3; ++ix)
784
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
785
0
                 gau_filter[iy][ix];
786
787
0
      sum = sum >> 4;
788
789
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
790
0
      *src_var += diff * diff;
791
0
    }
792
0
  }
793
0
  *src_var <<= 4;
794
0
}
795
796
static void adjust_rdcost(const AV1_COMP *cpi, const MACROBLOCK *x,
797
0
                          RD_STATS *rd_cost, bool is_inter_pred) {
798
0
  if ((cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
799
0
       cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) &&
800
0
      is_inter_pred) {
801
    // Tune IQ and SSIMULACRA2 can be used to encode layered images, where
802
    // keyframes could be encoded at a lower or similar quality (i.e. higher
803
    // QP) than inter-coded frames.
804
    // In this case, libaom tends to underestimate the true RD cost of inter
805
    // prediction candidates, causing encoded file size to increase without a
806
    // corresponding increase in quality.
807
    // When both intra and inter encoded block candidates are available (with
808
    // rdcosts close to each other), the intra-coded candidate was subjectively
809
    // observed to be a bit less blurry, with a corresponding increase in
810
    // SSIMULACRA 2 scores.
811
    // Apply a 1.125x inter block bias to increase overall perceptual
812
    // compression efficiency, while still allowing the encoder to pick inter
813
    // prediction when it's beneficial.
814
0
    rd_cost->dist += rd_cost->dist >> 3;
815
0
    rd_cost->rdcost += rd_cost->rdcost >> 3;
816
0
    return;
817
0
  }
818
819
0
  if (cpi->oxcf.algo_cfg.sharpness != 3) return;
820
821
0
  if (frame_is_kf_gf_arf(cpi)) return;
822
823
0
  int64_t src_var, rec_var;
824
825
0
  const bool is_hbd = is_cur_buf_hbd(&x->e_mbd);
826
0
  if (is_hbd)
827
0
    get_variance_stats_hbd(x, &src_var, &rec_var);
828
0
  else
829
0
    get_variance_stats(x, &src_var, &rec_var);
830
831
0
  if (src_var <= rec_var) return;
832
833
0
  int64_t var_offset = src_var - rec_var;
834
835
0
  rd_cost->dist += var_offset;
836
837
0
  rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
838
0
}
839
840
static void adjust_cost(const AV1_COMP *cpi, const MACROBLOCK *x,
841
0
                        int64_t *rd_cost, bool is_inter_pred) {
842
0
  if ((cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ ||
843
0
       cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) &&
844
0
      is_inter_pred) {
845
0
    *rd_cost += *rd_cost >> 3;
846
0
    return;
847
0
  }
848
849
0
  if (cpi->oxcf.algo_cfg.sharpness != 3) return;
850
851
0
  if (frame_is_kf_gf_arf(cpi)) return;
852
853
0
  int64_t src_var, rec_var;
854
0
  const bool is_hbd = is_cur_buf_hbd(&x->e_mbd);
855
856
0
  if (is_hbd)
857
0
    get_variance_stats_hbd(x, &src_var, &rec_var);
858
0
  else
859
0
    get_variance_stats(x, &src_var, &rec_var);
860
861
0
  if (src_var <= rec_var) return;
862
863
0
  int64_t var_offset = src_var - rec_var;
864
865
0
  *rd_cost += RDCOST(x->rdmult, 0, var_offset);
866
0
}
867
868
static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
869
0
                       int64_t *sse_y) {
870
0
  const AV1_COMMON *cm = &cpi->common;
871
0
  const int num_planes = av1_num_planes(cm);
872
0
  const MACROBLOCKD *xd = &x->e_mbd;
873
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
874
0
  int64_t total_sse = 0;
875
0
  for (int plane = 0; plane < num_planes; ++plane) {
876
0
    if (plane && !xd->is_chroma_ref) break;
877
0
    const struct macroblock_plane *const p = &x->plane[plane];
878
0
    const struct macroblockd_plane *const pd = &xd->plane[plane];
879
0
    const BLOCK_SIZE bs =
880
0
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
881
0
    unsigned int sse;
882
883
0
    cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
884
0
                            pd->dst.stride, &sse);
885
0
    total_sse += sse;
886
0
    if (!plane && sse_y) *sse_y = sse;
887
0
  }
888
0
  total_sse <<= 4;
889
0
  return total_sse;
890
0
}
891
892
// C reference implementation of the transform-block error kernel.
// Returns the sum of squared differences between the original and
// dequantized coefficients, and stores the sum of squared original
// coefficients in *ssz. SIMD versions must match this bit-exactly, so the
// per-element products are intentionally computed in (32-bit) int.
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t err_acc = 0;
  int64_t energy_acc = 0;

  for (intptr_t k = 0; k < block_size; ++k) {
    const int delta = coeff[k] - dqcoeff[k];
    err_acc += delta * delta;
    energy_acc += coeff[k] * coeff[k];
  }

  *ssz = energy_acc;
  return err_acc;
}
906
907
// Low-precision (16-bit coefficient) variant of the block error kernel:
// returns the sum of squared differences between coeff[] and dqcoeff[].
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t acc = 0;

  for (intptr_t k = 0; k < block_size; ++k) {
    const int delta = coeff[k] - dqcoeff[k];
    acc += delta * delta;
  }

  return acc;
}
918
919
#if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth block error: like av1_block_error_c, but the accumulated
// error and coefficient energy are rescaled back to 8-bit-equivalent units
// with rounding, by a right shift of 2*(bd-8).
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = (1 << shift) >> 1;  // half an output unit
  int64_t err_acc = 0;
  int64_t energy_acc = 0;

  for (intptr_t k = 0; k < block_size; ++k) {
    const int64_t delta = coeff[k] - dqcoeff[k];
    err_acc += delta * delta;
    energy_acc += (int64_t)coeff[k] * (int64_t)coeff[k];
  }

  *ssz = (energy_acc + rounding) >> shift;
  return (err_acc + rounding) >> shift;
}
#endif
940
941
// Returns 1 if the diagonal intra mode `mode` may be skipped because the
// best intra mode found so far is not one of its two adjacent principal
// directions (i.e. the neighborhood gives no evidence this angle helps).
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default:
      return 0;
  }
}
957
958
// Returns the rate cost of signalling the inter prediction mode `mode`.
// Compound modes are coded with a single symbol; single-reference modes are
// coded as a small decision tree (NEWMV? -> GLOBALMV? -> NEAREST/NEAR), with
// each level using its own context bits extracted from mode_context.
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  // Level 0: is this NEWMV?
  const int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[newmv_ctx][0];

  int cost = mode_costs->newmv_mode_cost[newmv_ctx][1];

  // Level 1: is this GLOBALMV?
  const int16_t zeromv_ctx =
      (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV)
    return cost + mode_costs->zeromv_mode_cost[zeromv_ctx][0];

  cost += mode_costs->zeromv_mode_cost[zeromv_ctx][1];

  // Level 2: NEARESTMV vs NEARMV.
  const int16_t refmv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  return cost + mode_costs->refmv_mode_cost[refmv_ctx][mode != NEARESTMV];
}
988
989
// Extracts the single-reference prediction mode for reference slot
// `ref_idx` (0 or 1) of a (possibly compound) mode.
static inline PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
                                              int ref_idx) {
  if (ref_idx) return compound_ref1_mode(this_mode);
  return compound_ref0_mode(this_mode);
}
994
995
// Computes the signalling cost of every possible reference frame choice for
// the current block, given the entropy-coding contexts derived from the
// neighborhood.
//
// Outputs:
//   ref_costs_single[ref]       - cost of coding `ref` as a single reference
//                                 (includes the intra/inter flag cost).
//   ref_costs_comp[ref0][ref1]  - cost of coding the compound pair
//                                 (ref0, ref1).
//
// If the segment forces the reference frame (SEG_LVL_REF_FRAME), no reference
// bits are coded and every cost is zero.
static inline void estimate_ref_frame_costs(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
    int segment_id, unsigned int *ref_costs_single,
    unsigned int (*ref_costs_comp)[REF_FRAMES]) {
  int seg_ref_active =
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    // Reference frame is dictated by the segment: nothing is signalled.
    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
    int ref_frame;
    for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
      memset(ref_costs_comp[ref_frame], 0,
             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
  } else {
    // base_cost is the cost of signalling "this block is inter"; the intra
    // cost stands alone in ref_costs_single[INTRA_FRAME].
    int intra_inter_ctx = av1_get_intra_inter_context(xd);
    ref_costs_single[INTRA_FRAME] =
        mode_costs->intra_inter_cost[intra_inter_ctx][0];
    unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];

    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
      ref_costs_single[i] = base_cost;

    const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
    const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
    const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
    const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
    const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
    const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);

    // Determine cost of a single ref frame, where frame types are represented
    // by a tree:
    // Level 0: add cost whether this ref is a forward or backward ref
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];

    // Level 1: if this ref is forward ref,
    // add cost whether it is last/last2 or last3/golden
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];

    // Level 1: if this ref is backward ref
    // then add cost whether this ref is altref or backward ref
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];

    // Level 2: further add cost whether this ref is last or last2
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];

    // Level 2: last3 or golden
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];

    // Level 2: bwdref or altref2
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p6][5][1];

    if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
      // Similar to single ref, determine cost of compound ref frames.
      // cost_compound_refs = cost_first_ref + cost_second_ref
      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);

      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };

      // Bidirectional pairs: forward refs carry base_cost plus the
      // "bidirectional compound" type bit; backward refs contribute only
      // their own tree bits (added below).
      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
              base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
      ref_bicomp_costs[ALTREF_FRAME] = 0;

      // cost of first ref frame
      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];

      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];

      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];

      // cost of second ref frame
      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];

      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];

      // cost: if one ref frame is forward ref, the other ref is backward ref
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
          ref_costs_comp[ref0][ref1] =
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
        }
      }

      // cost: if both ref frames are the same side.
      // Only the four unidirectional pairs allowed by AV1 are filled in.
      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
    } else {
      // Compound prediction is disabled for this frame; fill the legal pairs
      // with a nominal cost so RD comparisons remain well defined.
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
          ref_costs_comp[ref0][ref1] = 512;
      }
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
    }
  }
}
1156
1157
// Snapshots the current coding decisions (skip flags, mode info, and the
// relevant part of mbmi_ext) into the PICK_MODE_CONTEXT so the partition
// search can restore them later if this candidate is chosen.
static inline void store_coding_context(
#if CONFIG_INTERNAL_STATS
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
#else
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
#endif  // CONFIG_INTERNAL_STATS
    int skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;

  // Take a snapshot of the coding context so it can be
  // restored if we decide to encode this way
  ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
  ctx->skippable = skippable;
#if CONFIG_INTERNAL_STATS
  // Mode index is recorded only for internal statistics builds.
  ctx->best_mode_index = mode_index;
#endif  // CONFIG_INTERNAL_STATS
  ctx->mic = *xd->mi[0];
  // Copy only the ref-mv stack entries relevant to the chosen ref frame type.
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
}
1177
1178
// Prepares per-reference-frame data for inter mode search: sets up the
// prediction buffers for `ref_frame` in yv12_mb, builds the candidate MV
// list (ref-mv stack), and runs av1_mv_pred() to pick a good search center.
//
// Ordering matters: av1_mv_pred() does not support scaled references, so a
// scaled buffer (when present) is installed first, then swapped back to the
// unscaled reference at the end for subsequent use.
static inline void setup_buffer_ref_mvs_inter(
    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
    BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref_frame);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const struct scale_factors *const sf =
      get_ref_scale_factors_const(cm, ref_frame);
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
  assert(yv12 != NULL);

  if (scaled_ref_frame) {
    // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
    // support scaling.
    av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
                         num_planes);
  } else {
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
  }

  // Gets an initial list of candidate vectors from neighbours and orders them
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  // Further refinement that is encode side only to test the top few candidates
  // in full and choose the best as the center point for subsequent searches.
  // The current implementation doesn't support scaling.
  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
              ref_frame, block_size);

  // Go back to unscaled reference.
  if (scaled_ref_frame) {
    // We had temporarily setup pred block based on scaled reference above. Go
    // back to unscaled reference now, for subsequent use.
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
  }
}
1222
1223
0
#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
1224
0
#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
1225
1226
// TODO(jingning): this mv clamping function should be block size dependent.
1227
0
static inline void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
1228
0
  const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
1229
0
                                     xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
1230
0
                                     xd->mb_to_top_edge - LEFT_TOP_MARGIN,
1231
0
                                     xd->mb_to_bottom_edge +
1232
0
                                         RIGHT_BOTTOM_MARGIN };
1233
0
  clamp_mv(mv, &mv_limits);
1234
0
}
1235
1236
/* If the current mode shares the same mv with other modes with higher cost,
 * skip this mode. */
// Returns 1 (skip) when, for a single-reference block, `this_mode` is
// guaranteed to produce the same motion vector as an already-searched cheaper
// mode (NEARMV/GLOBALMV/NEARESTMV aliasing depends on the ref-mv stack size
// and whether global motion is translational). On skip, the compare mode's
// modelled RD is copied over so later pruning still sees a valid value.
static int skip_repeated_mv(const AV1_COMMON *const cm,
                            const MACROBLOCK *const x,
                            PREDICTION_MODE this_mode,
                            const MV_REFERENCE_FRAME ref_frames[2],
                            InterModeSearchState *search_state) {
  const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  PREDICTION_MODE compare_mode = MB_MODE_COUNT;
  if (!is_comp_pred) {
    // The two if-blocks per mode below are mutually exclusive (they test
    // ref_mv_count == 0 vs == 1), so at most one assigns compare_mode.
    if (this_mode == NEARMV) {
      if (ref_mv_count == 0) {
        // NEARMV has the same motion vector as NEARESTMV
        compare_mode = NEARESTMV;
      }
      if (ref_mv_count == 1 &&
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
        // NEARMV has the same motion vector as GLOBALMV
        compare_mode = GLOBALMV;
      }
    }
    if (this_mode == GLOBALMV) {
      if (ref_mv_count == 0 &&
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
        // GLOBALMV has the same motion vector as NEARESTMV
        compare_mode = NEARESTMV;
      }
      if (ref_mv_count == 1) {
        // GLOBALMV has the same motion vector as NEARMV
        compare_mode = NEARMV;
      }
    }

    if (compare_mode != MB_MODE_COUNT) {
      // Use modelled_rd to check whether compare mode was searched
      if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
          INT64_MAX) {
        const int16_t mode_ctx =
            av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
        const int compare_cost =
            cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
        const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);

        // Only skip if the mode cost is larger than compare mode cost
        if (this_cost > compare_cost) {
          // Reuse the cheaper mode's modelled RD for this mode.
          search_state->modelled_rd[this_mode][0][ref_frames[0]] =
              search_state->modelled_rd[compare_mode][0][ref_frames[0]];
          return 1;
        }
      }
    }
  }
  return 0;
}
1293
1294
static inline int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1295
                                     const AV1_COMMON *cm,
1296
0
                                     const MACROBLOCK *x) {
1297
0
  const MACROBLOCKD *const xd = &x->e_mbd;
1298
0
  *out_mv = in_mv;
1299
0
  lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1300
0
                     cm->features.cur_frame_force_integer_mv);
1301
0
  clamp_mv2(&out_mv->as_mv, xd);
1302
0
  return av1_is_fullmv_in_range(&x->mv_limits,
1303
0
                                get_fullmv_from_mv(&out_mv->as_mv));
1304
0
}
1305
1306
// To use single newmv directly for compound modes, need to clamp the mv to the
1307
// valid mv range. Without this, encoder would generate out of range mv, and
1308
// this is seen in 8k encoding.
1309
static inline void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1310
0
                                     int ref_idx) {
1311
0
  const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1312
0
  SubpelMvLimits mv_limits;
1313
1314
0
  av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1315
0
  clamp_mv(&mv->as_mv, &mv_limits);
1316
0
}
1317
1318
// Determines the NEWMV motion vector(s) for the current mode.
//
// For compound modes, previously found single-reference NEWMV results are
// reused (clamped into range) where valid, and *rate_mv is recomputed for
// the NEW component(s). For single-reference NEWMV, a motion search is run
// (optionally with a reduced range derived from earlier ref_mv_idx results)
// and the result is cached in args->single_newmv* for later compound reuse.
//
// Returns 0 on success; INT64_MAX when the search failed (INVALID_MV) or a
// speed feature marked this ref_mv_idx as skippable, meaning the caller
// should abandon this mode.
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
                            int *const rate_mv, HandleInterModeArgs *const args,
                            inter_mode_info *mode_info) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  const int ref_mv_idx = mbmi->ref_mv_idx;

  if (is_comp_pred) {
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
    if (this_mode == NEW_NEWMV) {
      // Both MVs are NEW: seed each from the cached single-ref result.
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      *rate_mv = 0;
      for (int i = 0; i < 2; ++i) {
        const int_mv ref_mv = av1_get_ref_mv(x, i);
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
                                    x->mv_costs->nmv_joint_cost,
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only the second MV is NEW.
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    } else {
      // Only the first MV is NEW.
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    }
  } else {
    // Single ref case.
    const int ref_idx = 0;
    int search_range = INT_MAX;

    // Speed feature: for ref_mv_idx > 0, bound the search range using the
    // closest previous ref MV and the displacement its search produced.
    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
      int min_mv_diff = INT_MAX;
      int best_match = -1;
      MV prev_ref_mv[2] = { { 0 } };
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
                                                     idx, &x->mbmi_ext)
                               .as_mv;
        // Chebyshev (max-component) distance between ref MVs, in 1/8 pel.
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
                                       abs(ref_mv.col - prev_ref_mv[idx].col));

        if (min_mv_diff > ref_mv_diff) {
          min_mv_diff = ref_mv_diff;
          best_match = idx;
        }
      }

      // Only restrict the range when a previous ref MV is within 16 pels.
      if (min_mv_diff < (16 << 3)) {
        if (args->single_newmv_valid[best_match][refs[0]]) {
          search_range = min_mv_diff;
          search_range +=
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
                         prev_ref_mv[best_match].row),
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
                         prev_ref_mv[best_match].col));
          // Get full pixel search range.
          search_range = (search_range + 4) >> 3;
        }
      }
    }

    int_mv best_mv;
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
                             mode_info, &best_mv, args);
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;

    // Cache the result so compound modes can reuse it.
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
    cur_mv[0].as_int = best_mv.as_int;

    // Return after single_newmv is set.
    if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
  }

  return 0;
}
1422
1423
static inline void update_mode_start_end_index(
1424
    const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
1425
    int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
1426
0
    int interintra_allowed, int eval_motion_mode) {
1427
0
  *mode_index_start = (int)SIMPLE_TRANSLATION;
1428
0
  *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1429
0
  if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1430
0
    if (!eval_motion_mode) {
1431
0
      *mode_index_end = (int)SIMPLE_TRANSLATION;
1432
0
    } else {
1433
      // Set the start index appropriately to process motion modes other than
1434
      // simple translation
1435
0
      *mode_index_start = 1;
1436
0
    }
1437
0
  }
1438
0
  if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
1439
0
    *mode_index_end = SIMPLE_TRANSLATION;
1440
0
}
1441
1442
// Increase rd cost of warp and obmc motion modes for low complexity decoding.
1443
static inline void increase_motion_mode_rd(const MB_MODE_INFO *const best_mbmi,
1444
                                           const MB_MODE_INFO *const this_mbmi,
1445
                                           int64_t *const best_scaled_rd,
1446
                                           int64_t *const this_scaled_rd,
1447
                                           int rd_warp_bias_scale_pct,
1448
0
                                           float rd_obmc_bias_scale_pct) {
1449
0
  if (*best_scaled_rd == INT64_MAX || *this_scaled_rd == INT64_MAX) return;
1450
1451
  // Experiments have been performed with increasing the RD cost of warp and
1452
  // obmc motion modes at the below locations of inter mode evaluation.
1453
  // (1). Inter mode evaluation loop in av1_rd_pick_inter_mode().
1454
  // (2). Motion mode evaluation during handle_inter_mode() call.
1455
  // (3). Motion mode evaluation for winner motion modes.
1456
  // (4). Tx search for best inter candidates.
1457
  // Based on the speed quality trade-off results of this speed feature, the rd
1458
  // bias logic is enabled only at (2), (3) and (4).
1459
0
  const double rd_warp_bias_scale = rd_warp_bias_scale_pct / 100.0;
1460
0
  const double rd_obmc_bias_scale = rd_obmc_bias_scale_pct / 100.0;
1461
0
  if (best_mbmi->motion_mode == WARPED_CAUSAL)
1462
0
    *best_scaled_rd += (int64_t)(rd_warp_bias_scale * *best_scaled_rd);
1463
0
  else if (best_mbmi->motion_mode == OBMC_CAUSAL)
1464
0
    *best_scaled_rd += (int64_t)(rd_obmc_bias_scale * *best_scaled_rd);
1465
1466
0
  if (this_mbmi->motion_mode == WARPED_CAUSAL)
1467
0
    *this_scaled_rd += (int64_t)(rd_warp_bias_scale * *this_scaled_rd);
1468
0
  else if (this_mbmi->motion_mode == OBMC_CAUSAL)
1469
0
    *this_scaled_rd += (int64_t)(rd_obmc_bias_scale * *this_scaled_rd);
1470
0
}
1471
1472
/*!\brief AV1 motion mode search
1473
 *
1474
 * \ingroup inter_mode_search
1475
 * Function to search over and determine the motion mode. It will update
1476
 * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1477
 * WARPED_CAUSAL and determine any necessary side information for the selected
1478
 * motion mode. It will also perform the full transform search, unless the
1479
 * input parameter do_tx_search indicates to do an estimation of the RD rather
1480
 * than an RD corresponding to a full transform search. It will return the
1481
 * RD for the final motion_mode.
1482
 * Do the RD search for a given inter mode and compute all information relevant
1483
 * to the input mode. It will compute the best MV,
1484
 * compound parameters (if the mode is a compound mode) and interpolation filter
1485
 * parameters.
1486
 *
1487
 * \param[in]     cpi               Top-level encoder structure.
1488
 * \param[in]     tile_data         Pointer to struct holding adaptive
1489
 *                                  data/contexts/models for the tile during
1490
 *                                  encoding.
1491
 * \param[in]     x                 Pointer to struct holding all the data for
1492
 *                                  the current macroblock.
1493
 * \param[in]     bsize             Current block size.
1494
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
1495
 *                                  information.
1496
 * \param[in,out] rd_stats_y        Struct to keep track of the RD information
1497
 *                                  for only the Y plane.
1498
 * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
1499
 *                                  for only the UV planes.
1500
 * \param[in]     args              HandleInterModeArgs struct holding
1501
 *                                  miscellaneous arguments for inter mode
1502
 *                                  search. See the documentation for this
1503
 *                                  struct for a description of each member.
1504
 * \param[in]     ref_best_rd       Best RD found so far for this block.
1505
 *                                  It is used for early termination of this
1506
 *                                  search if the RD exceeds this value.
1507
 * \param[in,out] ref_skip_rd       A length 2 array, where skip_rd[0] is the
1508
 *                                  best total RD for a skip mode so far, and
1509
 *                                  skip_rd[1] is the best RD for a skip mode so
1510
 *                                  far in luma. This is used as a speed feature
1511
 *                                  to skip the transform search if the computed
1512
 *                                  skip RD for the current mode is not better
1513
 *                                  than the best skip_rd so far.
1514
 * \param[in,out] rate_mv           The rate associated with the motion vectors.
1515
 *                                  This will be modified if a motion search is
1516
 *                                  done in the motion mode search.
1517
 * \param[in,out] orig_dst          A prediction buffer to hold a computed
1518
 *                                  prediction. This will eventually hold the
1519
 *                                  final prediction, and the tmp_dst info will
1520
 *                                  be copied here.
1521
 * \param[in,out] best_est_rd       Estimated RD for motion mode search if
1522
 *                                  do_tx_search (see below) is 0.
1523
 * \param[in]     do_tx_search      Parameter to indicate whether or not to do
1524
 *                                  a full transform search. This will compute
1525
 *                                  an estimated RD for the modes without the
1526
 *                                  transform search and later perform the full
1527
 *                                  transform search on the best candidates.
1528
 * \param[in]     inter_modes_info  InterModesInfo struct to hold inter mode
1529
 *                                  information to perform a full transform
1530
 *                                  search only on winning candidates searched
1531
 *                                  with an estimate for transform coding RD.
1532
 * \param[in]     eval_motion_mode  Boolean whether or not to evaluate motion
1533
 *                                  motion modes other than SIMPLE_TRANSLATION.
1534
 * \param[out]    yrd               Stores the rdcost corresponding to encoding
1535
 *                                  the luma plane.
1536
 * \return Returns INT64_MAX if the determined motion mode is invalid and the
1537
 * current motion mode being tested should be skipped. It returns 0 if the
1538
 * motion mode search is a success.
1539
 */
1540
static int64_t motion_mode_rd(
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
    int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
    int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
    int eval_motion_mode, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // Rate of the current mode excluding residual coefficients; the per-mode
  // loop below rebuilds rd_stats->rate from this baseline on each iteration.
  const int rate2_nocoeff = rd_stats->rate;
  int best_xskip_txfm = 0;
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int rate_mv0 = *rate_mv;
  const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
                                 is_interintra_allowed(mbmi) &&
                                 mbmi->compound_idx;
  WARP_SAMPLE_INFO *const warp_sample_info =
      &x->warp_sample_info[mbmi->ref_frame[0]];
  int *pts0 = warp_sample_info->pts;
  int *pts_inref0 = warp_sample_info->pts_inref;

  assert(mbmi->ref_frame[1] != INTRA_FRAME);
  const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
  av1_invalid_rd_stats(&best_rd_stats);
  mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
  *yrd = INT64_MAX;
  if (features->switchable_motion_mode) {
    // Determine which motion modes to search if more than SIMPLE_TRANSLATION
    // is allowed.
    last_motion_mode_allowed = motion_mode_allowed(
        xd->global_motion, xd, mbmi, features->allow_warped_motion);
  }

  if (last_motion_mode_allowed == WARPED_CAUSAL) {
    // Collect projection samples used in least squares approximation of
    // the warped motion parameters if WARPED_CAUSAL is going to be searched.
    // A negative count means the samples have not been gathered yet; the
    // result is cached in warp_sample_info for reuse.
    if (warp_sample_info->num < 0) {
      warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
    }
    mbmi->num_proj_ref = warp_sample_info->num;
  }
  const int total_samples = mbmi->num_proj_ref;
  if (total_samples == 0) {
    // Do not search WARPED_CAUSAL if there are no samples to use to determine
    // warped parameters.
    last_motion_mode_allowed = OBMC_CAUSAL;
  }

  // Snapshot of the incoming block state; restored at the top of every loop
  // iteration so each motion mode starts from the same baseline.
  const MB_MODE_INFO base_mbmi = *mbmi;
  MB_MODE_INFO best_mbmi;
  const int interp_filter = features->interp_filter;
  const int switchable_rate =
      av1_is_interp_needed(xd)
          ? av1_get_switchable_rate(x, xd, interp_filter,
                                    cm->seq_params->enable_dual_filter)
          : 0;
  int64_t best_rd = INT64_MAX;
  int best_rate_mv = rate_mv0;
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int mode_index_start, mode_index_end;
  const int txfm_rd_gate_level =
      get_txfm_rd_gate_level(cm->seq_params->enable_masked_compound,
                             cpi->sf.inter_sf.txfm_rd_gate_level, bsize,
                             TX_SEARCH_MOTION_MODE, eval_motion_mode);

  // Modify the start and end index according to speed features. For example,
  // if SIMPLE_TRANSLATION has already been searched according to
  // the motion_mode_for_winner_cand speed feature, update the mode_index_start
  // to avoid searching it again.
  update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
                              last_motion_mode_allowed, interintra_allowed,
                              eval_motion_mode);
  // Main function loop. This loops over all of the possible motion modes and
  // computes RD to determine the best one. This process includes computing
  // any necessary side information for the motion mode and performing the
  // transform search.
  for (int mode_index = mode_index_start; mode_index <= mode_index_end;
       mode_index++) {
    if (args->skip_motion_mode && mode_index) continue;
    int tmp_rate2 = rate2_nocoeff;
    // Indices beyond last_motion_mode_allowed encode interintra variants.
    const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
    int tmp_rate_mv = rate_mv0;

    *mbmi = base_mbmi;
    if (is_interintra_mode) {
      // Only use SIMPLE_TRANSLATION for interintra
      mbmi->motion_mode = SIMPLE_TRANSLATION;
    } else {
      mbmi->motion_mode = (MOTION_MODE)mode_index;
      assert(mbmi->ref_frame[1] != INTRA_FRAME);
    }

    // At sharpness level 3, OBMC and warped motion are skipped entirely.
    if (cpi->oxcf.algo_cfg.sharpness == 3 &&
        (mbmi->motion_mode == OBMC_CAUSAL ||
         mbmi->motion_mode == WARPED_CAUSAL))
      continue;

    // Do not search OBMC if the probability of selecting it is below a
    // predetermined threshold for this update_type and block size.
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    int use_actual_frame_probs = 1;
    int prune_obmc;
#if CONFIG_FPMT_TEST
    use_actual_frame_probs =
        (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
    if (!use_actual_frame_probs) {
      prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
#endif
    if (use_actual_frame_probs) {
      prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
    if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
        mbmi->motion_mode == OBMC_CAUSAL)
      continue;

    if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
      // SIMPLE_TRANSLATION mode: no need to recalculate.
      // The prediction is calculated before motion_mode_rd() is called in
      // handle_inter_mode()
    } else if (mbmi->motion_mode == OBMC_CAUSAL) {
      const uint32_t cur_mv = mbmi->mv[0].as_int;
      // OBMC_CAUSAL not allowed for compound prediction
      assert(!is_comp_pred);
      if (have_newmv_in_inter_mode(this_mode)) {
        // Redo the single-reference motion search for the OBMC mode and
        // replace the MV rate in tmp_rate2 with the refreshed rate.
        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
                                 &mbmi->mv[0], NULL);
        tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
      }
      if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
        // Build the predictor according to the current motion vector if it has
        // not already been built
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                      0, av1_num_planes(cm) - 1);
      }
      // Build the inter predictor by blending the predictor corresponding to
      // this MV, and the neighboring blocks using the OBMC model
      av1_build_obmc_inter_prediction(
          cm, xd, args->above_pred_buf, args->above_pred_stride,
          args->left_pred_buf, args->left_pred_stride);
#if !CONFIG_REALTIME_ONLY
    } else if (mbmi->motion_mode == WARPED_CAUSAL) {
      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
      mbmi->motion_mode = WARPED_CAUSAL;
      mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
      mbmi->interp_filters =
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));

      // Work on local copies so the cached samples in pts0/pts_inref0 remain
      // intact for subsequent modes.
      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
      // Select the samples according to motion vector difference
      if (mbmi->num_proj_ref > 1) {
        mbmi->num_proj_ref = av1_selectSamples(
            &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
      }

      // Compute the warped motion parameters with a least squares fit
      //  using the collected samples
      if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
                               mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
                               &mbmi->wm_params, mi_row, mi_col)) {
        assert(!is_comp_pred);
        if (have_newmv_in_inter_mode(this_mode)) {
          // Refine MV for NEWMV mode
          const int_mv mv0 = mbmi->mv[0];
          const WarpedMotionParams wm_params0 = mbmi->wm_params;
          const int num_proj_ref0 = mbmi->num_proj_ref;

          const int_mv ref_mv = av1_get_ref_mv(x, 0);
          SUBPEL_MOTION_SEARCH_PARAMS ms_params;
          av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
                                            &ref_mv.as_mv, NULL);

          // Refine MV in a small range.
          av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
                               total_samples, cpi->sf.mv_sf.warp_search_method,
                               cpi->sf.mv_sf.warp_search_iters);

          if (mv0.as_int != mbmi->mv[0].as_int) {
            // Keep the refined MV and WM parameters.
            tmp_rate_mv = av1_mv_bit_cost(
                &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
                x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
          } else {
            // Restore the old MV and WM parameters.
            mbmi->mv[0] = mv0;
            mbmi->wm_params = wm_params0;
            mbmi->num_proj_ref = num_proj_ref0;
          }
        }

        // Build the warped predictor
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                      av1_num_planes(cm) - 1);
      } else {
        // Projection fit failed; skip WARPED_CAUSAL for this block.
        continue;
      }
#endif  // !CONFIG_REALTIME_ONLY
    } else if (is_interintra_mode) {
      const int ret =
          av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
                                      &tmp_rate_mv, &tmp_rate2, orig_dst);
      if (ret < 0) continue;
    }

    // If we are searching newmv and the mv is the same as refmv, skip the
    // current mode
    if (!av1_check_newmv_joint_nonzero(cm, x)) continue;

    // Update rd_stats for the current motion mode
    txfm_info->skip_txfm = 0;
    rd_stats->dist = 0;
    rd_stats->sse = 0;
    rd_stats->skip_txfm = 1;
    rd_stats->rate = tmp_rate2;
    const ModeCosts *mode_costs = &x->mode_costs;
    if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
    if (interintra_allowed) {
      rd_stats->rate +=
          mode_costs->interintra_cost[size_group_lookup[bsize]]
                                     [mbmi->ref_frame[1] == INTRA_FRAME];
    }
    if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
        (mbmi->ref_frame[1] != INTRA_FRAME)) {
      // Signal the motion mode; the CDF/cost table differs depending on
      // whether WARPED_CAUSAL is in the allowed set.
      if (last_motion_mode_allowed == WARPED_CAUSAL) {
        rd_stats->rate +=
            mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
      } else {
        rd_stats->rate +=
            mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
      }
    }

    int64_t this_yrd = INT64_MAX;

    if (!do_tx_search) {
      // Avoid doing a transform search here to speed up the overall mode
      // search. It will be done later in the mode search if the current
      // motion mode seems promising.
      int64_t curr_sse = -1;
      int64_t sse_y = -1;
      int est_residue_cost = 0;
      int64_t est_dist = 0;
      int64_t est_rd = 0;
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        curr_sse = get_sse(cpi, x, &sse_y);
        const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
                                                 &est_residue_cost, &est_dist);
        (void)has_est_rd;
        assert(has_est_rd);
      } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
                 cpi->sf.rt_sf.use_nonrd_pick_mode) {
        model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
            cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
            NULL, &curr_sse, NULL, NULL, NULL);
        sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
      }
      est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
      // Early-out when the estimate is more than 25% above the best estimate
      // so far (est_rd * 0.80 > best <=> est_rd > 1.25 * best).
      if (est_rd * 0.80 > *best_est_rd) {
        mbmi->ref_frame[1] = ref_frame_1;
        continue;
      }
      const int mode_rate = rd_stats->rate;
      rd_stats->rate += est_residue_cost;
      rd_stats->dist = est_dist;
      rd_stats->rdcost = est_rd;
      if (rd_stats->rdcost < *best_est_rd) {
        *best_est_rd = rd_stats->rdcost;
        assert(sse_y >= 0);
        ref_skip_rd[1] = txfm_rd_gate_level
                             ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
                             : INT64_MAX;
      }
      // Record this candidate so the later full transform search can revisit
      // the winners. Compound candidates are skipped in SINGLE_REFERENCE mode.
      if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
        if (!is_comp_pred) {
          assert(curr_sse >= 0);
          inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                                rd_stats->rdcost, rd_stats, rd_stats_y,
                                rd_stats_uv, mbmi);
        }
      } else {
        assert(curr_sse >= 0);
        inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                              rd_stats->rdcost, rd_stats, rd_stats_y,
                              rd_stats_uv, mbmi);
      }
      mbmi->skip_txfm = 0;
    } else {
      // Perform full transform search
      int64_t skip_rd = INT64_MAX;
      int64_t skip_rdy = INT64_MAX;
      if (txfm_rd_gate_level) {
        // Check if the mode is good enough based on skip RD
        int64_t sse_y = INT64_MAX;
        int64_t curr_sse = get_sse(cpi, x, &sse_y);
        skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
        skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
        int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
                                        txfm_rd_gate_level, 0);
        if (!eval_txfm) continue;
      }

      // Do transform search
      const int mode_rate = rd_stats->rate;
      if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
                           rd_stats->rate, ref_best_rd)) {
        // If even the first (simple translation) mode has no valid luma rate,
        // the whole search is hopeless for this ref_best_rd.
        if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
          return INT64_MAX;
        }
        continue;
      }
      const int skip_ctx = av1_get_skip_txfm_context(xd);
      const int y_rate =
          rd_stats->skip_txfm
              ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);

      const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      if (curr_rd < ref_best_rd) {
        // Tighten the reference bound and skip-RD gates for later modes.
        ref_best_rd = curr_rd;
        ref_skip_rd[0] = skip_rd;
        ref_skip_rd[1] = skip_rdy;
      }
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
            rd_stats_y->rate + rd_stats_uv->rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }

    if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
      if (is_nontrans_global_motion(xd, xd->mi[0])) {
        mbmi->interp_filters =
            av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
      }
    }

    if (this_yrd < INT64_MAX) {
      adjust_cost(cpi, x, &this_yrd, /*is_inter_pred=*/true);
    }
    adjust_rdcost(cpi, x, rd_stats, /*is_inter_pred=*/true);
    // Bug 494653438: If do_tx_search is 0, rd_stats_y is uninitialized, so
    // valgrind will warn if we use rd_stats_y->rdcost in a conditional.
    if (!do_tx_search || rd_stats_y->rdcost < INT64_MAX) {
      adjust_rdcost(cpi, x, rd_stats_y, /*is_inter_pred=*/true);
    }

    const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
    if (mode_index == 0) {
      args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
    }
    // Optionally bias the comparison (not the stored RD) in favor of warped /
    // OBMC modes via the speed-feature scale percentages.
    int64_t best_scaled_rd = best_rd;
    int64_t this_scaled_rd = tmp_rd;
    if (mode_index != 0)
      increase_motion_mode_rd(&best_mbmi, mbmi, &best_scaled_rd,
                              &this_scaled_rd,
                              cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct,
                              cpi->sf.inter_sf.bias_obmc_mode_rd_scale_pct);

    if (mode_index == 0 || this_scaled_rd < best_scaled_rd) {
      // Update best_rd data if this is the best motion mode so far
      best_mbmi = *mbmi;
      best_rd = tmp_rd;
      best_rd_stats = *rd_stats;
      best_rd_stats_y = *rd_stats_y;
      best_rate_mv = tmp_rate_mv;
      *yrd = this_yrd;
      if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
      best_xskip_txfm = mbmi->skip_txfm;
    }
  }
  // Update RD and mbmi stats for selected motion mode
  mbmi->ref_frame[1] = ref_frame_1;
  *rate_mv = best_rate_mv;
  if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
    av1_invalid_rd_stats(rd_stats);
    restore_dst_buf(xd, *orig_dst, num_planes);
    return INT64_MAX;
  }
  *mbmi = best_mbmi;
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
  txfm_info->skip_txfm = best_xskip_txfm;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1945
1946
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
1947
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
1948
0
                            const BUFFER_SET *const orig_dst, int64_t best_rd) {
1949
0
  assert(bsize < BLOCK_SIZES_ALL);
1950
0
  const AV1_COMMON *cm = &cpi->common;
1951
0
  const int num_planes = av1_num_planes(cm);
1952
0
  MACROBLOCKD *const xd = &x->e_mbd;
1953
0
  const int mi_row = xd->mi_row;
1954
0
  const int mi_col = xd->mi_col;
1955
0
  int64_t total_sse = 0;
1956
0
  int64_t this_rd = INT64_MAX;
1957
0
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
1958
0
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];
1959
1960
0
  for (int plane = 0; plane < num_planes; ++plane) {
1961
    // Call av1_enc_build_inter_predictor() for one plane at a time.
1962
0
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1963
0
                                  plane, plane);
1964
0
    const struct macroblockd_plane *const pd = &xd->plane[plane];
1965
0
    const BLOCK_SIZE plane_bsize =
1966
0
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
1967
1968
0
    av1_subtract_plane(x, plane_bsize, plane);
1969
1970
0
    int64_t sse =
1971
0
        av1_pixel_diff_dist(x, plane, 0, 0, plane_bsize, plane_bsize, NULL);
1972
0
    if (is_cur_buf_hbd(xd)) sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
1973
0
    sse <<= 4;
1974
0
    total_sse += sse;
1975
    // When current rd cost is more than the best rd, skip evaluation of
1976
    // remaining planes.
1977
0
    this_rd = RDCOST(x->rdmult, rd_stats->rate, total_sse);
1978
0
    if (this_rd > best_rd) break;
1979
0
  }
1980
1981
0
  rd_stats->dist = rd_stats->sse = total_sse;
1982
0
  rd_stats->rdcost = this_rd;
1983
1984
0
  restore_dst_buf(xd, *orig_dst, num_planes);
1985
0
  return 0;
1986
0
}
1987
1988
// Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the relevant
1989
// mode
1990
// Note(rachelbarker): This speed feature currently does not interact correctly
1991
// with global motion. The issue is that, when global motion is used, GLOBALMV
1992
// produces a different prediction to NEARESTMV/NEARMV even if the motion
1993
// vectors are the same. Thus GLOBALMV should not be pruned in this case.
1994
static inline int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
1995
                                      int ref_idx,
1996
                                      const MV_REFERENCE_FRAME *ref_frame,
1997
0
                                      PREDICTION_MODE single_mode) {
1998
0
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1999
0
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
2000
0
  assert(single_mode != NEWMV);
2001
0
  if (single_mode == NEARESTMV) {
2002
0
    return 0;
2003
0
  } else if (single_mode == NEARMV) {
2004
    // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
2005
    // when ref_mv_count = 1, NEARMV is same as GLOBALMV
2006
0
    if (ref_mv_count < 2) return 1;
2007
0
  } else if (single_mode == GLOBALMV) {
2008
    // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
2009
0
    if (ref_mv_count == 0) return 1;
2010
    // when ref_mv_count == 1, NEARMV is same as GLOBALMV
2011
0
    else if (ref_mv_count == 1)
2012
0
      return 0;
2013
2014
0
    int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
2015
    // Check GLOBALMV is matching with any mv in ref_mv_stack
2016
0
    for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
2017
0
      int_mv this_mv;
2018
2019
0
      if (ref_idx == 0)
2020
0
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
2021
0
      else
2022
0
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
2023
2024
0
      if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
2025
0
        return 1;
2026
0
    }
2027
0
  }
2028
0
  return 0;
2029
0
}
2030
2031
static inline int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
2032
                              int ref_idx, int ref_mv_idx,
2033
                              int skip_repeated_ref_mv,
2034
                              const MV_REFERENCE_FRAME *ref_frame,
2035
0
                              const MB_MODE_INFO_EXT *mbmi_ext) {
2036
0
  const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
2037
0
  assert(is_inter_singleref_mode(single_mode));
2038
0
  if (single_mode == NEWMV) {
2039
0
    this_mv->as_int = INVALID_MV;
2040
0
  } else if (single_mode == GLOBALMV) {
2041
0
    if (skip_repeated_ref_mv &&
2042
0
        check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
2043
0
      return 0;
2044
0
    *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
2045
0
  } else {
2046
0
    assert(single_mode == NEARMV || single_mode == NEARESTMV);
2047
0
    const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
2048
0
    const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
2049
0
    if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
2050
0
      assert(ref_mv_offset >= 0);
2051
0
      if (ref_idx == 0) {
2052
0
        *this_mv =
2053
0
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
2054
0
      } else {
2055
0
        *this_mv =
2056
0
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
2057
0
      }
2058
0
    } else {
2059
0
      if (skip_repeated_ref_mv &&
2060
0
          check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
2061
0
        return 0;
2062
0
      *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
2063
0
    }
2064
0
  }
2065
0
  return 1;
2066
0
}
2067
2068
// Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
2069
// population
2070
static inline int skip_nearest_near_mv_using_refmv_weight(
2071
    const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
2072
0
    const int8_t ref_frame_type, PREDICTION_MODE best_mode) {
2073
0
  if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
2074
  // Do not skip the mode if the current block has not yet obtained a valid
2075
  // inter mode.
2076
0
  if (!is_inter_mode(best_mode)) return 0;
2077
2078
0
  const MACROBLOCKD *xd = &x->e_mbd;
2079
  // Do not skip the mode if both the top and left neighboring blocks are not
2080
  // available.
2081
0
  if (!xd->left_available || !xd->up_available) return 0;
2082
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2083
0
  const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
2084
0
  const int ref_mv_count =
2085
0
      AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
2086
2087
0
  if (ref_mv_count == 0) return 0;
2088
  // If ref mv list has at least one nearest candidate do not prune NEARESTMV
2089
0
  if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;
2090
2091
  // Count number of ref mvs populated from nearest candidates
2092
0
  int nearest_refmv_count = 0;
2093
0
  for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
2094
0
    if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
2095
0
  }
2096
2097
  // nearest_refmv_count indicates the closeness of block motion characteristics
2098
  // with respect to its spatial neighbor. Smaller value of nearest_refmv_count
2099
  // w.r.t to ref_mv_count means less correlation with its spatial neighbors.
2100
  // Hence less possibility for NEARESTMV and NEARMV modes becoming the best
2101
  // mode since these modes work well for blocks that shares similar motion
2102
  // characteristics with its neighbor. Thus, NEARMV mode is pruned when
2103
  // nearest_refmv_count is relatively smaller than ref_mv_count and NEARESTMV
2104
  // mode is pruned if none of the ref mvs are populated from nearest candidate.
2105
0
  const int prune_thresh = 1 + (ref_mv_count >= 2);
2106
0
  if (nearest_refmv_count < prune_thresh) return 1;
2107
0
  return 0;
2108
0
}
2109
2110
// This function update the non-new mv for the current prediction mode
2111
static inline int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
2112
                               const AV1_COMMON *cm, const MACROBLOCK *x,
2113
0
                               int skip_repeated_ref_mv) {
2114
0
  const MACROBLOCKD *xd = &x->e_mbd;
2115
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
2116
0
  const int is_comp_pred = has_second_ref(mbmi);
2117
2118
0
  int ret = 1;
2119
0
  for (int i = 0; i < is_comp_pred + 1; ++i) {
2120
0
    int_mv this_mv;
2121
0
    this_mv.as_int = INVALID_MV;
2122
0
    ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
2123
0
                      skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
2124
0
    if (!ret) return 0;
2125
0
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
2126
0
    if (single_mode == NEWMV) {
2127
0
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2128
0
      cur_mv[i] =
2129
0
          (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
2130
0
                         .this_mv
2131
0
                   : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
2132
0
                         .comp_mv;
2133
0
    } else {
2134
0
      ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
2135
0
    }
2136
0
  }
2137
0
  return ret;
2138
0
}
2139
2140
static inline int get_drl_cost(const MB_MODE_INFO *mbmi,
2141
                               const MB_MODE_INFO_EXT *mbmi_ext,
2142
                               const int (*const drl_mode_cost0)[2],
2143
0
                               int8_t ref_frame_type) {
2144
0
  int cost = 0;
2145
0
  if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
2146
0
    for (int idx = 0; idx < 2; ++idx) {
2147
0
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
2148
0
        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
2149
0
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
2150
0
        if (mbmi->ref_mv_idx == idx) return cost;
2151
0
      }
2152
0
    }
2153
0
    return cost;
2154
0
  }
2155
2156
0
  if (have_nearmv_in_inter_mode(mbmi->mode)) {
2157
0
    for (int idx = 1; idx < 3; ++idx) {
2158
0
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
2159
0
        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
2160
0
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
2161
0
        if (mbmi->ref_mv_idx == (idx - 1)) return cost;
2162
0
      }
2163
0
    }
2164
0
    return cost;
2165
0
  }
2166
0
  return cost;
2167
0
}
2168
2169
static inline int is_single_newmv_valid(const HandleInterModeArgs *const args,
2170
                                        const MB_MODE_INFO *const mbmi,
2171
0
                                        PREDICTION_MODE this_mode) {
2172
0
  for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
2173
0
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
2174
0
    const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
2175
0
    if (single_mode == NEWMV &&
2176
0
        args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
2177
0
      return 0;
2178
0
    }
2179
0
  }
2180
0
  return 1;
2181
0
}
2182
2183
static int get_drl_refmv_count(const MACROBLOCK *const x,
2184
                               const MV_REFERENCE_FRAME *ref_frame,
2185
0
                               PREDICTION_MODE mode) {
2186
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2187
0
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
2188
0
  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
2189
0
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
2190
0
  const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);
2191
0
  const int has_drl =
2192
0
      (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1);
2193
0
  const int ref_set =
2194
0
      has_drl ? AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv) : 1;
2195
2196
0
  return ref_set;
2197
0
}
2198
2199
// Checks if particular ref_mv_idx should be pruned.
2200
static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
2201
                                         const int qindex,
2202
0
                                         const int ref_mv_idx) {
2203
0
  if (reduce_inter_modes >= 3) return 1;
2204
  // Q-index logic based pruning is enabled only for
2205
  // reduce_inter_modes = 2.
2206
0
  assert(reduce_inter_modes == 2);
2207
  // When reduce_inter_modes=2, pruning happens as below based on q index.
2208
  // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
2209
  // For q index range between 86 and 170: prune if ref_mv_idx == 2.
2210
  // For q index range between 171 and 255: no pruning.
2211
0
  const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
2212
0
  return (ref_mv_idx >= min_prune_ref_mv_idx);
2213
0
}
2214
2215
// Whether this reference motion vector can be skipped, based on initial
// heuristics.
// Returns true when the candidate at 'ref_mv_idx' can be skipped without a
// full rate-distortion evaluation, false when it should be searched.
// Note: sets mbmi->ref_mv_idx as a side effect.
static bool ref_mv_idx_early_breakout(
    const SPEED_FEATURES *const sf,
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
    int ref_mv_idx) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const int is_comp_pred = has_second_ref(mbmi);
  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
    // Skip low-weight candidates when either reference is LAST2 or LAST3.
    if (mbmi->ref_frame[0] == LAST2_FRAME ||
        mbmi->ref_frame[0] == LAST3_FRAME ||
        mbmi->ref_frame[1] == LAST2_FRAME ||
        mbmi->ref_frame[1] == LAST3_FRAME) {
      // NEAR modes store their candidates starting at list index 1, so
      // offset the weight lookup accordingly.
      const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
      if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
          REF_CAT_LEVEL) {
        return true;
      }
    }
    // TODO(any): Experiment with reduce_inter_modes for compound prediction
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
        have_newmv_in_inter_mode(mbmi->mode)) {
      // For single-reference NEWMV modes whose reference is not the nearest
      // past/future frame, additionally apply q-index based pruning to
      // low-weight candidates.
      if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
          mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
        const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
        const int do_prune = prune_ref_mv_idx_using_qindex(
            sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
        if (do_prune &&
            (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
             REF_CAT_LEVEL)) {
          return true;
        }
      }
    }
  }

  mbmi->ref_mv_idx = ref_mv_idx;
  // Compound NEWMV modes need a valid single-reference NEWMV result for
  // each component; bail out if any is missing.
  if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
    return true;
  }
  // Estimate the minimum rate (reference signaling + DRL index) and skip if
  // even a zero-distortion outcome cannot beat the best RD found so far.
  // NEAREST modes are always kept as a fallback.
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost = get_drl_cost(
      mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
  est_rd_rate += drl_cost;
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
    return true;
  }
  return false;
}
2269
2270
// Compute the estimated RD cost for the motion vector with simple translation.
// Builds a luma-only predictor using SIMPLE_TRANSLATION (compound average
// only when there are two references) and returns a model-based RD estimate.
// Returns INT64_MAX when the motion vector cannot be built or the signaling
// rate alone already exceeds ref_best_rd.
static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
                                          HandleInterModeArgs *args,
                                          int ref_mv_idx, int64_t ref_best_rd,
                                          BLOCK_SIZE bsize) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const AV1_COMMON *cm = &cpi->common;
  const int is_comp_pred = has_second_ref(mbmi);
  const ModeCosts *mode_costs = &x->mode_costs;

  struct macroblockd_plane *p = xd->plane;
  const BUFFER_SET orig_dst = {
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
  };
  RD_STATS rd_stats;
  av1_init_rd_stats(&rd_stats);

  // Force the simplest compound configuration: plain averaging, no wedge
  // or difference-weighted masks.
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->comp_group_idx = 0;
  mbmi->compound_idx = 1;
  if (mbmi->ref_frame[1] == INTRA_FRAME) {
    mbmi->ref_frame[1] = NONE_FRAME;
  }
  int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);

  mbmi->num_proj_ref = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->ref_mv_idx = ref_mv_idx;

  // Accumulate the signaling rate: reference-frame cost, single/compound
  // cost and the DRL index cost.
  rd_stats.rate += args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost =
      get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
  rd_stats.rate += drl_cost;

  int_mv cur_mv[2];
  if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
    return INT64_MAX;
  }
  assert(have_nearmv_in_inter_mode(mbmi->mode));
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    mbmi->mv[i].as_int = cur_mv[i].as_int;
  }
  const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
  rd_stats.rate += ref_mv_cost;

  // Early exit: even a zero-distortion result would be too expensive.
  if (RDCOST(x->rdmult, rd_stats.rate, 0) > ref_best_rd) {
    return INT64_MAX;
  }

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->num_proj_ref = 0;
  if (is_comp_pred) {
    // Only compound_average
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
  }
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  // Build the luma predictor only and estimate rate/distortion with the
  // curve-fit model instead of a full transform search.
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
                                AOM_PLANE_Y, AOM_PLANE_Y);
  int est_rate;
  int64_t est_dist;
  model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
                                  NULL, NULL, NULL, NULL, NULL);
  return RDCOST(x->rdmult, rd_stats.rate + est_rate, est_dist);
}
2344
2345
// Represents a set of integers, from 0 to sizeof(int) * 8, as bits in
// an integer. 0 for the i-th bit means that integer is excluded, 1 means
// it is included.
static inline void mask_set_bit(int *mask, int index) {
  // Include 'index' in the set by raising its bit.
  *mask = *mask | (1 << index);
}
2349
2350
0
static inline bool mask_check_bit(int mask, int index) {
2351
0
  return (mask >> index) & 0x1;
2352
0
}
2353
2354
// Before performing the full MV search in handle_inter_mode, do a simple
// translation search and see if we can eliminate any motion vectors.
// Returns an integer where, if the i-th bit is set, it means that the i-th
// motion vector should be searched. This is only set for NEAR_MV.
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
                                HandleInterModeArgs *const args,
                                int64_t ref_best_rd, BLOCK_SIZE bsize,
                                const int ref_set) {
  // If the number of ref mv count is equal to 1, do not prune the same. It
  // is better to evaluate the same than to prune it.
  if (ref_set == 1) return 1;
  AV1_COMMON *const cm = &cpi->common;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const PREDICTION_MODE this_mode = mbmi->mode;

  // Only search indices if they have some chance of being good.
  // Heuristic pre-filter: drop indices ref_mv_idx_early_breakout rejects.
  int good_indices = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
                                  ref_best_rd, i)) {
      continue;
    }
    mask_set_bit(&good_indices, i);
  }

  // Only prune in NEARMV mode, if the speed feature is set, and the block size
  // is large enough. If these conditions are not met, return all good indices
  // found so far.
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
    return good_indices;
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
  // so b/2384 can be resolved.
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
      (mbmi->ref_frame[1] > 0 &&
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
    return good_indices;
  }

  // Calculate the RD cost for the motion vectors using simple translation.
  // idx_rdcost has MAX_REF_MV_SEARCH (3) entries; unused slots stay at
  // INT64_MAX so they never win the best-index scan below.
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    // If this index is bad, ignore it.
    if (!mask_check_bit(good_indices, ref_mv_idx)) {
      continue;
    }
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
        cpi, x, args, ref_mv_idx, ref_best_rd, bsize);
  }
  // Find the index with the best RD cost.
  int best_idx = 0;
  for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
    if (idx_rdcost[i] < idx_rdcost[best_idx]) {
      best_idx = i;
    }
  }
  // Only include indices that are good and within a % of the best.
  // Compound prediction tolerates a wider band (5%) than single (0.1%).
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
  // If the simple translation cost is not within this multiple of the
  // best RD, skip it. Note that the cutoff is derived experimentally.
  const double ref_dth = 5;
  int result = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (mask_check_bit(good_indices, i) &&
        (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
        (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
      mask_set_bit(&result, i);
    }
  }
  return result;
}
2427
2428
/*!\brief Motion mode information for inter mode search speedup.
 *
 * Used in a speed feature to search motion modes other than
 * SIMPLE_TRANSLATION only on winning candidates.
 */
typedef struct motion_mode_candidate {
  /*!
   * Mode info for the motion mode candidate.
   */
  MB_MODE_INFO mbmi;
  /*!
   * Rate describing the cost of the motion vectors for this candidate.
   */
  int rate_mv;
  /*!
   * Rate before motion mode search and transform coding is applied.
   */
  int rate2_nocoeff;
  /*!
   * An integer value 0 or 1 which indicates whether or not to skip the motion
   * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
   * candidate.
   */
  int skip_motion_mode;
  /*!
   * Total RD cost for this candidate.
   */
  int64_t rd_cost;
} motion_mode_candidate;
2457
2458
/*!\cond */
2459
typedef struct motion_mode_best_st_candidate {
  // Winner candidates from the SIMPLE_TRANSLATION pass, kept for a later
  // motion mode search (see motion_mode_candidate).
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
  // Number of valid entries in motion_mode_cand.
  int num_motion_mode_cand;
} motion_mode_best_st_candidate;
2463
2464
// Checks if the current reference frame matches with neighbouring block's
2465
// (top/left) reference frames
2466
static inline int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
2467
0
                                               MB_MODE_INFO *nb_mbmi) {
2468
0
  MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
2469
0
                                          nb_mbmi->ref_frame[1] };
2470
0
  MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
2471
0
                                           cur_mbmi->ref_frame[1] };
2472
0
  const int is_cur_comp_pred = has_second_ref(cur_mbmi);
2473
0
  int match_found = 0;
2474
2475
0
  for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
2476
0
    if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
2477
0
        (cur_ref_frames[i] == nb_ref_frames[1]))
2478
0
      match_found = 1;
2479
0
  }
2480
0
  return match_found;
2481
0
}
2482
2483
// Returns 1 when any inter-coded block in the row above shares a reference
// frame with the current block, or when the above row is unavailable.
static inline int find_ref_match_in_above_nbs(const int total_mi_cols,
                                              MACROBLOCKD *xd) {
  if (!xd->up_available) return 1;
  const int mi_col = xd->mi_col;
  MB_MODE_INFO **cur_mbmi = xd->mi;
  // prev_row_mi points into the mi array, starting at the beginning of the
  // previous row.
  MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
  const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
  uint8_t mi_step;
  // Walk the above row one neighbouring block at a time; mi_step advances
  // by each neighbour's width in mi units.
  for (int above_mi_col = mi_col; above_mi_col < end_col;
       above_mi_col += mi_step) {
    MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
    mi_step = mi_size_wide[above_mi[0]->bsize];
    int match_found = 0;
    if (is_inter_block(*above_mi))
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
    if (match_found) return 1;
  }
  return 0;
}
2504
2505
// Returns 1 when any inter-coded block in the column to the left shares a
// reference frame with the current block, or when no left column exists.
static inline int find_ref_match_in_left_nbs(const int total_mi_rows,
                                             MACROBLOCKD *xd) {
  if (!xd->left_available) return 1;
  const int mi_row = xd->mi_row;
  MB_MODE_INFO **cur_mbmi = xd->mi;
  // prev_col_mi points into the mi array, starting at the top of the
  // previous column
  MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
  const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
  uint8_t mi_step;
  // Walk down the left column one neighbouring block at a time; mi_step
  // advances by each neighbour's height in mi units.
  for (int left_mi_row = mi_row; left_mi_row < end_row;
       left_mi_row += mi_step) {
    MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
    mi_step = mi_size_high[left_mi[0]->bsize];
    int match_found = 0;
    if (is_inter_block(*left_mi))
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
    if (match_found) return 1;
  }
  return 0;
}
2526
/*!\endcond */
2527
2528
/*! \brief Struct used to hold TPL data to
 * narrow down parts of the inter mode search.
 */
typedef struct {
  /*!
   * The best inter cost out of all of the reference frames.
   */
  int64_t best_inter_cost;
  /*!
   * The inter cost for each reference frame, indexed by (ref frame - 1).
   */
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
} PruneInfoFromTpl;
2541
2542
#if !CONFIG_REALTIME_ONLY
2543
// TODO(Remya): Check if get_tpl_stats_b() can be reused
// Accumulates, per reference frame, the TPL inter prediction error of all
// TPL stat blocks covered by the current block, then records the smallest
// such cost among valid references in inter_cost_info_from_tpl.
// No-op when TPL stats are not ready for the current GF-group index.
static inline void get_block_level_tpl_stats(
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
  AV1_COMMON *const cm = &cpi->common;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int tpl_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  const int tpl_stride = tpl_frame->stride;
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
  // Columns are converted to superres coordinates before indexing the
  // TPL stats, which are stored at superres resolution.
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);

  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
       row += row_step) {
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
         col += col_step_sr) {
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];

      // Sums up the inter cost of corresponding ref frames
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
            this_stats->pred_error[ref_idx];
      }
    }
  }

  // Computes the best inter cost (minimum inter_cost)
  int64_t best_inter_cost = INT64_MAX;
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
    const int64_t cur_inter_cost =
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
    // calculating the minimum inter_cost
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
        valid_refs[ref_idx])
      best_inter_cost = cur_inter_cost;
  }
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
}
2597
#endif
2598
2599
// Decides, from block-level TPL statistics, whether the given inter mode /
// ref_mv_idx combination should be pruned. Returns 1 to prune, 0 to keep.
static inline int prune_modes_based_on_tpl_stats(
    PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
    const PREDICTION_MODE this_mode, int prune_mode_level) {
  const int is_ref_last2 = refs[0] == LAST2_FRAME || refs[1] == LAST2_FRAME;
  // At level 1, only LAST2-based modes are considered for pruning.
  if (prune_mode_level == 1 && !is_ref_last2) return 0;

  const int have_newmv = have_newmv_in_inter_mode(this_mode);
  // At level 2, NEWMV-style modes are never pruned.
  if ((prune_mode_level == 2) && have_newmv) return 0;

  const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
  // No usable TPL stats available.
  if (best_inter_cost == INT64_MAX) return 0;

  int64_t cur_inter_cost;

  // ref_inter_cost[] is indexed by (reference frame - 1).
  const int is_comp_pred = (refs[1] > INTRA_FRAME);
  if (!is_comp_pred) {
    cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
  } else {
    const int64_t inter_cost_ref0 =
        inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
    const int64_t inter_cost_ref1 =
        inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
    // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
    // more aggressive pruning
    cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
  }

  // LAST2-based modes use a plain comparison against the best cost.
  if (is_ref_last2) return (cur_inter_cost > best_inter_cost);

  const int is_globalmv =
      (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
  const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
  const int prune_level = prune_mode_level - 2;

  // Thresholds used for pruning:
  // Lower value indicates aggressive pruning and higher value indicates
  // conservative pruning which is set based on ref_mv_idx and speed feature.
  // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2.
  // prune_index 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
  static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
    { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
  };

  // Prune the mode if cur_inter_cost is greater than threshold times
  // best_inter_cost (factor is in units of 1/4 via the >> 2).
  if (cur_inter_cost >
      ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
        best_inter_cost) >>
       2))
    return 1;
  return 0;
}
2651
2652
/*!\brief High level function to select parameters for compound mode.
2653
 *
2654
 * \ingroup inter_mode_search
2655
 * The main search functionality is done in the call to av1_compound_type_rd().
2656
 *
2657
 * \param[in]     cpi               Top-level encoder structure.
2658
 * \param[in]     x                 Pointer to struct holding all the data for
2659
 *                                  the current macroblock.
2660
 * \param[in]     args              HandleInterModeArgs struct holding
2661
 *                                  miscellaneous arguments for inter mode
2662
 *                                  search. See the documentation for this
2663
 *                                  struct for a description of each member.
2664
 * \param[in]     ref_best_rd       Best RD found so far for this block.
2665
 *                                  It is used for early termination of this
2666
 *                                  search if the RD exceeds this value.
2667
 * \param[in,out] cur_mv            Current motion vector.
2668
 * \param[in]     bsize             Current block size.
2669
 * \param[in,out] compmode_interinter_cost  RD of the selected interinter
2670
                                    compound mode.
2671
 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2672
 *                                  allocated buffers for the compound
2673
 *                                  predictors and masks in the compound type
2674
 *                                  search.
2675
 * \param[in,out] orig_dst          A prediction buffer to hold a computed
2676
 *                                  prediction. This will eventually hold the
2677
 *                                  final prediction, and the tmp_dst info will
2678
 *                                  be copied here.
2679
 * \param[in]     tmp_dst           A temporary prediction buffer to hold a
2680
 *                                  computed prediction.
2681
 * \param[in,out] rate_mv           The rate associated with the motion vectors.
2682
 *                                  This will be modified if a motion search is
2683
 *                                  done in the motion mode search.
2684
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
2685
 *                                  information.
2686
 * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is the
2687
 *                                  best total RD for a skip mode so far, and
2688
 *                                  skip_rd[1] is the best RD for a skip mode so
2689
 *                                  far in luma. This is used as a speed feature
2690
 *                                  to skip the transform search if the computed
2691
 *                                  skip RD for the current mode is not better
2692
 *                                  than the best skip_rd so far.
2693
 * \param[in,out] skip_build_pred   Indicates whether or not to build the inter
2694
 *                                  predictor. If this is 0, the inter predictor
2695
 *                                  has already been built and thus we can avoid
2696
 *                                  repeating computation.
2697
 * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2698
 * a viable candidate.
2699
 */
2700
static int process_compound_inter_mode(
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const AV1_COMMON *cm = &cpi->common;
  // Masked compound modes (wedge/diffwtd) are only considered when the
  // block size supports them and the sequence header enables them.
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
                                   cm->seq_params->enable_masked_compound;
  int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
                         (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);

  const int num_planes = av1_num_planes(cm);
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int is_luma_interp_done = 0;
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  int64_t best_rd_compound;
  int64_t rd_thresh;
  const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
  const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
  rd_thresh = get_rd_thresh_from_best_rd(ref_best_rd, (1 << comp_type_rd_shift),
                                         comp_type_rd_scale);
  // Select compound type and any parameters related to that type
  // (for example, the mask parameters if it is a masked mode) and compute
  // the RD
  *compmode_interinter_cost = av1_compound_type_rd(
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
  // Give up (and restore the original destination buffers) if the scaled
  // compound RD already exceeds the best RD seen so far.
  if (ref_best_rd < INT64_MAX &&
      (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
          ref_best_rd) {
    restore_dst_buf(xd, *orig_dst, num_planes);
    return 1;
  }

  // Build only uv predictor for COMPOUND_AVERAGE.
  // Note there is no need to call av1_enc_build_inter_predictor
  // for luma if COMPOUND_AVERAGE is selected because it is the first
  // candidate in av1_compound_type_rd, which means it used the dst_buf
  // rather than the tmp_buf.
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
    if (num_planes > 1) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                    AOM_PLANE_U, num_planes - 1);
    }
    *skip_build_pred = 1;
  }
  return 0;
}
2754
2755
// Speed feature to prune out MVs that are similar to previous MVs if they
2756
// don't achieve the best RD advantage.
2757
static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2758
                                   int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2759
0
                                   MB_MODE_INFO *mbmi, int pruning_factor) {
2760
0
  int i;
2761
0
  const int is_comp_pred = has_second_ref(mbmi);
2762
0
  const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
2763
2764
  // Skip the evaluation if an MV match is found.
2765
0
  if (ref_mv_idx > 0) {
2766
0
    for (int idx = 0; idx < ref_mv_idx; ++idx) {
2767
0
      if (save_mv[idx][0].as_int == INVALID_MV) continue;
2768
2769
0
      int mv_diff = 0;
2770
0
      for (i = 0; i < 1 + is_comp_pred; ++i) {
2771
0
        mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2772
0
                   abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2773
0
      }
2774
2775
      // If this mode is not the best one, and current MV is similar to
2776
      // previous stored MV, terminate this ref_mv_idx evaluation.
2777
0
      if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2778
0
    }
2779
0
  }
2780
2781
0
  if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2782
0
    for (i = 0; i < is_comp_pred + 1; ++i)
2783
0
      save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2784
0
  }
2785
2786
0
  return 0;
2787
0
}
2788
2789
/*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2790
 *
2791
 * \ingroup inter_mode_search
2792
 *
2793
 * Compares the sse of zero mv and the best sse found in single new_mv. If the
2794
 * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
2795
 * Else returns 0.
2796
 *
2797
 * Note that the sse of here comes from single_motion_search. So it is
2798
 * interpolated with the filter in motion search, not the actual interpolation
2799
 * filter used in encoding.
2800
 *
2801
 * \param[in]     fn_ptr            A table of function pointers to compute SSE.
2802
 * \param[in]     x                 Pointer to struct holding all the data for
2803
 *                                  the current macroblock.
2804
 * \param[in]     bsize             The current block_size.
2805
 * \param[in]     args              The args to handle_inter_mode, used to track
2806
 *                                  the best SSE.
2807
 * \param[in]    prune_zero_mv_with_sse  The argument holds speed feature
2808
 *                                       prune_zero_mv_with_sse value
2809
 * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2810
 */
2811
static inline int prune_zero_mv_with_sse(const aom_variance_fn_ptr_t *fn_ptr,
                                         const MACROBLOCK *x, BLOCK_SIZE bsize,
                                         const HandleInterModeArgs *args,
                                         int prune_zero_mv_with_sse) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];

  const int is_comp_pred = has_second_ref(mbmi);
  const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;

  // First pass: verify pruning is applicable for every reference.
  for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
    if (xd->global_motion[refs[idx]].wmtype != IDENTITY) {
      // Pruning logic only works for IDENTITY type models
      // Note: In theory we could apply similar logic for TRANSLATION
      // type models, but we do not code these due to a spec bug
      // (see comments in gm_get_motion_vector() in av1/common/mv.h)
      assert(xd->global_motion[refs[idx]].wmtype != TRANSLATION);
      return 0;
    }

    // Don't prune if we have invalid data
    assert(mbmi->mv[idx].as_int == 0);
    if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
      return 0;
    }
  }

  // Sum up the sse of ZEROMV and best NEWMV
  unsigned int this_sse_sum = 0;
  unsigned int best_sse_sum = 0;
  for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
    const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
    const struct macroblockd_plane *pd = xd->plane;
    const struct buf_2d *src_buf = &p->src;
    const struct buf_2d *ref_buf = &pd->pre[idx];
    const uint8_t *src = src_buf->buf;
    const uint8_t *ref = ref_buf->buf;
    const int src_stride = src_buf->stride;
    const int ref_stride = ref_buf->stride;

    // SSE of the zero-MV prediction: source vs. co-located reference.
    unsigned int this_sse;
    fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
    this_sse_sum += this_sse;

    const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
    best_sse_sum += best_sse;
  }

  // Prune when the zero-MV SSE exceeds the best NEWMV SSE. The laxer 1.25
  // multiplier applies at the less aggressive speed setting (value 1).
  const double mul = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25;
  if ((double)this_sse_sum > (mul * (double)best_sse_sum)) {
    return 1;
  }

  return 0;
}
2866
2867
/*!\brief Searches for interpolation filter in realtime mode during winner eval
2868
 *
2869
 * \ingroup inter_mode_search
2870
 *
2871
 * Does a simple interpolation filter search during winner mode evaluation. This
2872
 * is currently only used by realtime mode as \ref
2873
 * av1_interpolation_filter_search is not called during realtime encoding.
2874
 *
2875
 * This function only searches over two possible filters. EIGHTTAP_REGULAR is
2876
 * always search. For lowres clips (<= 240p), MULTITAP_SHARP is also search. For
2877
 * higher  res slips (>240p), EIGHTTAP_SMOOTH is also searched.
2878
 *  *
2879
 * \param[in]     cpi               Pointer to the compressor. Used for feature
2880
 *                                  flags.
2881
 * \param[in,out] x                 Pointer to macroblock. This is primarily
2882
 *                                  used to access the buffers.
2883
 * \param[in]     mi_row            The current row in mi unit (4X4 pixels).
2884
 * \param[in]     mi_col            The current col in mi unit (4X4 pixels).
2885
 * \param[in]     bsize             The current block_size.
2886
 * \return Returns true if a predictor is built in xd->dst, false otherwise.
2887
 */
2888
static inline bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
2889
                                      int mi_row, int mi_col,
2890
0
                                      BLOCK_SIZE bsize) {
2891
0
  static const InterpFilters filters_ref_set[3] = {
2892
0
    { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
2893
0
    { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
2894
0
    { MULTITAP_SHARP, MULTITAP_SHARP }
2895
0
  };
2896
2897
0
  const AV1_COMMON *const cm = &cpi->common;
2898
0
  MACROBLOCKD *const xd = &x->e_mbd;
2899
0
  MB_MODE_INFO *const mi = xd->mi[0];
2900
0
  int64_t best_cost = INT64_MAX;
2901
0
  int best_filter_index = -1;
2902
  // dst_bufs[0] sores the new predictor, and dist_bifs[1] stores the best
2903
0
  const int num_planes = av1_num_planes(cm);
2904
0
  const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
2905
0
  assert(is_inter_mode(mi->mode));
2906
0
  assert(mi->motion_mode == SIMPLE_TRANSLATION);
2907
0
  assert(!is_inter_compound_mode(mi->mode));
2908
2909
0
  if (!av1_is_interp_needed(xd)) {
2910
0
    return false;
2911
0
  }
2912
2913
0
  struct macroblockd_plane *pd = xd->plane;
2914
0
  const BUFFER_SET orig_dst = {
2915
0
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
2916
0
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
2917
0
  };
2918
0
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
2919
0
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
2920
0
                                 tmp_buf + 2 * MAX_SB_SQUARE },
2921
0
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
2922
0
  const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };
2923
2924
0
  for (int i = 0; i < 3; ++i) {
2925
0
    if (is_240p_or_lesser) {
2926
0
      if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
2927
0
        continue;
2928
0
      }
2929
0
    } else {
2930
0
      if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
2931
0
        continue;
2932
0
      }
2933
0
    }
2934
0
    int64_t cost;
2935
0
    RD_STATS tmp_rd = { 0 };
2936
2937
0
    mi->interp_filters.as_filters = filters_ref_set[i];
2938
0
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
2939
2940
0
    model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
2941
0
                       ? MODELRD_LEGACY
2942
0
                       : MODELRD_TYPE_INTERP_FILTER](
2943
0
        cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
2944
0
        &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);
2945
2946
0
    tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
2947
0
                                           cm->seq_params->enable_dual_filter);
2948
0
    cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
2949
0
    if (cost < best_cost) {
2950
0
      best_filter_index = i;
2951
0
      best_cost = cost;
2952
0
      swap_dst_buf(xd, dst_bufs, num_planes);
2953
0
    }
2954
0
  }
2955
0
  assert(best_filter_index >= 0);
2956
2957
0
  mi->interp_filters.as_filters = filters_ref_set[best_filter_index];
2958
2959
0
  const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];
2960
2961
0
  if (is_best_pred_in_orig) {
2962
0
    swap_dst_buf(xd, dst_bufs, num_planes);
2963
0
  } else {
2964
    // Note that xd->pd's bufers are kept in sync with dst_bufs[0]. So if
2965
    // is_best_pred_in_orig is false, that means the current buffer is the
2966
    // original one.
2967
0
    assert(&orig_dst == dst_bufs[0]);
2968
0
    assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
2969
0
    const int width = block_size_wide[bsize];
2970
0
    const int height = block_size_high[bsize];
2971
0
#if CONFIG_AV1_HIGHBITDEPTH
2972
0
    const bool is_hbd = is_cur_buf_hbd(xd);
2973
0
    if (is_hbd) {
2974
0
      aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
2975
0
                               tmp_dst.stride[AOM_PLANE_Y],
2976
0
                               CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
2977
0
                               orig_dst.stride[AOM_PLANE_Y], width, height);
2978
0
    } else {
2979
0
      aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
2980
0
                        orig_dst.plane[AOM_PLANE_Y],
2981
0
                        orig_dst.stride[AOM_PLANE_Y], width, height);
2982
0
    }
2983
#else
2984
    aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
2985
                      orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
2986
                      width, height);
2987
#endif
2988
0
  }
2989
2990
  // Build the YUV predictor.
2991
0
  if (num_planes > 1) {
2992
0
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
2993
0
                                  AOM_PLANE_U, AOM_PLANE_V);
2994
0
  }
2995
2996
0
  return true;
2997
0
}
2998
2999
/*!\brief AV1 inter mode RD computation
 *
 * \ingroup inter_mode_search
 * Do the RD search for a given inter mode and compute all information relevant
 * to the input mode. It will compute the best MV,
 * compound parameters (if the mode is a compound mode) and interpolation filter
 * parameters.
 *
 * \param[in]     cpi               Top-level encoder structure.
 * \param[in]     tile_data         Pointer to struct holding adaptive
 *                                  data/contexts/models for the tile during
 *                                  encoding.
 * \param[in]     x                 Pointer to structure holding all the data
 *                                  for the current macroblock.
 * \param[in]     bsize             Current block size.
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
 *                                  information.
 * \param[out]    rd_stats_y        Struct to keep track of the RD information
 *                                  for only the Y plane.
 * \param[out]    rd_stats_uv       Struct to keep track of the RD information
 *                                  for only the UV planes.
 * \param[in]     args              HandleInterModeArgs struct holding
 *                                  miscellaneous arguments for inter mode
 *                                  search. See the documentation for this
 *                                  struct for a description of each member.
 * \param[in]     ref_best_rd       Best RD found so far for this block.
 *                                  It is used for early termination of this
 *                                  search if the RD exceeds this value.
 * \param[in]     tmp_buf           Temporary buffer used to hold predictors
 *                                  built in this search.
 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
 *                                  allocated buffers for the compound
 *                                  predictors and masks in the compound type
 *                                  search.
 * \param[in,out] best_est_rd       Estimated RD for motion mode search if
 *                                  do_tx_search (see below) is 0.
 * \param[in]     do_tx_search      Parameter to indicate whether or not to do
 *                                  a full transform search. This will compute
 *                                  an estimated RD for the modes without the
 *                                  transform search and later perform the full
 *                                  transform search on the best candidates.
 * \param[in,out] inter_modes_info  InterModesInfo struct to hold inter mode
 *                                  information to perform a full transform
 *                                  search only on winning candidates searched
 *                                  with an estimate for transform coding RD.
 * \param[in,out] motion_mode_cand  A motion_mode_candidate struct to store
 *                                  motion mode information used in a speed
 *                                  feature to search motion modes other than
 *                                  SIMPLE_TRANSLATION only on winning
 *                                  candidates.
 * \param[in,out] skip_rd           A length 2 array, where skip_rd[0] is the
 *                                  best total RD for a skip mode so far, and
 *                                  skip_rd[1] is the best RD for a skip mode so
 *                                  far in luma. This is used as a speed feature
 *                                  to skip the transform search if the computed
 *                                  skip RD for the current mode is not better
 *                                  than the best skip_rd so far.
 * \param[in]     inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
 *                                         narrow down the search based on data
 *                                         collected in the TPL model.
 * \param[out]    yrd               Stores the rdcost corresponding to encoding
 *                                  the luma plane.
 *
 * \return The RD cost for the mode being searched. If the return value is
 *         INT64_MAX, the output parameters are not set; do not use them.
 */
static int64_t handle_inter_mode(
    AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
    uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
    int64_t *best_est_rd, const int do_tx_search,
    InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
    int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
    int64_t *yrd) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;

#if CONFIG_REALTIME_ONLY
  const int prune_modes_based_on_tpl = 0;
#else   // CONFIG_REALTIME_ONLY
  // TPL-based pruning requires valid TPL stats for the current GF-group index.
  const TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const int prune_modes_based_on_tpl =
      cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
      av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
#endif  // CONFIG_REALTIME_ONLY
  int i;
  // Reference frames for this mode. A negative second ref (no second ref) is
  // mapped to 0 so refs[1] can be used as an array index unconditionally.
  const int refs[2] = { mbmi->ref_frame[0],
                        (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  int rate_mv = 0;
  int64_t rd = INT64_MAX;
  // Do first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };

  int64_t ret_val = INT64_MAX;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  int64_t best_rd = INT64_MAX;
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  int64_t best_yrd = INT64_MAX;
  MB_MODE_INFO best_mbmi = *mbmi;
  int best_xskip_txfm = 0;
  int64_t newmv_ret_val = INT64_MAX;
  inter_mode_info mode_info[MAX_REF_MV_SEARCH];

  // Do not prune the mode based on inter cost from tpl if the current ref frame
  // is the winner ref in neighbouring blocks.
  int ref_match_found_in_above_nb = 0;
  int ref_match_found_in_left_nb = 0;
  if (prune_modes_based_on_tpl) {
    ref_match_found_in_above_nb =
        find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
    ref_match_found_in_left_nb =
        find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
  }

  // First, perform a simple translation search for each of the indices. If
  // an index performs well, it will be fully searched in the main loop
  // of this function.
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
  // Save MV results from first 2 ref_mv_idx.
  int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
  int best_ref_mv_idx = -1;
  // Bitmask of ref_mv indices worth a full search; others are skipped below.
  const int idx_mask =
      ref_mv_idx_to_search(cpi, x, args, ref_best_rd, bsize, ref_set);
  const int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
  const ModeCosts *mode_costs = &x->mode_costs;
  const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
  // Rate shared by every ref_mv_idx iteration: ref-frame signalling,
  // single/compound signalling, and the mode itself.
  const int base_rate =
      args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;

  for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
    save_mv[i][0].as_int = INVALID_MV;
    save_mv[i][1].as_int = INVALID_MV;
  }
  args->start_mv_cnt = 0;

  // Main loop of this function. This will  iterate over all of the ref mvs
  // in the dynamic reference list and do the following:
  //    1.) Get the current MV. Create newmv MV if necessary
  //    2.) Search compound type and parameters if applicable
  //    3.) Do interpolation filter search
  //    4.) Build the inter predictor
  //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
  //        WARPED_CAUSAL)
  //    6.) Update stats if best so far
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    mbmi->ref_mv_idx = ref_mv_idx;

    mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
    mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
    const int drl_cost = get_drl_cost(
        mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
    mode_info[ref_mv_idx].drl_cost = drl_cost;
    mode_info[ref_mv_idx].skip = 0;

    if (!mask_check_bit(idx_mask, ref_mv_idx)) {
      // MV did not perform well in simple translation search. Skip it.
      continue;
    }
    if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
        !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
      // Skip mode if TPL model indicates it will not be beneficial.
      if (prune_modes_based_on_tpl_stats(
              inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
              cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
        continue;
    }
    av1_init_rd_stats(rd_stats);

    // Initialize compound mode data. These fields are reset every iteration
    // because the compound search below may have modified them.
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
    if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;

    mbmi->num_proj_ref = 0;
    mbmi->motion_mode = SIMPLE_TRANSLATION;

    // Compute cost for signalling this DRL index
    rd_stats->rate = base_rate;
    rd_stats->rate += drl_cost;

    int rs = 0;
    int compmode_interinter_cost = 0;

    int_mv cur_mv[2];

    // TODO(Cherma): Extend this speed feature to support compound mode
    int skip_repeated_ref_mv =
        is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
    // Generate the current mv according to the prediction mode
    if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
      continue;
    }

    // The above call to build_cur_mv does not handle NEWMV modes. Build
    // the mv here if we have NEWMV for any predictors.
    if (have_newmv_in_inter_mode(this_mode)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, handle_newmv_time);
#endif
      newmv_ret_val =
          handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, handle_newmv_time);
#endif

      if (newmv_ret_val != 0) continue;

      if (is_inter_singleref_mode(this_mode) &&
          cur_mv[0].as_int != INVALID_MV) {
        const MV_REFERENCE_FRAME ref = refs[0];
        const unsigned int this_sse = x->pred_sse[ref];
        // Track the best (lowest) single-ref SSE per reference; used later by
        // prune_zero_mv_with_sse.
        if (this_sse < args->best_single_sse_in_refs[ref]) {
          args->best_single_sse_in_refs[ref] = this_sse;
        }

        // Speed feature: skip NEWMV if its SSE is not sufficiently better
        // than the best prediction SSE seen so far, scaled by block size and
        // aggressiveness level.
        if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
          const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1;
          const int pix_idx = num_pels_log2_lookup[bsize] - 4;
          const double scale_factor[3][11] = {
            { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
            { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
            { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
          };
          assert(pix_idx >= 0);
          assert(th_idx <= 2);
          if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse)
            continue;
        }
      }

      rd_stats->rate += rate_mv;
    }
    // Copy the motion vector for this mode into mbmi struct
    for (i = 0; i < is_comp_pred + 1; ++i) {
      mbmi->mv[i].as_int = cur_mv[i].as_int;
    }

    // Early exit on rate alone (dist = 0 lower bound), except for NEAREST
    // modes which must always be evaluated.
    if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
        mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
      continue;
    }

    // Skip the rest of the search if prune_ref_mv_idx_search speed feature
    // is enabled, and the current MV is similar to a previous one.
    if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
        prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
                                cpi->sf.inter_sf.prune_ref_mv_idx_search))
      continue;

    if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
        (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
      if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args,
                                 cpi->sf.gm_sf.prune_zero_mv_with_sse)) {
        continue;
      }
    }

    int skip_build_pred = 0;
    const int mi_row = xd->mi_row;
    const int mi_col = xd->mi_col;

    // Handle a compound predictor, continue if it is determined this
    // cannot be the best compound mode
    if (is_comp_pred) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, compound_type_rd_time);
#endif
      const int not_best_mode = process_compound_inter_mode(
          cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
          rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
          &skip_build_pred);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, compound_type_rd_time);
#endif
      if (not_best_mode) continue;
    }

    if (!args->skip_ifs) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, interpolation_filter_search_time);
#endif
      // Determine the interpolation filter for this mode
      ret_val = av1_interpolation_filter_search(
          x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
          &skip_build_pred, args, ref_best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, interpolation_filter_search_time);
#endif
      // Record the modelled RD for single-ref modes so later compound modes
      // can be pruned against their constituent single-ref RDs.
      if (args->modelled_rd != NULL && !is_comp_pred) {
        args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
      }
      if (ret_val != 0) {
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
                 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
        // Speed feature: bail out when modelled rd exceeds best rd by > 8/3.
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      }

      // Compute modelled RD if enabled
      if (args->modelled_rd != NULL) {
        if (is_comp_pred) {
          const int mode0 = compound_ref0_mode(this_mode);
          const int mode1 = compound_ref1_mode(this_mode);
          const int64_t mrd =
              AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
                     args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
          // Prune the compound mode when its modelled rd is much worse
          // (> 8/6x) than the better of its single-ref components.
          if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
            restore_dst_buf(xd, orig_dst, num_planes);
            continue;
          }
        }
      }
    }

    rd_stats->rate += compmode_interinter_cost;
    if (skip_build_pred != 1) {
      // Build this inter predictor if it has not been previously built
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, motion_mode_rd_time);
#endif
    int rate2_nocoeff = rd_stats->rate;
    // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
    // OBMC_CAUSAL or WARPED_CAUSAL
    int64_t this_yrd;
    ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
                             rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
                             &orig_dst, best_est_rd, do_tx_search,
                             inter_modes_info, 0, &this_yrd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, motion_mode_rd_time);
#endif
    assert(
        IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));

    if (ret_val != INT64_MAX) {
      int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
                              rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
                              cpi->sf.winner_mode_sf.multi_winner_mode_type,
                              do_tx_search);
      if (tmp_rd < best_rd) {
        best_yrd = this_yrd;
        // Update the best rd stats if we found the best mode so far
        best_rd_stats = *rd_stats;
        best_rd_stats_y = *rd_stats_y;
        best_rd_stats_uv = *rd_stats_uv;
        best_rd = tmp_rd;
        best_mbmi = *mbmi;
        best_xskip_txfm = txfm_info->skip_txfm;
        av1_copy_array(best_tx_type_map, xd->tx_type_map,
                       xd->height * xd->width);
        motion_mode_cand->rate_mv = rate_mv;
        motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
      }

      // Tighten the early-termination threshold for subsequent iterations.
      if (tmp_rd < ref_best_rd) {
        ref_best_rd = tmp_rd;
        best_ref_mv_idx = ref_mv_idx;
      }
    }
    restore_dst_buf(xd, orig_dst, num_planes);
  }

  if (best_rd == INT64_MAX) return INT64_MAX;

  // re-instate status of the best choice
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  *rd_stats_uv = best_rd_stats_uv;
  *yrd = best_yrd;
  *mbmi = best_mbmi;
  txfm_info->skip_txfm = best_xskip_txfm;
  assert(IMPLIES(mbmi->comp_group_idx == 1,
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);

  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  return rd_stats->rdcost;
}
3410
3411
/*!\brief Search for the best intrabc predictor
 *
 * \ingroup intra_mode_search
 * \callergraph
 * This function performs a motion search to find the best intrabc predictor.
 *
 * \returns Returns the best overall rdcost (including the non-intrabc modes
 * search before this function).
 */
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                       int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  // IntraBC must be allowed by the stream config and enabled by speed
  // features; it is not evaluated in nonrd pick mode.
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
      !cpi->sf.mv_sf.use_intrabc || cpi->sf.rt_sf.use_nonrd_pick_mode)
    return INT64_MAX;
  // At higher search levels, only square 4x4/8x8/16x16 blocks are searched.
  if (cpi->sf.mv_sf.intrabc_search_level >= 1 && bsize != BLOCK_4X4 &&
      bsize != BLOCK_8X8 && bsize != BLOCK_16X16) {
    return INT64_MAX;
  }
  const int num_planes = av1_num_planes(cm);

  MACROBLOCKD *const xd = &x->e_mbd;
  const TileInfo *tile = &xd->tile;
  MB_MODE_INFO *mbmi = xd->mi[0];

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  const int w = block_size_wide[bsize];
  const int h = block_size_high[bsize];
  const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
  const int sb_col = mi_col >> cm->seq_params->mib_size_log2;

  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  int_mv nearestmv, nearmv;
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
                                   0);

  if (nearestmv.as_int == INVALID_MV) {
    nearestmv.as_int = 0;
  }
  if (nearmv.as_int == INVALID_MV) {
    nearmv.as_int = 0;
  }

  // Pick the DV reference: prefer nearestmv, fall back to nearmv, and if both
  // are zero derive a default reference DV from the tile position.
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
  if (dv_ref.as_int == 0) {
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
  }
  // Ref DV should not have sub-pel.
  assert((dv_ref.as_mv.col & 7) == 0);
  assert((dv_ref.as_mv.row & 7) == 0);
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;

  // IntraBC predicts from the current (partially reconstructed) frame, so
  // point the prediction buffers at cur_buf instead of a reference frame.
  struct buf_2d yv12_mb[MAX_MB_PLANE];
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
  for (int i = 0; i < num_planes; ++i) {
    xd->plane[i].pre[0] = yv12_mb[i];
  }

  enum IntrabcMotionDirection {
    IBC_MOTION_ABOVE,
    IBC_MOTION_LEFT,
    IBC_MOTION_DIRECTIONS
  };

  MB_MODE_INFO best_mbmi = *mbmi;
  RD_STATS best_rdstats = *rd_stats;
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);

  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
  const SEARCH_METHODS search_method =
      av1_get_default_mv_search_method(x, &cpi->sf.mv_sf, bsize);
  const search_site_config *lookahead_search_sites =
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
  const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
                                     &dv_ref.as_mv, start_mv,
                                     lookahead_search_sites, search_method,
                                     /*fine_search_interval=*/0);
  const IntraBCMVCosts *const dv_costs = x->dv_costs;
  av1_set_ms_to_intra_mode(&fullms_params, dv_costs);

  // At search level >= 1 only the "above" region is searched.
  const enum IntrabcMotionDirection max_dir = cpi->sf.mv_sf.intrabc_search_level
                                                  ? IBC_MOTION_LEFT
                                                  : IBC_MOTION_DIRECTIONS;

  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE; dir < max_dir;
       ++dir) {
    // Restrict the DV search area to already-coded regions: either the rows
    // above the current superblock, or the superblock columns to the left.
    switch (dir) {
      case IBC_MOTION_ABOVE:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (tile->mi_col_end - mi_col) * MI_SIZE - w;
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        fullms_params.mv_limits.row_max =
            (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
        break;
      case IBC_MOTION_LEFT:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
        // TODO(aconverse@google.com): Minimize the overlap between above and
        // left areas.
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        int bottom_coded_mi_edge =
            AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
        fullms_params.mv_limits.row_max =
            (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
        break;
      default: assert(0);
    }
    // NOTE(review): these four asserts compare each limit against itself and
    // are therefore always true. Presumably they were intended to compare the
    // limits against the tile/superblock bounds computed above -- confirm the
    // intended operands upstream before changing them.
    assert(fullms_params.mv_limits.col_min >= fullms_params.mv_limits.col_min);
    assert(fullms_params.mv_limits.col_max <= fullms_params.mv_limits.col_max);
    assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min);
    assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max);

    av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);

    // Skip this direction entirely if the clamped search window is empty.
    if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
        fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
      continue;
    }

    const int step_param = cpi->mv_search_params.mv_step_param;
    IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
    int_mv best_mv;
    FULLPEL_MV_STATS best_mv_stats;
    int bestsme = INT_MAX;

    // Perform a hash search first, and see if we get any matches.
    if (!cpi->sf.mv_sf.hash_max_8x8_intrabc_blocks || bsize <= BLOCK_8X8) {
      bestsme = av1_intrabc_hash_search(cpi, xd, &fullms_params,
                                        intrabc_hash_info, &best_mv.as_fullmv);
    }

    // If intrabc_search_level is not 0 and we found a hash search match, do
    // not proceed with pixel search as the hash match is very likely to be the
    // best intrabc candidate anyway.
    if (bestsme == INT_MAX || cpi->sf.mv_sf.intrabc_search_level == 0) {
      int_mv best_pixel_mv;
      const int pixelsme =
          av1_full_pixel_search(start_mv, &fullms_params, step_param, NULL,
                                &best_pixel_mv.as_fullmv, &best_mv_stats, NULL);
      if (pixelsme < bestsme) {
        bestsme = pixelsme;
        best_mv = best_pixel_mv;
      }
    }
    // Neither search produced a candidate DV for this direction.
    if (bestsme == INT_MAX) continue;
    const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
    if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
                                get_fullmv_from_mv(&dv)))
      continue;
    // Reject DVs that violate AV1's intra block copy validity constraints
    // (e.g. referencing not-yet-decoded or out-of-tile pixels).
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
                         cm->seq_params->mib_size_log2))
      continue;

    // DV should not have sub-pel.
    assert((dv.col & 7) == 0);
    assert((dv.row & 7) == 0);
    // Configure mbmi as an intrabc block: DC_PRED with the found DV, using
    // the BILINEAR filter as required for intra block copy.
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
    mbmi->use_intrabc = 1;
    mbmi->mode = DC_PRED;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->motion_mode = SIMPLE_TRANSLATION;
    mbmi->mv[0].as_mv = dv;
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
    mbmi->skip_txfm = 0;
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);

    // TODO(aconverse@google.com): The full motion field defining discount
    // in MV_COST_WEIGHT is too large. Explore other values.
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
                                        dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
    const int rate_mode = x->mode_costs.intrabc_cost[1];
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
                         &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
      continue;
    rd_stats_yuv.rdcost =
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
    if (rd_stats_yuv.rdcost < best_rd) {
      best_rd = rd_stats_yuv.rdcost;
      best_mbmi = *mbmi;
      best_rdstats = rd_stats_yuv;
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
    }
  }
  // Restore the overall winner (intrabc or the incoming non-intrabc best).
  *mbmi = best_mbmi;
  *rd_stats = best_rdstats;
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
#if CONFIG_RD_DEBUG
  mbmi->rd_stats = *rd_stats;
#endif
  return best_rd;
}
3623
3624
// TODO(chiyotsai@google.com): We are using struct $struct_name instead of their
3625
// typedef here because Doxygen doesn't know about the typedefs yet. So using
3626
// the typedef will prevent doxygen from finding this function and generating
3627
// the callgraph. Once documents for AV1_COMP and MACROBLOCK are added to
3628
// doxygen, we can revert back to using the typedefs.
3629
// Picks the best intra coding mode for the current superblock: searches luma
// intra modes, then (if within budget) chroma intra modes, and finally lets
// rd_pick_intrabc_mode_sb() try to beat the result with intra block copy.
// On success the winning mode info is stored in ctx/rd_cost; on failure
// rd_cost->rate is left at INT_MAX.
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  uint8_t y_skip_txfm = 0, uv_skip_txfm = 0;
  int64_t dist_y = 0, dist_uv = 0;

  ctx->rd_stats.skip_txfm = 0;
  // Configure mbmi as a plain intra block (no intrabc, no skip mode).
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->skip_mode = 0;

  // Luma intra search first; intra_yrd is the best luma-only RD cost found.
  const int64_t intra_yrd =
      av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
                                 &y_skip_txfm, bsize, best_rd, ctx);

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  if (intra_yrd < best_rd) {
    // Search intra modes for uv planes if needed
    if (num_planes > 1) {
      // Set up the tx variables for reproducing the y predictions in case we
      // need it for chroma-from-luma.
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
      }
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
                                  &dist_uv, &uv_skip_txfm, bsize,
                                  max_uv_tx_size);
    }

    // Intra block is always coded as non-skip
    rd_cost->rate =
        rate_y + rate_uv +
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
    rd_cost->dist = dist_y + dist_uv;
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
    rd_cost->skip_txfm = 0;
  } else {
    // Luma search exceeded the caller's RD budget: mark the result invalid.
    rd_cost->rate = INT_MAX;
  }

  // Tighten the budget before trying intra block copy.
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  // rd_pick_intrabc_mode_sb() updates *rd_cost and *mbmi in place when it
  // finds a better intrabc candidate.
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
    assert(rd_cost->rate != INT_MAX);
  }
  if (rd_cost->rate == INT_MAX) return;

  // Record the winning mode info for the caller.
  ctx->mic = *mbmi;
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
3692
3693
static inline void calc_target_weighted_pred(
3694
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
3695
    const uint8_t *above, int above_stride, const uint8_t *left,
3696
    int left_stride);
3697
3698
static inline void rd_pick_skip_mode(
3699
    RD_STATS *rd_cost, InterModeSearchState *search_state,
3700
    const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3701
0
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3702
0
  const AV1_COMMON *const cm = &cpi->common;
3703
0
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3704
0
  const int num_planes = av1_num_planes(cm);
3705
0
  MACROBLOCKD *const xd = &x->e_mbd;
3706
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
3707
3708
0
  x->compound_idx = 1;  // COMPOUND_AVERAGE
3709
0
  RD_STATS skip_mode_rd_stats;
3710
0
  av1_invalid_rd_stats(&skip_mode_rd_stats);
3711
3712
0
  if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3713
0
      skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3714
0
    return;
3715
0
  }
3716
3717
0
  const MV_REFERENCE_FRAME ref_frame =
3718
0
      LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3719
0
  const MV_REFERENCE_FRAME second_ref_frame =
3720
0
      LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3721
0
  const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3722
0
  const THR_MODES mode_index =
3723
0
      get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3724
3725
0
  if (mode_index == THR_INVALID) {
3726
0
    return;
3727
0
  }
3728
3729
0
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3730
0
       cpi->sf.inter_sf.disable_onesided_comp) &&
3731
0
      cpi->all_one_sided_refs) {
3732
0
    return;
3733
0
  }
3734
3735
0
  mbmi->mode = this_mode;
3736
0
  mbmi->uv_mode = UV_DC_PRED;
3737
0
  mbmi->ref_frame[0] = ref_frame;
3738
0
  mbmi->ref_frame[1] = second_ref_frame;
3739
0
  const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3740
0
  if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3741
0
    MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3742
0
    if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3743
0
        mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3744
0
      return;
3745
0
    }
3746
0
    av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3747
0
                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3748
0
                     mbmi_ext->mode_context);
3749
    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3750
    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3751
0
    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3752
0
  }
3753
3754
0
  assert(this_mode == NEAREST_NEARESTMV);
3755
0
  if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3756
0
    return;
3757
0
  }
3758
3759
0
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
3760
0
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3761
0
  mbmi->comp_group_idx = 0;
3762
0
  mbmi->compound_idx = x->compound_idx;
3763
0
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3764
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
3765
0
  mbmi->ref_mv_idx = 0;
3766
0
  mbmi->skip_mode = mbmi->skip_txfm = 1;
3767
0
  mbmi->palette_mode_info.palette_size[0] = 0;
3768
0
  mbmi->palette_mode_info.palette_size[1] = 0;
3769
3770
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
3771
3772
0
  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3773
0
  for (int i = 0; i < num_planes; i++) {
3774
0
    xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3775
0
    xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3776
0
  }
3777
3778
0
  BUFFER_SET orig_dst;
3779
0
  for (int i = 0; i < num_planes; i++) {
3780
0
    orig_dst.plane[i] = xd->plane[i].dst.buf;
3781
0
    orig_dst.stride[i] = xd->plane[i].dst.stride;
3782
0
  }
3783
3784
  // Compare the use of skip_mode with the best intra/inter mode obtained.
3785
0
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3786
0
  int64_t best_intra_inter_mode_cost = INT64_MAX;
3787
0
  if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3788
0
    const ModeCosts *mode_costs = &x->mode_costs;
3789
0
    best_intra_inter_mode_cost = RDCOST(
3790
0
        x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3791
0
        rd_cost->dist);
3792
    // Account for non-skip mode rate in total rd stats
3793
0
    rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3794
0
    av1_rd_cost_update(x->rdmult, rd_cost);
3795
0
  }
3796
3797
  // Obtain the rdcost for skip_mode.
3798
0
  skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst,
3799
0
               best_intra_inter_mode_cost);
3800
3801
0
  if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3802
0
      (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3803
0
    assert(mode_index != THR_INVALID);
3804
0
    search_state->best_mbmode.skip_mode = 1;
3805
0
    search_state->best_mbmode = *mbmi;
3806
0
    memset(search_state->best_mbmode.inter_tx_size,
3807
0
           search_state->best_mbmode.tx_size,
3808
0
           sizeof(search_state->best_mbmode.inter_tx_size));
3809
0
    set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3810
0
                  search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3811
0
                  xd);
3812
0
    search_state->best_mode_index = mode_index;
3813
3814
    // Update rd_cost
3815
0
    rd_cost->rate = skip_mode_rd_stats.rate;
3816
0
    rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3817
0
    rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3818
3819
0
    search_state->best_rd = rd_cost->rdcost;
3820
0
    search_state->best_skip2 = 1;
3821
0
    search_state->best_mode_skippable = 1;
3822
3823
0
    x->txfm_search_info.skip_txfm = 1;
3824
0
  }
3825
0
}
3826
3827
// Get winner mode stats of given mode index
3828
static inline MB_MODE_INFO *get_winner_mode_stats(
3829
    MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3830
    int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3831
    RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3832
    THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3833
0
    int mode_idx) {
3834
0
  MB_MODE_INFO *winner_mbmi;
3835
0
  if (multi_winner_mode_type) {
3836
0
    assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3837
0
    WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3838
0
    winner_mbmi = &winner_mode_stat->mbmi;
3839
3840
0
    *winner_rd_cost = &winner_mode_stat->rd_cost;
3841
0
    *winner_rate_y = winner_mode_stat->rate_y;
3842
0
    *winner_rate_uv = winner_mode_stat->rate_uv;
3843
0
    *winner_mode_index = winner_mode_stat->mode_index;
3844
0
  } else {
3845
0
    winner_mbmi = best_mbmode;
3846
0
    *winner_rd_cost = best_rd_cost;
3847
0
    *winner_rate_y = best_rate_y;
3848
0
    *winner_rate_uv = best_rate_uv;
3849
0
    *winner_mode_index = *best_mode_index;
3850
0
  }
3851
0
  return winner_mbmi;
3852
0
}
3853
3854
// speed feature: fast intra/inter transform type search
// Used for speed >= 2
// When this speed feature is on, in rd mode search, only DCT is used.
// After the mode is determined, this function is called, to select
// transform types and get accurate rdcost.
// Each winner candidate is re-evaluated with a full transform search; if the
// refined RD cost beats the current best, best_mbmode / best_mode_index /
// rd_cost / best_skip2 and ctx->tx_type_map are all updated in place.
static inline void refine_winner_mode_tx(
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
  int64_t best_rd;
  const int num_planes = av1_num_planes(cm);

  if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
                                         rd_cost->skip_txfm))
    return;

  // Set params for winner mode evaluation
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);

  // No best mode identified so far
  if (*best_mode_index == THR_INVALID) return;

  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
    RD_STATS *winner_rd_stats = NULL;
    int winner_rate_y = 0, winner_rate_uv = 0;
    THR_MODES winner_mode_index = 0;

    // TODO(any): Combine best mode and multi-winner mode processing paths
    // Get winner mode stats for current mode index
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);

    // Refinement is skipped for lossless segments and invalid candidates.
    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
        winner_mode_index != THR_INVALID &&
        is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
                                          rd_cost->skip_txfm)) {
      RD_STATS rd_stats = *winner_rd_stats;
      int skip_blk = 0;
      RD_STATS rd_stats_y, rd_stats_uv;
      const int skip_ctx = av1_get_skip_txfm_context(xd);

      *mbmi = *winner_mbmi;

      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

      // Select prediction reference frames.
      for (int i = 0; i < num_planes; i++) {
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
        if (has_second_ref(mbmi))
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
      }

      if (is_inter_mode(mbmi->mode)) {
        const int mi_row = xd->mi_row;
        const int mi_col = xd->mi_col;
        bool is_predictor_built = false;
        const PREDICTION_MODE prediction_mode = mbmi->mode;
        // Do interpolation filter search for realtime mode if applicable.
        if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
            cpi->oxcf.mode == REALTIME &&
            cm->current_frame.reference_mode == SINGLE_REFERENCE &&
            is_inter_mode(prediction_mode) &&
            mbmi->motion_mode == SIMPLE_TRANSLATION &&
            !is_inter_compound_mode(prediction_mode)) {
          is_predictor_built =
              fast_interp_search(cpi, x, mi_row, mi_col, bsize);
        }
        if (!is_predictor_built) {
          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                        av1_num_planes(cm) - 1);
        }
        if (mbmi->motion_mode == OBMC_CAUSAL)
          av1_build_obmc_inter_predictors_sb(cm, xd);

        av1_subtract_plane(x, bsize, 0);
        // Full transform search: recursive TX partitioning when allowed,
        // otherwise uniform TX size.
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
            !xd->lossless[mbmi->segment_id]) {
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                              INT64_MAX);
          assert(rd_stats_y.rate != INT_MAX);
        } else {
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                            INT64_MAX);
          memset(mbmi->inter_tx_size, mbmi->tx_size,
                 sizeof(mbmi->inter_tx_size));
        }
      } else {
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                          INT64_MAX);
      }

      if (num_planes > 1) {
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
      } else {
        av1_init_rd_stats(&rd_stats_uv);
      }

      const int comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;

      // Decide between coding the residual and signaling skip, whichever is
      // cheaper in RD terms (inter modes only).
      const ModeCosts *mode_costs = &x->mode_costs;
      if (is_inter_mode(mbmi->mode) &&
          (!cpi->oxcf.algo_cfg.sharpness || !comp_pred) &&
          RDCOST(x->rdmult,
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
                     rd_stats_uv.rate,
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
        skip_blk = 1;
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
        rd_stats_uv.rate = 0;
        rd_stats_y.dist = rd_stats_y.sse;
        rd_stats_uv.dist = rd_stats_uv.sse;
      } else {
        skip_blk = 0;
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
      }
      // Replace the candidate's old luma/chroma rates with the refined ones.
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
                      winner_rate_y - winner_rate_uv;
      int64_t this_rd =
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
      if (best_rd > this_rd) {
        *best_mbmode = *mbmi;
        *best_mode_index = winner_mode_index;
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
        rd_cost->rate = this_rate;
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        *best_skip2 = skip_blk;
      }
    }
  }
}
3997
3998
/*!\cond */
// Per-block masks telling inter mode search which prediction modes and
// reference-frame combinations to skip.
typedef struct {
  // Mask for each reference frame, specifying which prediction modes to NOT try
  // during search.
  uint32_t pred_modes[REF_FRAMES];
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
  // reference frames (i, j).
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
  // (NONE_FRAME).
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
} mode_skip_mask_t;
/*!\endcond */
4010
4011
// Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
4012
static inline void disable_reference(
4013
0
    MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
4014
0
  for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
4015
0
    ref_combo[ref][ref2 + 1] = true;
4016
0
  }
4017
0
}
4018
4019
// Update 'ref_combo' mask to disable all inter references except ALTREF.
4020
static inline void disable_inter_references_except_altref(
4021
0
    bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
4022
0
  disable_reference(LAST_FRAME, ref_combo);
4023
0
  disable_reference(LAST2_FRAME, ref_combo);
4024
0
  disable_reference(LAST3_FRAME, ref_combo);
4025
0
  disable_reference(GOLDEN_FRAME, ref_combo);
4026
0
  disable_reference(BWDREF_FRAME, ref_combo);
4027
0
  disable_reference(ALTREF2_FRAME, ref_combo);
4028
0
}
4029
4030
// Reference-frame combinations enabled when the reduced reference set
// (REF_SET_REDUCED) is selected; the second entry is NONE_FRAME for
// single-reference prediction.
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
};
4040
4041
// Selects which family of reference-frame combinations mode search may
// consider (see default_skip_mask()).
typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
4042
4043
0
// Initializes 'mask' for the given reference set. REF_SET_FULL enables
// every mode and reference combination; the reduced and realtime sets
// disable all combinations first and then re-enable only an explicit list.
static inline void default_skip_mask(mode_skip_mask_t *mask, REF_SET ref_set) {
  if (ref_set == REF_SET_FULL) {
    // Everything available by default.
    memset(mask, 0, sizeof(*mask));
  } else {
    // All modes available by default.
    memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
    // All references disabled first.
    for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
      for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
        mask->ref_combo[ref1][ref2 + 1] = true;
      }
    }
    const MV_REFERENCE_FRAME(*ref_set_combos)[2];
    int num_ref_combos;

    // Then enable reduced set of references explicitly.
    switch (ref_set) {
      case REF_SET_REDUCED:
        ref_set_combos = reduced_ref_combos;
        num_ref_combos =
            (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
        break;
      case REF_SET_REALTIME:
        ref_set_combos = real_time_ref_combos;
        num_ref_combos =
            (int)sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]);
        break;
      // Unreachable for valid REF_SET values; num_ref_combos = 0 keeps the
      // loop below from dereferencing the (uninitialized) table pointer.
      default: assert(0); num_ref_combos = 0;
    }

    for (int i = 0; i < num_ref_combos; ++i) {
      const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
      mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
    }
  }
}
4080
4081
// Builds the per-block mode/reference skip mask for inter mode search.
// Starts from the default mask for the active reference set, then disables
// references and prediction modes based on reference availability,
// segmentation, alt-ref handling and SAD-driven speed features.
static inline void init_mode_skip_mask(mode_skip_mask_t *mask,
                                       const AV1_COMP *cpi, MACROBLOCK *x,
                                       BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
  REF_SET ref_set = REF_SET_FULL;

  if (sf->rt_sf.use_real_time_ref_set)
    ref_set = REF_SET_REALTIME;
  else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
    ref_set = REF_SET_REDUCED;

  default_skip_mask(mask, ref_set);

  // Find the minimum predicted-MV SAD over the references we will consider;
  // it is used below as a pruning threshold.
  int min_pred_mv_sad = INT_MAX;
  MV_REFERENCE_FRAME ref_frame;
  if (ref_set == REF_SET_REALTIME) {
    // For real-time encoding, we only look at a subset of ref frames. So the
    // threshold for pruning should be computed from this subset as well.
    const int num_rt_refs =
        sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
    for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref != INTRA_FRAME) {
        min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
      }
    }
  } else {
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
      min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
      // Skip checking missing reference in both single and compound reference
      // modes.
      disable_reference(ref_frame, mask->ref_combo);
    } else {
      // Skip fixed mv modes for poor references
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      // Reference not used for the segment.
      disable_reference(ref_frame, mask->ref_combo);
    }
  }
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
  // is disabled for this segment. This is to prevent the possibility that we
  // end up unable to pick any mode.
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
    // unless ARNR filtering is enabled in which case we want
    // an unfiltered alternative. We allow near/nearest as well
    // because they may result in zero-zero MVs but be cheaper.
    if (cpi->rc.is_src_frame_alt_ref &&
        (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
      disable_inter_references_except_altref(mask->ref_combo);

      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
      int_mv near_mv, nearest_mv, global_mv;
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
                  &x->mbmi_ext);
      get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
      get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);

      // Re-enable NEAR/NEAREST only where they differ from the global MV.
      if (near_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
      if (nearest_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
    }
  }

  if (cpi->rc.is_src_frame_alt_ref) {
    if (inter_sf->alt_ref_search_fp &&
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
      mask->pred_modes[ALTREF_FRAME] = 0;
      disable_inter_references_except_altref(mask->ref_combo);
      disable_reference(INTRA_FRAME, mask->ref_combo);
    }
  }

  if (inter_sf->alt_ref_search_fp) {
    if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
      // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
      // those are past frames
      MV_REFERENCE_FRAME start_frame =
          inter_sf->alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
      for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
        if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
            0) {
          // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
          // to the relative dist of LAST_FRAME.
          if (abs(cpi->ref_frame_dist_info
                      .ref_relative_dist[ref_frame - LAST_FRAME] -
                  cpi->ref_frame_dist_info
                      .ref_relative_dist[LAST_FRAME - LAST_FRAME]) > 4) {
            continue;
          }
          if (x->pred_mv_sad[ref_frame] > sad_thresh)
            mask->pred_modes[ref_frame] |= INTER_ALL;
        }
      }
    }
  }

  if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
    if (x->best_pred_mv_sad[0] < INT_MAX) {
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
      const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };

      // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
      for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
        ref_frame = prune_ref_list[ref_idx];
        if (x->pred_mv_sad[ref_frame] > sad_thresh)
          mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
  }

  // Intra is only searched up to a configurable maximum block size.
  if (bsize > sf->part_sf.max_intra_bsize) {
    disable_reference(INTRA_FRAME, mask->ref_combo);
  }

  if (!cpi->oxcf.tool_cfg.enable_global_motion) {
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
      mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
      mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
    }
  }

  // Mask off intra luma modes disallowed for this block's max TX size.
  mask->pred_modes[INTRA_FRAME] |=
      ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];

  // Prune reference frames which are not the closest to the current
  // frame and with large pred_mv_sad.
  if (inter_sf->prune_single_ref) {
    assert(inter_sf->prune_single_ref > 0 && inter_sf->prune_single_ref < 5);
    const double prune_thresh = (inter_sf->prune_single_ref <= 3) ? 1.20 : 1.05;

    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
      const RefFrameDistanceInfo *const ref_frame_dist_info =
          &cpi->ref_frame_dist_info;
      const int is_closest_ref =
          (ref_frame == ref_frame_dist_info->nearest_past_ref) ||
          (ref_frame == ref_frame_dist_info->nearest_future_ref);
      const int ref_idx = ref_frame - LAST_FRAME;

      if (!(cpi->keep_single_ref_frame_mask & (1 << ref_idx) ||
            is_closest_ref)) {
        // dir: 0 for past references, 1 for future references.
        const int dir =
            (ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] < 0)
                ? 0
                : 1;
        if (x->best_pred_mv_sad[dir] < INT_MAX &&
            x->pred_mv_sad[ref_frame] > prune_thresh * x->best_pred_mv_sad[dir])
          mask->pred_modes[ref_frame] |= INTER_SINGLE_ALL;
      }
    }
  }
}
4251
4252
static inline void init_neighbor_pred_buf(const OBMCBuffer *const obmc_buffer,
4253
                                          HandleInterModeArgs *const args,
4254
0
                                          int is_hbd) {
4255
0
  if (is_hbd) {
4256
0
    const int len = sizeof(uint16_t);
4257
0
    args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
4258
0
    args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
4259
0
                                                 (MAX_SB_SQUARE >> 1) * len);
4260
0
    args->above_pred_buf[2] =
4261
0
        CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
4262
0
    args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
4263
0
    args->left_pred_buf[1] =
4264
0
        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
4265
0
    args->left_pred_buf[2] =
4266
0
        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
4267
0
  } else {
4268
0
    args->above_pred_buf[0] = obmc_buffer->above_pred;
4269
0
    args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
4270
0
    args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
4271
0
    args->left_pred_buf[0] = obmc_buffer->left_pred;
4272
0
    args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
4273
0
    args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
4274
0
  }
4275
0
}
4276
4277
static inline int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
4278
0
                                  MV_REFERENCE_FRAME ref_frame) {
4279
0
  const AV1_COMMON *const cm = &cpi->common;
4280
0
  MV_REFERENCE_FRAME rf[2];
4281
0
  av1_set_ref_frame(rf, ref_frame);
4282
4283
0
  if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
4284
4285
0
  if (prune_ref_by_selective_ref_frame(cpi, x, rf,
4286
0
                                       cm->cur_frame->ref_display_order_hint)) {
4287
0
    return 1;
4288
0
  }
4289
4290
0
  return 0;
4291
0
}
4292
4293
static inline int is_ref_frame_used_by_compound_ref(int ref_frame,
4294
0
                                                    int skip_ref_frame_mask) {
4295
0
  for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
4296
0
    if (!(skip_ref_frame_mask & (1 << r))) {
4297
0
      const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
4298
0
      if (rf[0] == ref_frame || rf[1] == ref_frame) {
4299
0
        return 1;
4300
0
      }
4301
0
    }
4302
0
  }
4303
0
  return 0;
4304
0
}
4305
4306
static inline int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
4307
0
                                             const MB_MODE_INFO *mi_cache) {
4308
0
  if (!mi_cache) {
4309
0
    return 0;
4310
0
  }
4311
4312
0
  if (ref_frame < REF_FRAMES) {
4313
0
    return (ref_frame == mi_cache->ref_frame[0] ||
4314
0
            ref_frame == mi_cache->ref_frame[1]);
4315
0
  }
4316
4317
  // if we are here, then the current mode is compound.
4318
0
  MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
4319
0
  return ref_frame == cached_ref_type;
4320
0
}
4321
4322
// Please add/modify parameter setting in this function, making it consistent
// and easy to read and maintain.
// Prepares all per-block state needed before the inter-mode RD loop:
// reference costs, per-reference MV candidates and prediction buffers,
// OBMC neighbor predictions, and the mode-skip mask.
static inline void set_params_rd_pick_inter_mode(
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  unsigned char segment_id = mbmi->segment_id;

  // OBMC scratch buffers and per-segment reference signaling costs.
  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
  av1_collect_neighbors_ref_counts(xd);
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // best_pred_mv_sad[0] tracks past references, [1] tracks future references.
  x->best_pred_mv_sad[0] = INT_MAX;
  x->best_pred_mv_sad[1] = INT_MAX;

  // Set up MV candidates and reference buffers for each single reference.
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
       ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    mbmi_ext->mode_context[ref_frame] = 0;
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
      // Skip the ref frame if the mask says skip and the ref is not used by
      // compound ref.
      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
    }
    if (cpi->sf.inter_sf.alt_ref_search_fp ||
        cpi->sf.inter_sf.prune_single_ref ||
        cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
      // Store the best pred_mv_sad across all past frames
      if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
          0)
        x->best_pred_mv_sad[0] =
            AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
      else
        // Store the best pred_mv_sad across all future frames
        x->best_pred_mv_sad[1] =
            AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
    }
  }

  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
    // No second reference on RT ref set, so no need to initialize
    // Populate MV reference lists for the compound reference types.
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
      mbmi_ext->mode_context[ref_frame] = 0;
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
      // Both constituent references must be enabled for this frame.
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
        continue;
      }

      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      // Ref mv list population is not required, when compound references are
      // pruned.
      if (prune_ref_frame(cpi, x, ref_frame)) continue;

      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                       mbmi_ext->mode_context);
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
    }
  }

  av1_count_overlappable_neighbors(cm, xd);
  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  int use_actual_frame_probs = 1;
  int prune_obmc;
#if CONFIG_FPMT_TEST
  // In the FPMT unit-test configuration, parallel simulation uses the
  // temporary frame probabilities instead of the actual ones.
  use_actual_frame_probs =
      (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
  if (!use_actual_frame_probs) {
    prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
  }
#endif
  if (use_actual_frame_probs) {
    prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
  }
  // Build above/left OBMC predictions only when OBMC is enabled, not pruned
  // by the probability threshold, and applicable to this block.
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
    if (check_num_overlappable_neighbors(mbmi) &&
        is_motion_variation_allowed_bsize(bsize)) {
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                       MAX_SB_SIZE >> 1 };
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                        MAX_SB_SIZE >> 1 };
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
                                          dst_width1, dst_height1,
                                          args->above_pred_stride);
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
                                         dst_width2, dst_height2,
                                         args->left_pred_stride);
      const int num_planes = av1_num_planes(cm);
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
                           mi_col, 0, num_planes);
      calc_target_weighted_pred(
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
          args->left_pred_buf[0], args->left_pred_stride[0]);
    }
  }

  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);

  // Set params for mode evaluation
  set_mode_eval_params(cpi, x, MODE_EVAL);

  x->comp_rd_stats_idx = 0;

  // Invalidate the best single-prediction SSE per reference.
  for (int idx = 0; idx < REF_FRAMES; idx++) {
    args->best_single_sse_in_refs[idx] = INT32_MAX;
  }
}
static inline void init_single_inter_mode_search_state(
4459
0
    InterModeSearchState *search_state) {
4460
0
  for (int dir = 0; dir < 2; ++dir) {
4461
0
    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4462
0
      for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
4463
0
        SingleInterModeState *state;
4464
4465
0
        state = &search_state->single_state[dir][mode][ref_frame];
4466
0
        state->ref_frame = NONE_FRAME;
4467
0
        state->rd = INT64_MAX;
4468
4469
0
        state = &search_state->single_state_modelled[dir][mode][ref_frame];
4470
0
        state->ref_frame = NONE_FRAME;
4471
0
        state->rd = INT64_MAX;
4472
4473
0
        search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
4474
0
      }
4475
0
    }
4476
0
  }
4477
4478
0
  for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4479
0
    search_state->best_single_rd[ref_frame] = INT64_MAX;
4480
0
    search_state->best_single_mode[ref_frame] = PRED_MODE_INVALID;
4481
0
  }
4482
0
  av1_zero(search_state->single_state_cnt);
4483
0
  av1_zero(search_state->single_state_modelled_cnt);
4484
0
}
4485
4486
// Initializes the inter-mode RD search state for one block: best-so-far
// records, per-mode RD thresholds, and the simple/modelled RD tables used
// for pruning. Compound-related state is only set up when the frame allows
// more than single-reference prediction.
static inline void init_inter_mode_search_state(
    InterModeSearchState *search_state, const AV1_COMP *cpi,
    const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
  init_intra_mode_search_state(&search_state->intra_search_state);
  av1_invalid_rd_stats(&search_state->best_y_rdcost);

  search_state->best_rd = best_rd_so_far;
  search_state->best_skip_rd[0] = INT64_MAX;
  search_state->best_skip_rd[1] = INT64_MAX;

  av1_zero(search_state->best_mbmode);

  search_state->best_rate_y = INT_MAX;

  search_state->best_rate_uv = INT_MAX;

  search_state->best_mode_skippable = 0;

  search_state->best_skip2 = 0;

  search_state->best_mode_index = THR_INVALID;

  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const unsigned char segment_id = mbmi->segment_id;

  search_state->num_available_refs = 0;
  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
  memset(search_state->dist_order_refs, -1,
         sizeof(search_state->dist_order_refs));

  // Modes up to and including the last NEWMV entry are never thresholded.
  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
    search_state->mode_threshold[i] = 0;
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
  // Remaining single-reference mode thresholds are scaled by the adaptive
  // per-mode frequency factors.
  for (int i = LAST_NEW_MV_INDEX + 1; i < SINGLE_REF_MODE_END; ++i)
    search_state->mode_threshold[i] =
        ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
        RD_THRESH_FAC_FRAC_BITS;

  search_state->best_intra_rd = INT64_MAX;

  search_state->best_pred_sse = UINT_MAX;

  av1_zero(search_state->single_newmv);
  av1_zero(search_state->single_newmv_rate);
  av1_zero(search_state->single_newmv_valid);
  // Invalidate the single-inter-mode simple/modelled RD tables.
  for (int i = SINGLE_INTER_MODE_START; i < SINGLE_INTER_MODE_END; ++i) {
    for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
      for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
        search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
        search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
      }
    }
  }

  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] = INT64_MAX;
  }

  // Compound-mode thresholds and tables are needed only when compound
  // prediction is possible in this frame.
  if (cpi->common.current_frame.reference_mode != SINGLE_REFERENCE) {
    for (int i = SINGLE_REF_MODE_END; i < THR_INTER_MODE_END; ++i)
      search_state->mode_threshold[i] =
          ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
          RD_THRESH_FAC_FRAC_BITS;

    for (int i = COMP_INTER_MODE_START; i < COMP_INTER_MODE_END; ++i) {
      for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
        for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
          search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
          search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
        }
      }
    }

    init_single_inter_mode_search_state(search_state);
  }
}
static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
4565
                           const MV_REFERENCE_FRAME *ref_frame,
4566
0
                           const PREDICTION_MODE this_mode) {
4567
0
  if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
4568
0
    return true;
4569
0
  }
4570
4571
0
  return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
4572
0
}
4573
4574
static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
4575
                                      BLOCK_SIZE bsize,
4576
                                      PREDICTION_MODE curr_mode,
4577
0
                                      const MV_REFERENCE_FRAME *ref_frames) {
4578
0
  const int comp_pred = ref_frames[1] > INTRA_FRAME;
4579
0
  if (comp_pred) {
4580
0
    if (!is_comp_ref_allowed(bsize)) return 1;
4581
0
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]])) {
4582
0
      return 1;
4583
0
    }
4584
4585
0
    const AV1_COMMON *const cm = &cpi->common;
4586
0
    if (frame_is_intra_only(cm)) return 1;
4587
4588
0
    const CurrentFrame *const current_frame = &cm->current_frame;
4589
0
    if (current_frame->reference_mode == SINGLE_REFERENCE) return 1;
4590
4591
0
    const struct segmentation *const seg = &cm->seg;
4592
0
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
4593
    // Do not allow compound prediction if the segment level reference frame
4594
    // feature is in use as in this case there can only be one reference.
4595
0
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
4596
0
  }
4597
4598
0
  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
4599
    // Mode must be compatible
4600
0
    if (!is_interintra_allowed_bsize(bsize)) return 1;
4601
0
    if (!is_interintra_allowed_mode(curr_mode)) return 1;
4602
0
  }
4603
4604
0
  return 0;
4605
0
}
4606
4607
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
4608
0
                                        BLOCK_SIZE bsize, int mib_size) {
4609
0
  const int sb_size_mask = mib_size - 1;
4610
0
  const MACROBLOCKD *const xd = &x->e_mbd;
4611
0
  const int mi_row = xd->mi_row;
4612
0
  const int mi_col = xd->mi_col;
4613
0
  const int mi_row_in_sb = mi_row & sb_size_mask;
4614
0
  const int mi_col_in_sb = mi_col & sb_size_mask;
4615
0
  const int mi_w = mi_size_wide[bsize];
4616
0
  const int mi_h = mi_size_high[bsize];
4617
0
  int picked_ref_frames_mask = 0;
4618
0
  for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_h; ++i) {
4619
0
    for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_w; ++j) {
4620
0
      picked_ref_frames_mask |= x->picked_ref_frames_mask[i * 32 + j];
4621
0
    }
4622
0
  }
4623
0
  return picked_ref_frames_mask;
4624
0
}
4625
4626
// Check if reference frame pair of the current block matches with the given
4627
// block.
4628
static inline int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4629
0
                                       const MV_REFERENCE_FRAME *ref_frames) {
4630
0
  return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4631
0
          (ref_frames[1] == mbmi->ref_frame[1]));
4632
0
}
4633
4634
// Case 1: return 0, means don't skip this mode
// Case 2: return 1, means skip this mode completely
// Case 3: return 2, means skip compound only, but still try single motion modes
// Order-independent pruning of (mode, ref_frame) candidates: combines the
// mode-skip mask, reference pruning, repeated-MV elimination, the cached
// mode-info reuse path and neighbor-based NEARMV pruning.
static int inter_mode_search_order_independent_skip(
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
    InterModeSearchState *search_state, int skip_ref_frame_mask,
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
    return 1;
  }

  const int ref_type = av1_ref_frame_type(ref_frame);
  if (!cpi->sf.rt_sf.use_real_time_ref_set)
    if (prune_ref_frame(cpi, x, ref_type)) return 1;

  // This is only used in motion vector unit test.
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
      ref_frame[0] == INTRA_FRAME)
    return 1;

  const AV1_COMMON *const cm = &cpi->common;
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
    return 1;
  }

  // Reuse the prediction mode in cache
  if (x->use_mb_mode_cache) {
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
    const PREDICTION_MODE cached_mode = cached_mi->mode;
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;

    // If the cached mode is intra, then we just need to match the mode.
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
      return 1;
    }

    // If the cached mode is single inter mode, then we match the mode and
    // reference frame.
    if (cached_mode_is_single) {
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
        return 1;
      }
    } else {
      // If the cached mode is compound, then we need to consider several cases.
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
      if (mode_is_single) {
        // If the mode is single, we know the modes can't match. But we might
        // still want to search it if compound mode depends on the current mode.
        int skip_motion_mode_only = 0;
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
        } else if (cached_mode == NEW_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
                                   ref_frame[0] == cached_frame[1]);
        }

        // 1 = skip completely; 2 = keep the simple-translation search only.
        return 1 + skip_motion_mode_only;
      } else {
        // If both modes are compound, then everything must match.
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
            ref_frame[1] != cached_frame[1]) {
          return 1;
        }
      }
    }
  }

  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
  // If no valid mode has been found so far in PARTITION_NONE when finding a
  // valid partition is required, do not skip mode.
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
      x->must_find_valid_partition)
    return 0;

  const SPEED_FEATURES *const sf = &cpi->sf;
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
  // frames
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
      (mode == NEAR_NEARMV || mode == NEARMV)) {
    const MACROBLOCKD *const xd = &x->e_mbd;
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
        xd->up_available) {
      // Thresholds indexed by [speed-feature level - 1][qindex sub-range].
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
                                                    { 1, 1, 0 },
                                                    { 2, 1, 0 } };
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;

      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
             qindex_sub_range < 3);
      const int num_ref_frame_pair_match_thresh =
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
                    [qindex_sub_range];

      assert(num_ref_frame_pair_match_thresh <= 2 &&
             num_ref_frame_pair_match_thresh >= 0);
      int num_ref_frame_pair_match = 0;

      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
      num_ref_frame_pair_match +=
          match_ref_frame_pair(xd->above_mbmi, ref_frame);

      // Pruning based on ref frame pair match with neighbors.
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
    }
  }

  int skip_motion_mode = 0;
  if (mbmi->partition != PARTITION_NONE) {
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
    if (ref_type <= ALTREF_FRAME && skip_ref) {
      // Since the compound ref modes depends on the motion estimation result of
      // two single ref modes (best mv of single ref modes as the start point),
      // if current single ref mode is marked skip, we need to check if it will
      // be used in compound ref modes.
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
        // Found a not skipped compound ref mode which contains current
        // single ref. So this single ref can't be skipped completely
        // Just skip its motion mode search, still try its simple
        // transition mode.
        skip_motion_mode = 1;
        skip_ref = 0;
      }
    }
    // If we are reusing the prediction from cache, and the current frame is
    // required by the cache, then we cannot prune it.
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
      skip_ref = 0;
      // If the cache only needs the current reference type for compound
      // prediction, then we can skip motion mode search.
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
    }
    if (skip_ref) return 1;
  }

  if (ref_frame[0] == INTRA_FRAME) {
    if (mode != DC_PRED) {
      // Disable intra modes other than DC_PRED for blocks with low variance
      // Threshold for intra skipping based on source variance
      // TODO(debargha): Specialize the threshold for super block sizes
      const unsigned int skip_intra_var_thresh = 64;
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
          x->source_variance < skip_intra_var_thresh)
        return 1;
    }
  }

  if (skip_motion_mode) return 2;

  return 0;
}
static inline void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4790
                             const MV_REFERENCE_FRAME *ref_frames,
4791
0
                             const AV1_COMMON *cm) {
4792
0
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4793
0
  mbmi->ref_mv_idx = 0;
4794
0
  mbmi->mode = curr_mode;
4795
0
  mbmi->uv_mode = UV_DC_PRED;
4796
0
  mbmi->ref_frame[0] = ref_frames[0];
4797
0
  mbmi->ref_frame[1] = ref_frames[1];
4798
0
  pmi->palette_size[0] = 0;
4799
0
  pmi->palette_size[1] = 0;
4800
0
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
4801
0
  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4802
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
4803
0
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4804
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
4805
0
}
4806
4807
// Records the best simple and modelled RD of the just-searched
// single-reference mode into the sorted per-(direction, mode) state arrays,
// so compound-mode search can later be pruned from single-mode results.
static inline void collect_single_states(MACROBLOCK *x,
                                         InterModeSearchState *search_state,
                                         const MB_MODE_INFO *const mbmi) {
  int i, j;
  const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
  const PREDICTION_MODE this_mode = mbmi->mode;
  // dir 0: references up to GOLDEN_FRAME; dir 1: the later references.
  const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
  const int mode_offset = INTER_OFFSET(this_mode);
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);

  // Simple rd
  // Take the minimum over all searched ref_mv indices.
  int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
    const int64_t rd =
        search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
    if (rd < simple_rd) simple_rd = rd;
  }

  // Insertion sort of single_state
  // Keeps the array ordered by ascending rd.
  const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
  SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
  i = search_state->single_state_cnt[dir][mode_offset];
  for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
    state_s[j] = state_s[j - 1];
  state_s[j] = this_state_s;
  search_state->single_state_cnt[dir][mode_offset]++;

  // Modelled rd
  // Take the minimum over all searched ref_mv indices.
  int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
    const int64_t rd =
        search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
    if (rd < modelled_rd) modelled_rd = rd;
  }

  // Insertion sort of single_state_modelled
  // Keeps the array ordered by ascending rd.
  const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
  SingleInterModeState *state_m =
      search_state->single_state_modelled[dir][mode_offset];
  i = search_state->single_state_modelled_cnt[dir][mode_offset];
  for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
    state_m[j] = state_m[j - 1];
  state_m[j] = this_state_m;
  search_state->single_state_modelled_cnt[dir][mode_offset]++;
}
// Post-processes the collected single-mode states: invalidates reference
// frames whose RD is far worse than the best NEWMV/GLOBALMV result, then
// builds single_rd_order — a per-(direction, mode) candidate ordering used
// to prune the compound search (simple rd first, then modelled rd).
static inline void analyze_single_states(const AV1_COMP *cpi,
                                         InterModeSearchState *search_state) {
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
  assert(prune_level >= 1);
  int i, j, dir, mode;

  for (dir = 0; dir < 2; ++dir) {
    int64_t best_rd;
    SingleInterModeState(*state)[FWD_REFS];
    // Higher prune level uses a tighter factor (rd is compared as
    // (rd >> 3) * prune_factor > best_rd).
    const int prune_factor = prune_level >= 2 ? 6 : 5;

    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
    // reference frames for all the modes (NEARESTMV and NEARMV may not
    // have same motion vectors). Always keep the best of each mode
    // because it might form the best possible combination with other mode.
    state = search_state->single_state[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }

    // Apply the same pruning to the modelled-rd states.
    state = search_state->single_state_modelled[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }
  }

  // Ordering by simple rd first, then by modelled rd
  for (dir = 0; dir < 2; ++dir) {
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
      const int state_cnt_m =
          search_state->single_state_modelled_cnt[dir][mode];
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
      SingleInterModeState *state_m =
          search_state->single_state_modelled[dir][mode];
      int count = 0;
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
      // First take the valid refs in simple-rd order.
      for (i = 0; i < state_cnt_s; ++i) {
        if (state_s[i].rd == INT64_MAX) break;
        if (state_s[i].valid) {
          search_state->single_rd_order[dir][mode][count++] =
              state_s[i].ref_frame;
        }
      }
      if (count >= max_candidates) continue;

      // Then fill remaining slots from the modelled-rd order, skipping refs
      // already chosen or invalidated on the simple-rd side.
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
        if (state_m[i].rd == INT64_MAX) break;
        if (!state_m[i].valid) continue;
        const int ref_frame = state_m[i].ref_frame;
        int match = 0;
        // Check if existing already
        for (j = 0; j < count; ++j) {
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
            match = 1;
            break;
          }
        }
        if (match) continue;
        // Check if this ref_frame is removed in simple rd
        int valid = 1;
        for (j = 0; j < state_cnt_s; ++j) {
          if (ref_frame == state_s[j].ref_frame) {
            valid = state_s[j].valid;
            break;
          }
        }
        if (valid) {
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
        }
      }
    }
  }
}
static int compound_skip_get_candidates(
4943
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
4944
0
    const int dir, const PREDICTION_MODE mode) {
4945
0
  const int mode_offset = INTER_OFFSET(mode);
4946
0
  const SingleInterModeState *state =
4947
0
      search_state->single_state[dir][mode_offset];
4948
0
  const SingleInterModeState *state_modelled =
4949
0
      search_state->single_state_modelled[dir][mode_offset];
4950
4951
0
  int max_candidates = 0;
4952
0
  for (int i = 0; i < FWD_REFS; ++i) {
4953
0
    if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4954
0
    max_candidates++;
4955
0
  }
4956
4957
0
  int candidates = max_candidates;
4958
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4959
0
    candidates = AOMMIN(2, max_candidates);
4960
0
  }
4961
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4962
0
    if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4963
0
        state[0].ref_frame == state_modelled[0].ref_frame)
4964
0
      candidates = 1;
4965
0
    if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4966
0
  }
4967
4968
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4969
    // Limit the number of candidates to 1 in each direction for compound
4970
    // prediction
4971
0
    candidates = AOMMIN(1, candidates);
4972
0
  }
4973
0
  return candidates;
4974
0
}
4975
4976
// Returns 1 when the compound mode (this_mode, {ref_frame, second_ref_frame})
// can be skipped based on the RD statistics gathered while searching the
// corresponding single-reference modes; returns 0 otherwise.
static int compound_skip_by_single_states(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
  // Per-reference single sub-modes of the compound mode.
  const int mode[2] = { compound_ref0_mode(this_mode),
                        compound_ref1_mode(this_mode) };
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
  // Direction of each reference: 0 = forward (<= GOLDEN_FRAME), 1 = backward.
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
  int ref_searched[2] = { 0, 0 };
  int ref_mv_match[2] = { 1, 1 };
  int i, j;

  // Determine whether each reference was evaluated as a single-reference mode
  // (i.e. appears in the recorded single-mode states).
  for (i = 0; i < 2; ++i) {
    const SingleInterModeState *state =
        search_state->single_state[mode_dir[i]][mode_offset[i]];
    const int state_cnt =
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
    for (j = 0; j < state_cnt; ++j) {
      if (state[j].ref_frame == refs[i]) {
        ref_searched[i] = 1;
        break;
      }
    }
  }

  // For NEARESTMV/NEARMV sub-modes, verify that the MV the compound mode
  // would use matches the MV used by the single-reference search for every
  // ref-MV index; if any differs, the single-mode statistics are not a valid
  // basis for pruning this compound mode.
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
      continue;
    }
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
      int_mv single_mv;
      int_mv comp_mv;
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
                  &x->mbmi_ext);
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
      if (single_mv.as_int != comp_mv.as_int) {
        ref_mv_match[i] = 0;
        break;
      }
    }
  }

  // Skip the compound mode if either reference is absent from the top
  // single-reference candidates (ranked by simple RD) for its sub-mode.
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
    const int candidates =
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
    const MV_REFERENCE_FRAME *ref_order =
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
    int match = 0;
    for (j = 0; j < candidates; ++j) {
      if (refs[i] == ref_order[j]) {
        match = 1;
        break;
      }
    }
    if (!match) return 1;
  }

  return 0;
}
5040
5041
// Check if ref frames of current block matches with given block.
5042
static inline void match_ref_frame(const MB_MODE_INFO *const mbmi,
5043
                                   const MV_REFERENCE_FRAME *ref_frames,
5044
0
                                   int *const is_ref_match) {
5045
0
  if (is_inter_block(mbmi)) {
5046
0
    is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
5047
0
    is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
5048
0
    if (has_second_ref(mbmi)) {
5049
0
      is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
5050
0
      is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
5051
0
    }
5052
0
  }
5053
0
}
5054
5055
// Prune compound mode using ref frames of neighbor blocks.
5056
static inline int compound_skip_using_neighbor_refs(
5057
    MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
5058
0
    const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
5059
  // Exclude non-extended compound modes from pruning
5060
0
  if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
5061
0
      this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
5062
0
    return 0;
5063
5064
0
  if (prune_ext_comp_using_neighbors >= 3) return 1;
5065
5066
0
  int is_ref_match[2] = { 0 };  // 0 - match for forward refs
5067
                                // 1 - match for backward refs
5068
  // Check if ref frames of this block matches with left neighbor.
5069
0
  if (xd->left_available)
5070
0
    match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
5071
5072
  // Check if ref frames of this block matches with above neighbor.
5073
0
  if (xd->up_available)
5074
0
    match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
5075
5076
  // Combine ref frame match with neighbors in forward and backward refs.
5077
0
  const int track_ref_match = is_ref_match[0] + is_ref_match[1];
5078
5079
  // Pruning based on ref frame match with neighbors.
5080
0
  if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
5081
0
  return 1;
5082
0
}
5083
5084
// Update best single mode for the given reference frame based on simple rd.
5085
static inline void update_best_single_mode(InterModeSearchState *search_state,
5086
                                           const PREDICTION_MODE this_mode,
5087
                                           const MV_REFERENCE_FRAME ref_frame,
5088
0
                                           int64_t this_rd) {
5089
0
  if (this_rd < search_state->best_single_rd[ref_frame]) {
5090
0
    search_state->best_single_rd[ref_frame] = this_rd;
5091
0
    search_state->best_single_mode[ref_frame] = this_mode;
5092
0
  }
5093
0
}
5094
5095
// Prune compound mode using best single mode for the same reference.
5096
static inline int skip_compound_using_best_single_mode_ref(
5097
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
5098
    const PREDICTION_MODE *best_single_mode,
5099
0
    int prune_comp_using_best_single_mode_ref) {
5100
  // Exclude non-extended compound modes from pruning
5101
0
  if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
5102
0
      this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
5103
0
    return 0;
5104
5105
0
  assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
5106
0
  const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
5107
  // Get ref frame direction corresponding to NEWMV
5108
  // 0 - NEWMV corresponding to forward direction
5109
  // 1 - NEWMV corresponding to backward direction
5110
0
  const int newmv_dir = comp_mode_ref0 != NEWMV;
5111
5112
  // Avoid pruning the compound mode when ref frame corresponding to NEWMV
5113
  // have NEWMV as single mode winner.
5114
  // Example: For an extended-compound mode,
5115
  // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
5116
  // - Ref frame corresponding to NEWMV is ALTREF_FRAME
5117
  // - Avoid pruning this mode, if best single mode corresponding to ref frame
5118
  //   ALTREF_FRAME is NEWMV
5119
0
  const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
5120
0
  if (single_mode == NEWMV) return 0;
5121
5122
  // Avoid pruning the compound mode when best single mode is not available
5123
0
  if (prune_comp_using_best_single_mode_ref == 1)
5124
0
    if (single_mode == MB_MODE_COUNT) return 0;
5125
0
  return 1;
5126
0
}
5127
5128
0
// qsort comparator for int64_t values in ascending order. Uses the
// comparison-difference idiom instead of subtraction to avoid overflow.
static int compare_int64(const void *a, const void *b) {
  const int64_t lhs = *(const int64_t *)a;
  const int64_t rhs = *(const int64_t *)b;
  return (lhs > rhs) - (lhs < rhs);
}
5139
5140
// Records the current mode as the new best: copies its RD stats, mode info,
// and skip flags into search_state, *best_rd_stats_dst, and ctx.
static inline void update_search_state(
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  // The skip-transform flag only applies to non-intra winner modes.
  const int skip_txfm =
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);

  search_state->best_rd = new_best_rd_stats->rdcost;
  search_state->best_mode_index = new_best_mode;
  *best_rd_stats_dst = *new_best_rd_stats;
  search_state->best_mbmode = *mbmi;
  search_state->best_skip2 = skip_txfm;
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
  // These two values will be updated when av1_txfm_search is called.
  if (txfm_search_done) {
    // Fold the skip-txfm signaling cost into the luma rate.
    search_state->best_rate_y =
        new_best_rd_stats_y->rate +
        x->mode_costs.skip_txfm_cost[skip_ctx]
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
  }
  search_state->best_y_rdcost = *new_best_rd_stats_y;
  // Preserve the winning per-4x4 transform types for this block.
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
5171
5172
// Find the best RD for a reference frame (among single reference modes)
5173
// and store +10% of it in the 0-th element in ref_frame_rd.
5174
0
static inline void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
5175
0
  assert(ref_frame_rd[0] == INT64_MAX);
5176
0
  int64_t ref_copy[REF_FRAMES - 1];
5177
0
  memcpy(ref_copy, ref_frame_rd + 1,
5178
0
         sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
5179
0
  qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
5180
5181
0
  int64_t cutoff = ref_copy[0];
5182
  // The cut-off is within 10% of the best.
5183
0
  if (cutoff != INT64_MAX) {
5184
0
    assert(cutoff < INT64_MAX / 200);
5185
0
    cutoff = (110 * cutoff) / 100;
5186
0
  }
5187
0
  ref_frame_rd[0] = cutoff;
5188
0
}
5189
5190
// Check if either frame is within the cutoff.
5191
static inline bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
5192
                                        MV_REFERENCE_FRAME frame1,
5193
0
                                        MV_REFERENCE_FRAME frame2) {
5194
0
  assert(frame2 > 0);
5195
0
  return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
5196
0
         ref_frame_rd[frame2] <= ref_frame_rd[0];
5197
0
}
5198
5199
// Re-evaluates each stored winner candidate (which so far only searched
// SIMPLE_TRANSLATION) with a full motion-mode search via motion_mode_rd(),
// updating the overall best mode in search_state when a candidate wins.
static inline void evaluate_motion_mode_for_winner_candidates(
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
    PICK_MODE_CONTEXT *const ctx,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
    InterModeSearchState *const search_state, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;

  for (int cand = 0; cand < num_best_cand; cand++) {
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    av1_init_rd_stats(&rd_stats);
    av1_init_rd_stats(&rd_stats_y);
    av1_init_rd_stats(&rd_stats_uv);
    int rate_mv;

    // Restore this candidate's saved search context.
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
    args->skip_motion_mode =
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
    rd_stats.rate =
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;

    // Continue if the best candidate is compound.
    if (!is_inter_singleref_mode(mbmi->mode)) continue;

    x->txfm_search_info.skip_txfm = 0;
    struct macroblockd_plane *pd = xd->plane;
    // Snapshot the destination buffers before prediction.
    const BUFFER_SET orig_dst = {
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
    };

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    // Initialize motion mode to simple translation
    // Calculation of switchable rate depends on it.
    mbmi->motion_mode = 0;
    // Point each plane's prediction buffers at the candidate's references.
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
                           search_state->best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
    int64_t ret_value = motion_mode_rd(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
        do_tx_search, inter_modes_info, 1, &this_yrd);

    if (ret_value != INT64_MAX) {
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
          mode_enum, NULL, bsize, rd_stats.rdcost,
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);

      // Apply the warp/OBMC RD bias before comparing against the best mode.
      int64_t best_scaled_rd = search_state->best_rd;
      int64_t this_scaled_rd = rd_stats.rdcost;
      if (search_state->best_mode_index != THR_INVALID)
        increase_motion_mode_rd(&search_state->best_mbmode, mbmi,
                                &best_scaled_rd, &this_scaled_rd,
                                cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct,
                                cpi->sf.inter_sf.bias_obmc_mode_rd_scale_pct);

      if (this_scaled_rd < best_scaled_rd) {
        *yrd = this_yrd;
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                            &rd_stats_uv, mode_enum, x, do_tx_search);
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
      }
    }
  }
}
5285
5286
/*!\cond */
// Arguments for speed feature pruning of inter mode search
typedef struct {
  // Output flag: set when only the motion mode search should be skipped.
  int *skip_motion_mode;
  // Mask of modes/reference frames excluded from the search.
  mode_skip_mask_t *mode_skip_mask;
  // Running state of the inter mode search.
  InterModeSearchState *search_state;
  // Reference-frame skip mask passed to the order-independent skip check.
  int skip_ref_frame_mask;
  // Set once the first compound prediction mode has been reached.
  int reach_first_comp_mode;
  // Multiplier applied to mode thresholds when the best mode is skippable.
  int mode_thresh_mul_fact;
  // Number of single-reference modes processed so far (compared against
  // NUM_SINGLE_REF_MODES before computing the single-ref RD cutoff).
  int num_single_modes_processed;
  // Set once find_top_ref() has produced the single-ref RD cutoff.
  int prune_cpd_using_sr_stats_ready;
} InterModeSFArgs;
/*!\endcond */
5299
5300
// Decides whether the inter mode at position midx of the default mode order
// should be skipped, applying the speed-feature pruning strategies in turn.
// Returns 1 to skip the mode, 0 to evaluate it. Side effect: sets
// *args->skip_motion_mode when only the motion mode search is to be skipped.
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
                           int64_t *ref_frame_rd, int midx,
                           InterModeSFArgs *args, int is_low_temp_var) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Get the actual prediction mode we are trying in this iteration
  const THR_MODES mode_enum = av1_default_mode_order[midx];
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
  const PREDICTION_MODE this_mode = mode_def->mode;
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
  const int comp_pred = second_ref_frame > INTRA_FRAME;

  // Intra modes are not handled by this function.
  if (ref_frame == INTRA_FRAME) return 1;

  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
      comp_pred) {
    return 1;
  }

  // This is for real time encoding.
  if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
      this_mode != NEARESTMV)
    return 1;

  // Check if this mode should be skipped because it is incompatible with the
  // current frame
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
    return 1;
  const int ret = inter_mode_search_order_independent_skip(
      cpi, x, args->mode_skip_mask, args->search_state,
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
  if (ret == 1) return 1;
  // ret == 2 means only the motion mode search is skipped, not the mode.
  *(args->skip_motion_mode) = (ret == 2);

  // We've reached the first compound prediction mode, get stats from the
  // single reference predictors to help with pruning.
  // Disable this pruning logic if interpolation filter search was skipped for
  // single prediction modes as it can result in aggressive pruning of compound
  // prediction modes due to the absence of modelled_rd populated by
  // av1_interpolation_filter_search().
  // TODO(Remya): Check the impact of the sf
  // 'prune_comp_search_by_single_result' if compound prediction modes are
  // enabled in future for REALTIME encode.
  if (!sf->interp_sf.skip_interp_filter_search &&
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
      args->reach_first_comp_mode == 0) {
    analyze_single_states(cpi, args->search_state);
    args->reach_first_comp_mode = 1;
  }

  // Prune aggressively when best mode is skippable.
  int mul_fact = args->search_state->best_mode_skippable
                     ? args->mode_thresh_mul_fact
                     : (1 << MODE_THRESH_QBITS);
  int64_t mode_threshold =
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
      MODE_THRESH_QBITS;

  if (args->search_state->best_rd < mode_threshold) return 1;

  // Skip this compound mode based on the RD results from the single prediction
  // modes
  if (!sf->interp_sf.skip_interp_filter_search &&
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
                                       ref_frame, second_ref_frame, x))
      return 1;
  }

  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
    // After we done with single reference modes, find the 2nd best RD
    // for a reference frame. Only search compound modes that have a reference
    // frame at least as good as the 2nd best.
    if (!args->prune_cpd_using_sr_stats_ready &&
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
      find_top_ref(ref_frame_rd);
      args->prune_cpd_using_sr_stats_ready = 1;
    }
    if (args->prune_cpd_using_sr_stats_ready &&
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
      return 1;
  }

  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
    return 1;
  }

  // Prune extended compound modes based on neighbor ref-frame usage.
  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
    if (compound_skip_using_neighbor_refs(
            xd, this_mode, ref_frames,
            sf->inter_sf.prune_ext_comp_using_neighbors))
      return 1;
  }

  // Prune extended compound modes using the per-ref best single mode.
  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
    if (skip_compound_using_best_single_mode_ref(
            this_mode, ref_frames, args->search_state->best_single_mode,
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
      return 1;
  }

  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
    if (skip_nearest_near_mv_using_refmv_weight(
            x, this_mode, ref_frame_type,
            args->search_state->best_mbmode.mode)) {
      // Ensure the mode is pruned only when the current block has obtained a
      // valid inter mode.
      assert(is_inter_mode(args->search_state->best_mbmode.mode));
      return 1;
    }
  }

  // Real-time pruning of GOLDEN_FRAME modes when the golden frame is stale.
  if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
      ref_frame == GOLDEN_FRAME && !comp_pred) {
    const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
    if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
        args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
      if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
        return 1;
    }
  }

  return 0;
}
5431
5432
static void record_best_compound(REFERENCE_MODE reference_mode,
5433
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
5434
                                 InterModeSearchState *search_state,
5435
0
                                 int compmode_cost) {
5436
0
  int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
5437
5438
0
  if (reference_mode == REFERENCE_MODE_SELECT) {
5439
0
    single_rate = rd_stats->rate - compmode_cost;
5440
0
    hybrid_rate = rd_stats->rate;
5441
0
  } else {
5442
0
    single_rate = rd_stats->rate;
5443
0
    hybrid_rate = rd_stats->rate + compmode_cost;
5444
0
  }
5445
5446
0
  single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
5447
0
  hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);
5448
5449
0
  if (!comp_pred) {
5450
0
    if (single_rd < search_state->best_pred_rd[SINGLE_REFERENCE])
5451
0
      search_state->best_pred_rd[SINGLE_REFERENCE] = single_rd;
5452
0
  } else {
5453
0
    if (single_rd < search_state->best_pred_rd[COMPOUND_REFERENCE])
5454
0
      search_state->best_pred_rd[COMPOUND_REFERENCE] = single_rd;
5455
0
  }
5456
0
  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
5457
0
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
5458
0
}
5459
5460
// Does a transform search over a list of the best inter mode candidates.
// This is called if the original mode search computed an RD estimate
// for the transform search rather than doing a full search.
static void tx_search_best_inter_candidates(
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
    InterModeSearchState *search_state, RD_STATS *rd_cost,
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int num_planes = av1_num_planes(cm);
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  // Rank the stored candidates by their estimated RD.
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
  search_state->best_rd = best_rd_so_far;
  search_state->best_mode_index = THR_INVALID;
  // Initialize best mode stats for winner mode processing
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
  // Cap the candidate count at the real-time search limit.
  inter_modes_info->num =
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
          ? inter_modes_info->num
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
  const int64_t top_est_rd =
      inter_modes_info->num > 0
          ? inter_modes_info
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
          : INT64_MAX;
  *yrd = INT64_MAX;
  int64_t best_rd_in_this_partition = INT64_MAX;
  int num_inter_mode_cands = inter_modes_info->num;
  int newmv_mode_evaled = 0;
  int max_allowed_cands = INT_MAX;
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
    // The bound on the no. of inter mode candidates, beyond which the
    // candidates are limited if a newmv mode got evaluated, is set as
    // max_allowed_cands + 1.
    const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
    max_allowed_cands =
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
  }

  int num_mode_thresh = INT_MAX;
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
    // Bound the no. of transform searches per prediction mode beyond a
    // threshold.
    const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
    num_mode_thresh =
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
  }

  int num_tx_cands = 0;
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
  // Iterate over best inter mode candidates and perform tx search
  for (int j = 0; j < num_inter_mode_cands; ++j) {
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
    const PREDICTION_MODE prediction_mode = mbmi->mode;
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
    // Stop once the estimated RD falls too far behind the best estimate.
    if (curr_est_rd * 0.80 > top_est_rd) break;

    // Beyond the per-mode threshold, allow at most one tx search per mode
    // (two for NEARESTMV).
    if (num_tx_cands > num_mode_thresh) {
      if ((prediction_mode != NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
          (prediction_mode == NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
        continue;
    }

    txfm_info->skip_txfm = 0;
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select prediction reference frames.
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    bool is_predictor_built = false;

    // Initialize RD stats
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
    int64_t skip_rd = INT64_MAX;
    const int txfm_rd_gate_level = get_txfm_rd_gate_level(
        cm->seq_params->enable_masked_compound,
        cpi->sf.inter_sf.txfm_rd_gate_level, bsize, TX_SEARCH_DEFAULT,
        /*eval_motion_mode=*/0);
    if (txfm_rd_gate_level) {
      // Check if the mode is good enough based on skip RD
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
      int eval_txfm = check_txfm_eval(x, bsize, search_state->best_skip_rd[0],
                                      skip_rd, txfm_rd_gate_level, 0);
      if (!eval_txfm) continue;
    }

    // Build the prediction for this mode
    if (!is_predictor_built) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      av1_build_obmc_inter_predictors_sb(cm, xd);
    }

    num_tx_cands++;
    if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
    num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
    int64_t this_yrd = INT64_MAX;
    // Do the transform search
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
                         mode_rate, search_state->best_rd)) {
      continue;
    } else {
      // Fold the skip-txfm signaling cost into the luma-only RD cost.
      const int y_rate =
          rd_stats.skip_txfm
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);

      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
            rd_stats_y.rate + rd_stats_uv.rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }

    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);

    const THR_MODES mode_enum = get_prediction_mode_idx(
        prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Collect mode stats for multiwinner mode processing
    const int txfm_search_done = 1;
    store_winner_mode_stats(
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
        NULL, bsize, rd_stats.rdcost,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);

    // Apply the warp/OBMC RD bias before comparing against the best mode.
    int64_t best_scaled_rd = search_state->best_rd;
    int64_t this_scaled_rd = rd_stats.rdcost;
    increase_motion_mode_rd(&search_state->best_mbmode, mbmi, &best_scaled_rd,
                            &this_scaled_rd,
                            cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct,
                            cpi->sf.inter_sf.bias_obmc_mode_rd_scale_pct);
    if (this_scaled_rd < best_rd_in_this_partition) {
      best_rd_in_this_partition = rd_stats.rdcost;
      *yrd = this_yrd;
    }

    if (this_scaled_rd < best_scaled_rd) {
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
      search_state->best_skip_rd[0] = skip_rd;
      // Limit the total number of modes to be evaluated if the first is valid
      // and transform skip or compound
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
          // Evaluate more candidates at high quantizers where occurrence of
          // transform skip is high.
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands =
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
          // Evaluate more candidates at low quantizers where occurrence of
          // single reference mode is high.
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
                                                { 10, 7, 5, 3 } };
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands = AOMMIN(
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
        }
      }
    }
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
    // a newmv mode was evaluated already.
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
  }
}
5654
5655
// Indicates number of winner simple translation modes to be used
// (indexed by sf->winner_mode_sf.motion_mode_for_winner_cand; index 0 maps
// to 0 candidates, which disables the winner-candidate motion mode search).
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
5657
5658
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
5659
// speed feature. This list consists of modes that have only searched
5660
// SIMPLE_TRANSLATION. The final list will be used to search other motion
5661
// modes after the initial RD search.
5662
static void handle_winner_cand(
    MB_MODE_INFO *const mbmi,
    motion_mode_best_st_candidate *best_motion_mode_cands,
    int max_winner_motion_mode_cand, int64_t this_rd,
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
  // Number of current motion mode candidates in list
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
  // Default insertion point: append after the current last entry.
  int valid_motion_mode_cand_loc = num_motion_mode_cand;

  // find the best location to insert new motion mode candidate
  // (the list is maintained in ascending rd_cost order)
  for (int j = 0; j < num_motion_mode_cand; j++) {
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
      valid_motion_mode_cand_loc = j;
      break;
    }
  }

  // Insert motion mode if location is found
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
    // Shift worse candidates one slot down to make room. Anything beyond
    // slot (max_winner_motion_mode_cand - 1) falls off the list; the AOMMIN
    // bound keeps the memmove inside the retained portion.
    if (num_motion_mode_cand > 0 &&
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
      memmove(
          &best_motion_mode_cands
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
           valid_motion_mode_cand_loc) *
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
    // Fill in the scratch candidate, then copy it into the vacated slot.
    motion_mode_cand->mbmi = *mbmi;
    motion_mode_cand->rd_cost = this_rd;
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
        *motion_mode_cand;
    // Grow the count, saturating at the configured maximum.
    best_motion_mode_cands->num_motion_mode_cand =
        AOMMIN(max_winner_motion_mode_cand,
               best_motion_mode_cands->num_motion_mode_cand + 1);
  }
}
5700
5701
/*!\brief Search intra modes in interframes
5702
 *
5703
 * \ingroup intra_mode_search
5704
 *
5705
 * This function searches for the best intra mode when the current frame is an
5706
 * interframe. This function however does *not* handle luma palette mode.
5707
 * Palette mode is currently handled by \ref av1_search_palette_mode.
5708
 *
5709
 * This function will first iterate through the luma mode candidates to find the
5710
 * best luma intra mode. Once the best luma mode is found, it will then search
5711
 * for the best chroma mode. Because palette mode is currently not handled
 * here, a cache of uv mode is stored in
5714
 * av1_search_palette_mode.
5715
 *
5716
 * \param[in,out] search_state      Struct keep track of the prediction mode
5717
 *                                  search state in interframe.
5718
 *
5719
 * \param[in]     cpi               Top-level encoder structure.
5720
 * \param[in,out] x                 Pointer to struct holding all the data for
5721
 *                                  the current prediction block.
5722
 * \param[out]    rd_cost           Stores the best rd_cost among all the
5723
 *                                  prediction modes searched.
5724
 * \param[in]     bsize             Current block size.
5725
 * \param[in,out] ctx               Structure to hold the number of 4x4 blks to
5726
 *                                  copy the tx_type and txfm_skip arrays.
5727
 *                                  for only the Y plane.
5728
 * \param[in]     sf_args           Stores the list of intra mode candidates
5729
 *                                  to be searched.
5730
 * \param[in]     intra_ref_frame_cost  The entropy cost for signaling that the
5731
 *                                      current ref frame is an intra frame.
5732
 * \param[in]     yrd_threshold     The rdcost threshold for luma intra mode to
5733
 *                                  terminate chroma intra mode search.
5734
 *
5735
 * \remark If a new best mode is found, search_state and rd_costs are updated
5736
 * correspondingly. While x is also modified, it is only used as a temporary
5737
 * buffer, and the final decisions are stored in search_state.
5738
 */
5739
static inline void search_intra_modes_in_interframe(
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
    const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
    int64_t yrd_threshold) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;

  // Best-so-far luma state; a luma mode only becomes "best" if its RD beats
  // yrd_threshold (see the loop below).
  int is_best_y_mode_intra = 0;
  RD_STATS best_intra_rd_stats_y;
  int64_t best_rd_y = INT64_MAX;
  int best_mode_cost_y = -1;
  MB_MODE_INFO best_mbmi = *xd->mi[0];
  THR_MODES best_mode_enum = THR_INVALID;
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int num_4x4 = bsize_to_num_blk(bsize);

  // Performs luma search
  int64_t best_model_rd = INT64_MAX;
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
    top_intra_model_rd[i] = INT64_MAX;
  }

  // Sharpness tuning: skip intra search in interframes entirely for blocks
  // wider or taller than 4 mi units.
  if (cpi->oxcf.algo_cfg.sharpness) {
    int bh = mi_size_high[bsize];
    int bw = mi_size_wide[bsize];
    if (bh > 4 || bw > 4) return;
  }

  mbmi->skip_txfm = 0;

  // mode_idx enumerates base luma modes plus delta-angle variants; the
  // mapping is performed by set_y_mode_and_delta_angle().
  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
    // Early termination requested by the ML intra pruning path.
    if (sf->intra_sf.skip_intra_in_interframe &&
        search_state->intra_search_state.skip_intra_modes)
      break;
    set_y_mode_and_delta_angle(
        mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
    assert(mbmi->mode < INTRA_MODE_END);

    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
      continue;

    const THR_MODES mode_enum =
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
    // Respect configuration/speed-feature gating of smooth and Paeth modes.
    if ((!intra_mode_cfg->enable_smooth_intra ||
         cpi->sf.intra_sf.disable_smooth_intra) &&
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
         mbmi->mode == SMOOTH_V_PRED))
      continue;
    if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
      continue;
    // Directional modes with a non-zero delta angle are only legal when
    // angle deltas are enabled for this block size.
    if (av1_is_directional_mode(mbmi->mode) &&
        !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
        mbmi->angle_delta[PLANE_TYPE_Y] != 0)
      continue;
    const PREDICTION_MODE this_mode = mbmi->mode;

    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
    x->txfm_search_info.skip_txfm = 0;

    if (this_mode != DC_PRED) {
      // Only search the oblique modes if the best so far is
      // one of the neighboring directional modes
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
        if (search_state->best_mode_index != THR_INVALID &&
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
          continue;
      }
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(
                this_mode, search_state->intra_search_state.best_intra_mode))
          continue;
      }
    }

    RD_STATS intra_rd_stats_y;
    int mode_cost_y;
    int64_t intra_rd_y = INT64_MAX;
    const int is_luma_result_valid = av1_handle_intra_y_mode(
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
        &best_model_rd, top_intra_model_rd);

    if (intra_rd_y < INT64_MAX) {
      adjust_cost(cpi, x, &intra_rd_y, /*is_inter_pred=*/false);
    }

    // A luma mode qualifies only if its RD cost beats the caller-provided
    // threshold (derived from the best inter result).
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
      is_best_y_mode_intra = 1;
      if (intra_rd_y < best_rd_y) {
        best_intra_rd_stats_y = intra_rd_stats_y;
        best_mode_cost_y = mode_cost_y;
        best_rd_y = intra_rd_y;
        best_mbmi = *mbmi;
        best_mode_enum = mode_enum;
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
      }
    }
  }

  // No luma mode beat the threshold: intra loses to inter, nothing to record.
  if (!is_best_y_mode_intra) {
    return;
  }

  assert(best_rd_y < INT64_MAX);

  // Restores the best luma mode
  *mbmi = best_mbmi;
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);

  // Performs chroma search
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
  av1_init_rd_stats(&intra_rd_stats);
  av1_init_rd_stats(&intra_rd_stats_uv);
  const int num_planes = av1_num_planes(cm);
  if (num_planes > 1) {
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);

    if (!intra_uv_mode_valid) {
      return;
    }
  }

  // Merge the luma and chroma rd stats
  assert(best_mode_cost_y >= 0);
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate.
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }

  const ModeCosts *mode_costs = &x->mode_costs;
  const PREDICTION_MODE mode = mbmi->mode;
  if (num_planes > 1 && xd->is_chroma_ref) {
    const int uv_mode_cost =
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
    intra_rd_stats.rate +=
        intra_rd_stats_uv.rate +
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
  }

  // Intra block is always coded as non-skip
  intra_rd_stats.skip_txfm = 0;
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
  // Add in the cost of the no skip flag.
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
  // Calculate the final RD estimate for this mode.
  const int64_t this_rd =
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
  // Keep record of best intra rd
  if (this_rd < search_state->best_intra_rd) {
    search_state->best_intra_rd = this_rd;
    intra_search_state->best_intra_mode = mode;
  }

  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] =
        AOMMIN(search_state->best_pred_rd[i], this_rd);
  }

  intra_rd_stats.rdcost = this_rd;

  adjust_rdcost(cpi, x, &intra_rd_stats, /*is_inter_pred=*/false);

  // Collect mode stats for multiwinner mode processing
  const int txfm_search_done = 1;
  store_winner_mode_stats(
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
  if (intra_rd_stats.rdcost < search_state->best_rd) {
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
                        best_mode_enum, x, txfm_search_done);
  }
}
5930
5931
#if !CONFIG_REALTIME_ONLY
5932
// Prepare inter_cost and intra_cost from TPL stats, which are used as ML
5933
// features in intra mode pruning.
5934
static inline void calculate_cost_from_tpl_data(const AV1_COMP *cpi,
5935
                                                MACROBLOCK *x, BLOCK_SIZE bsize,
5936
                                                int mi_row, int mi_col,
5937
                                                int64_t *inter_cost,
5938
0
                                                int64_t *intra_cost) {
5939
0
  const AV1_COMMON *const cm = &cpi->common;
5940
  // Only consider full SB.
5941
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
5942
0
  const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
5943
0
  const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5944
0
                  (block_size_high[sb_size] / tpl_bsize_1d);
5945
0
  SuperBlockEnc *sb_enc = &x->sb_enc;
5946
0
  if (sb_enc->tpl_data_count == len) {
5947
0
    const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5948
0
    const int tpl_stride = sb_enc->tpl_stride;
5949
0
    const int tplw = mi_size_wide[tpl_bsize];
5950
0
    const int tplh = mi_size_high[tpl_bsize];
5951
0
    const int nw = mi_size_wide[bsize] / tplw;
5952
0
    const int nh = mi_size_high[bsize] / tplh;
5953
0
    if (nw >= 1 && nh >= 1) {
5954
0
      const int of_h = mi_row % mi_size_high[sb_size];
5955
0
      const int of_w = mi_col % mi_size_wide[sb_size];
5956
0
      const int start = of_h / tplh * tpl_stride + of_w / tplw;
5957
5958
0
      for (int k = 0; k < nh; k++) {
5959
0
        for (int l = 0; l < nw; l++) {
5960
0
          *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5961
0
          *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5962
0
        }
5963
0
      }
5964
0
      *inter_cost /= nw * nh;
5965
0
      *intra_cost /= nw * nh;
5966
0
    }
5967
0
  }
5968
0
}
5969
#endif  // !CONFIG_REALTIME_ONLY
5970
5971
// When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
// intra mode search.
static inline void skip_intra_modes_in_interframe(
    AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
    InterModeSearchState *search_state, const SPEED_FEATURES *const sf,
    int64_t inter_cost, int64_t intra_cost) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME;
  // Prune intra when the best inter mode is single-reference with a small
  // motion vector and the source block has sufficient variance.
  if (sf->rt_sf.prune_intra_mode_based_on_mv_range &&
      bsize > sf->part_sf.max_intra_bsize && !comp_pred) {
    const MV best_mv = search_state->best_mbmode.mv[0].as_mv;
    const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range;
    if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh &&
        x->source_variance > 128) {
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    }
  }

  const unsigned int src_var_thresh_intra_skip = 1;
  const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
  // The remaining pruning heuristics only apply when the feature is enabled
  // and the source block is not nearly flat.
  if (!(skip_intra_in_interframe &&
        (x->source_variance > src_var_thresh_intra_skip)))
    return;

  // Prune intra search based on best inter mode being transform skip.
  if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
    const int qindex_thresh[2] = { 200, MAXQ };
    const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
    if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
        (x->qindex <= qindex_thresh[ind])) {
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    } else if ((skip_intra_in_interframe >= 4) &&
               (inter_cost < 0 || intra_cost < 0)) {
      // Negative costs mean the TPL-derived features are unavailable; at the
      // most aggressive level, skip intra anyway.
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    }
  }
  // Use ML model to prune intra search.
  if (inter_cost >= 0 && intra_cost >= 0) {
    // Model choice depends on resolution (<=480 on the smaller dimension).
    const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
                                     ? &av1_intrap_nn_config
                                     : &av1_intrap_hd_nn_config;
    float nn_features[6];
    float scores[2] = { 0.0f };

    nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
    nn_features[1] = (float)mi_size_wide_log2[bsize];
    nn_features[2] = (float)mi_size_high_log2[bsize];
    nn_features[3] = (float)intra_cost;
    nn_features[4] = (float)inter_cost;
    const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
    const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
    // Quantizer ratio feature (integer ratio of max AC quant to current).
    nn_features[5] = (float)(ac_q_max / ac_q);

    av1_nn_predict(nn_features, nn_config, 1, scores);

    // For two parameters, the max prob returned from av1_nn_softmax equals
    // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
    // calling of av1_nn_softmax.
    const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
    assert(skip_intra_in_interframe <= 5);
    if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
      search_state->intra_search_state.skip_intra_modes = 1;
    }
  }
}
6039
6040
static inline bool skip_interp_filter_search(const AV1_COMP *cpi,
6041
0
                                             int is_single_pred) {
6042
0
  const MODE encoding_mode = cpi->oxcf.mode;
6043
0
  if (encoding_mode == REALTIME) {
6044
0
    return (cpi->common.current_frame.reference_mode == SINGLE_REFERENCE &&
6045
0
            (cpi->sf.interp_sf.skip_interp_filter_search ||
6046
0
             cpi->sf.winner_mode_sf.winner_mode_ifs));
6047
0
  } else if (encoding_mode == GOOD) {
6048
    // Skip interpolation filter search for single prediction modes.
6049
0
    return (cpi->sf.interp_sf.skip_interp_filter_search && is_single_pred);
6050
0
  }
6051
0
  return false;
6052
0
}
6053
6054
static inline int get_block_temp_var(const AV1_COMP *cpi, const MACROBLOCK *x,
6055
0
                                     BLOCK_SIZE bsize) {
6056
0
  const AV1_COMMON *const cm = &cpi->common;
6057
0
  const SPEED_FEATURES *const sf = &cpi->sf;
6058
6059
0
  if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION ||
6060
0
      !sf->rt_sf.short_circuit_low_temp_var ||
6061
0
      !sf->rt_sf.prune_inter_modes_using_temp_var) {
6062
0
    return 0;
6063
0
  }
6064
6065
0
  const int mi_row = x->e_mbd.mi_row;
6066
0
  const int mi_col = x->e_mbd.mi_col;
6067
0
  int is_low_temp_var = 0;
6068
6069
0
  if (cm->seq_params->sb_size == BLOCK_64X64)
6070
0
    is_low_temp_var = av1_get_force_skip_low_temp_var_small_sb(
6071
0
        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
6072
0
  else
6073
0
    is_low_temp_var = av1_get_force_skip_low_temp_var(
6074
0
        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
6075
6076
0
  return is_low_temp_var;
6077
0
}
6078
6079
// TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
6080
void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
6081
                            struct macroblock *x, struct RD_STATS *rd_cost,
6082
                            BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
6083
0
                            int64_t best_rd_so_far) {
6084
0
  AV1_COMMON *const cm = &cpi->common;
6085
0
  const FeatureFlags *const features = &cm->features;
6086
0
  const int num_planes = av1_num_planes(cm);
6087
0
  const SPEED_FEATURES *const sf = &cpi->sf;
6088
0
  MACROBLOCKD *const xd = &x->e_mbd;
6089
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
6090
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
6091
0
  int i;
6092
0
  const ModeCosts *mode_costs = &x->mode_costs;
6093
0
  const int *comp_inter_cost =
6094
0
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
6095
6096
0
  InterModeSearchState search_state;
6097
0
  init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
6098
0
  INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
6099
0
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
6100
0
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
6101
0
  };
6102
0
  HandleInterModeArgs args = { { NULL },
6103
0
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
6104
0
                               { NULL },
6105
0
                               { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
6106
0
                                 MAX_SB_SIZE >> 1 },
6107
0
                               NULL,
6108
0
                               NULL,
6109
0
                               NULL,
6110
0
                               search_state.modelled_rd,
6111
0
                               INT_MAX,
6112
0
                               INT_MAX,
6113
0
                               search_state.simple_rd,
6114
0
                               0,
6115
0
                               false,
6116
0
                               interintra_modes,
6117
0
                               { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
6118
0
                               { { 0, 0 } },
6119
0
                               { 0 },
6120
0
                               0,
6121
0
                               0,
6122
0
                               -1,
6123
0
                               -1,
6124
0
                               -1,
6125
0
                               { 0 },
6126
0
                               { 0 },
6127
0
                               UINT_MAX };
6128
  // Currently, is_low_temp_var is used in real time encoding.
6129
0
  const int is_low_temp_var = get_block_temp_var(cpi, x, bsize);
6130
6131
0
  for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
6132
  // Indicates the appropriate number of simple translation winner modes for
6133
  // exhaustive motion mode evaluation
6134
0
  const int max_winner_motion_mode_cand =
6135
0
      num_winner_motion_modes[sf->winner_mode_sf.motion_mode_for_winner_cand];
6136
0
  assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
6137
0
  motion_mode_candidate motion_mode_cand;
6138
0
  motion_mode_best_st_candidate best_motion_mode_cands;
6139
  // Initializing the number of motion mode candidates to zero.
6140
0
  best_motion_mode_cands.num_motion_mode_cand = 0;
6141
0
  for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
6142
0
    best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
6143
6144
0
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
6145
6146
0
  av1_invalid_rd_stats(rd_cost);
6147
6148
0
  for (i = 0; i < REF_FRAMES; ++i) {
6149
0
    x->warp_sample_info[i].num = -1;
6150
0
  }
6151
6152
  // Ref frames that are selected by square partition blocks.
6153
0
  int picked_ref_frames_mask = 0;
6154
0
  if (sf->inter_sf.prune_ref_frame_for_rect_partitions &&
6155
0
      mbmi->partition != PARTITION_NONE) {
6156
    // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
6157
    // partition blocks. prune_ref_frame_for_rect_partitions >=2
6158
    // implies prune for vert, horiz and extended partition blocks.
6159
0
    if ((mbmi->partition != PARTITION_VERT &&
6160
0
         mbmi->partition != PARTITION_HORZ) ||
6161
0
        sf->inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
6162
0
      picked_ref_frames_mask =
6163
0
          fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
6164
0
    }
6165
0
  }
6166
6167
#if CONFIG_COLLECT_COMPONENT_TIMING
6168
  start_timing(cpi, set_params_rd_pick_inter_mode_time);
6169
#endif
6170
  // Skip ref frames that never selected by square blocks.
6171
0
  const int skip_ref_frame_mask =
6172
0
      picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
6173
0
  mode_skip_mask_t mode_skip_mask;
6174
0
  unsigned int ref_costs_single[REF_FRAMES];
6175
0
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
6176
0
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
6177
  // init params, set frame modes, speed features
6178
0
  set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
6179
0
                                skip_ref_frame_mask, ref_costs_single,
6180
0
                                ref_costs_comp, yv12_mb);
6181
#if CONFIG_COLLECT_COMPONENT_TIMING
6182
  end_timing(cpi, set_params_rd_pick_inter_mode_time);
6183
#endif
6184
6185
0
  int64_t best_est_rd = INT64_MAX;
6186
0
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
6187
  // If do_tx_search is 0, only estimated RD should be computed.
6188
  // If do_tx_search is 1, all modes have TX search performed.
6189
0
  const int do_tx_search =
6190
0
      !((sf->inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
6191
0
        (sf->inter_sf.inter_mode_rd_model_estimation == 2 &&
6192
0
         num_pels_log2_lookup[bsize] > 8));
6193
0
  InterModesInfo *inter_modes_info = x->inter_modes_info;
6194
0
  inter_modes_info->num = 0;
6195
6196
  // Temporary buffers used by handle_inter_mode().
6197
0
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
6198
6199
  // The best RD found for the reference frame, among single reference modes.
6200
  // Note that the 0-th element will contain a cut-off that is later used
6201
  // to determine if we should skip a compound mode.
6202
0
  int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
6203
0
                                       INT64_MAX, INT64_MAX, INT64_MAX,
6204
0
                                       INT64_MAX, INT64_MAX };
6205
6206
  // Prepared stats used later to check if we could skip intra mode eval.
6207
0
  int64_t inter_cost = -1;
6208
0
  int64_t intra_cost = -1;
6209
  // Need to tweak the threshold for hdres speed 0 & 1.
6210
0
  const int mi_row = xd->mi_row;
6211
0
  const int mi_col = xd->mi_col;
6212
6213
  // Obtain the relevant tpl stats for pruning inter modes
6214
0
  PruneInfoFromTpl inter_cost_info_from_tpl;
6215
0
#if !CONFIG_REALTIME_ONLY
6216
0
  if (sf->inter_sf.prune_inter_modes_based_on_tpl) {
6217
    // x->tpl_keep_ref_frame[id] = 1 => no pruning in
6218
    // prune_ref_by_selective_ref_frame()
6219
    // x->tpl_keep_ref_frame[id] = 0  => ref frame can be pruned in
6220
    // prune_ref_by_selective_ref_frame()
6221
    // Populating valid_refs[idx] = 1 ensures that
6222
    // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
6223
    // pruned ref frame.
6224
0
    int valid_refs[INTER_REFS_PER_FRAME];
6225
0
    for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
6226
0
      const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
6227
0
      valid_refs[frame - 1] =
6228
0
          x->tpl_keep_ref_frame[frame] ||
6229
0
          !prune_ref_by_selective_ref_frame(
6230
0
              cpi, x, refs, cm->cur_frame->ref_display_order_hint);
6231
0
    }
6232
0
    av1_zero(inter_cost_info_from_tpl);
6233
0
    get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
6234
0
                              &inter_cost_info_from_tpl);
6235
0
  }
6236
6237
0
  const int do_pruning =
6238
0
      (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
6239
0
  if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
6240
0
      cpi->oxcf.algo_cfg.enable_tpl_model)
6241
0
    calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
6242
0
                                 &intra_cost);
6243
0
#endif  // !CONFIG_REALTIME_ONLY
6244
6245
  // Initialize best mode stats for winner mode processing.
6246
0
  const int max_winner_mode_count =
6247
0
      winner_mode_count_allowed[sf->winner_mode_sf.multi_winner_mode_type];
6248
0
  zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
6249
0
  x->winner_mode_count = 0;
6250
0
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
6251
0
                          NULL, bsize, best_rd_so_far,
6252
0
                          sf->winner_mode_sf.multi_winner_mode_type, 0);
6253
6254
0
  int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
6255
0
  if (sf->inter_sf.prune_inter_modes_if_skippable) {
6256
    // Higher multiplication factor values for lower quantizers.
6257
0
    mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
6258
0
  }
6259
6260
  // Initialize arguments for mode loop speed features
6261
0
  InterModeSFArgs sf_args = { &args.skip_motion_mode,
6262
0
                              &mode_skip_mask,
6263
0
                              &search_state,
6264
0
                              skip_ref_frame_mask,
6265
0
                              0,
6266
0
                              mode_thresh_mul_fact,
6267
0
                              0,
6268
0
                              0 };
6269
0
  int64_t best_inter_yrd = INT64_MAX;
6270
6271
  // This is the main loop of this function. It loops over all possible inter
6272
  // modes and calls handle_inter_mode() to compute the RD for each.
6273
  // Here midx is just an iterator index that should not be used by itself
6274
  // except to keep track of the number of modes searched. It should be used
6275
  // with av1_default_mode_order to get the enum that defines the mode, which
6276
  // can be used with av1_mode_defs to get the prediction mode and the ref
6277
  // frames.
6278
  // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
6279
  // good speedup for real time case. If we decide to use compound mode in real
6280
  // time, maybe we can modify av1_default_mode_order table.
6281
0
  THR_MODES mode_start = THR_INTER_MODE_START;
6282
0
  THR_MODES mode_end = THR_INTER_MODE_END;
6283
0
  const CurrentFrame *const current_frame = &cm->current_frame;
6284
0
  if (current_frame->reference_mode == SINGLE_REFERENCE) {
6285
0
    mode_start = SINGLE_REF_MODE_START;
6286
0
    mode_end = SINGLE_REF_MODE_END;
6287
0
  }
6288
0
  init_comp_avg_est_rd(x,
6289
0
                       sf->inter_sf.skip_comp_eval_using_top_comp_avg_est_rd);
6290
0
  for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
6291
    // Get the actual prediction mode we are trying in this iteration
6292
0
    const THR_MODES mode_enum = av1_default_mode_order[midx];
6293
0
    const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
6294
0
    const PREDICTION_MODE this_mode = mode_def->mode;
6295
0
    const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
6296
6297
0
    const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
6298
0
    const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
6299
0
    const int is_single_pred =
6300
0
        ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
6301
0
    const int comp_pred = second_ref_frame > INTRA_FRAME;
6302
6303
0
    txfm_info->skip_txfm = 0;
6304
0
    sf_args.num_single_modes_processed += is_single_pred;
6305
#if CONFIG_COLLECT_COMPONENT_TIMING
6306
    start_timing(cpi, skip_inter_mode_time);
6307
#endif
6308
    // Apply speed features to decide if this inter mode can be skipped
6309
0
    const int is_skip_inter_mode = skip_inter_mode(
6310
0
        cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var);
6311
#if CONFIG_COLLECT_COMPONENT_TIMING
6312
    end_timing(cpi, skip_inter_mode_time);
6313
#endif
6314
0
    if (is_skip_inter_mode) continue;
6315
6316
0
    init_mbmi(mbmi, this_mode, ref_frames, cm);
6317
0
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
6318
6319
    // Select prediction reference frames.
6320
0
    for (i = 0; i < num_planes; i++) {
6321
0
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
6322
0
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
6323
0
    }
6324
6325
0
    mbmi->angle_delta[PLANE_TYPE_Y] = 0;
6326
0
    mbmi->angle_delta[PLANE_TYPE_UV] = 0;
6327
0
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
6328
0
    mbmi->ref_mv_idx = 0;
6329
6330
0
    const int64_t ref_best_rd = search_state.best_rd;
6331
0
    RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
6332
0
    av1_init_rd_stats(&rd_stats);
6333
6334
0
    const int ref_frame_cost = comp_pred
6335
0
                                   ? ref_costs_comp[ref_frame][second_ref_frame]
6336
0
                                   : ref_costs_single[ref_frame];
6337
0
    const int compmode_cost =
6338
0
        is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
6339
0
    const int real_compmode_cost =
6340
0
        cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
6341
0
            ? compmode_cost
6342
0
            : 0;
6343
    // Point to variables that are maintained between loop iterations
6344
0
    args.single_newmv = search_state.single_newmv;
6345
0
    args.single_newmv_rate = search_state.single_newmv_rate;
6346
0
    args.single_newmv_valid = search_state.single_newmv_valid;
6347
0
    args.single_comp_cost = real_compmode_cost;
6348
0
    args.ref_frame_cost = ref_frame_cost;
6349
0
    args.best_pred_sse = search_state.best_pred_sse;
6350
0
    args.skip_ifs = skip_interp_filter_search(cpi, is_single_pred);
6351
0
    int64_t skip_rd[2] = { search_state.best_skip_rd[0],
6352
0
                           search_state.best_skip_rd[1] };
6353
0
    int64_t this_yrd = INT64_MAX;
6354
#if CONFIG_COLLECT_COMPONENT_TIMING
6355
    start_timing(cpi, handle_inter_mode_time);
6356
#endif
6357
0
    int64_t this_rd = handle_inter_mode(
6358
0
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
6359
0
        ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
6360
0
        inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
6361
0
        &this_yrd);
6362
#if CONFIG_COLLECT_COMPONENT_TIMING
6363
    end_timing(cpi, handle_inter_mode_time);
6364
#endif
6365
0
    if (current_frame->reference_mode != SINGLE_REFERENCE) {
6366
0
      if (!args.skip_ifs &&
6367
0
          sf->inter_sf.prune_comp_search_by_single_result > 0 &&
6368
0
          is_inter_singleref_mode(this_mode)) {
6369
0
        collect_single_states(x, &search_state, mbmi);
6370
0
      }
6371
6372
0
      if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
6373
0
          is_inter_singleref_mode(this_mode))
6374
0
        update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
6375
0
    }
6376
6377
0
    if (this_rd == INT64_MAX) continue;
6378
6379
0
    if (mbmi->skip_txfm) {
6380
0
      rd_stats_y.rate = 0;
6381
0
      rd_stats_uv.rate = 0;
6382
0
    }
6383
6384
0
    if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
6385
0
        this_rd < ref_frame_rd[ref_frame]) {
6386
0
      ref_frame_rd[ref_frame] = this_rd;
6387
0
    }
6388
6389
0
    adjust_cost(cpi, x, &this_rd, /*is_inter_pred=*/true);
6390
0
    adjust_rdcost(cpi, x, &rd_stats, /*is_inter_pred=*/true);
6391
6392
    // Did this mode help, i.e., is it the new best mode
6393
0
    if (this_rd < search_state.best_rd) {
6394
0
      assert(IMPLIES(comp_pred,
6395
0
                     cm->current_frame.reference_mode != SINGLE_REFERENCE));
6396
0
      search_state.best_pred_sse = x->pred_sse[ref_frame];
6397
0
      best_inter_yrd = this_yrd;
6398
0
      update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
6399
0
                          &rd_stats_uv, mode_enum, x, do_tx_search);
6400
0
      if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
6401
      // skip_rd[0] is the best total rd for a skip mode so far.
6402
      // skip_rd[1] is the best total rd for a skip mode so far in luma.
6403
      // When do_tx_search = 1, both skip_rd[0] and skip_rd[1] are updated.
6404
      // When do_tx_search = 0, skip_rd[1] is updated.
6405
0
      search_state.best_skip_rd[1] = skip_rd[1];
6406
0
    }
6407
0
    if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6408
      // Add this mode to motion mode candidate list for motion mode search
6409
      // if using motion_mode_for_winner_cand speed feature
6410
0
      handle_winner_cand(mbmi, &best_motion_mode_cands,
6411
0
                         max_winner_motion_mode_cand, this_rd,
6412
0
                         &motion_mode_cand, args.skip_motion_mode);
6413
0
    }
6414
6415
    /* keep record of best compound/single-only prediction */
6416
0
    record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
6417
0
                         x->rdmult, &search_state, compmode_cost);
6418
0
  }
6419
6420
#if CONFIG_COLLECT_COMPONENT_TIMING
6421
  start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6422
#endif
6423
0
  if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6424
    // For the single ref winner candidates, evaluate other motion modes (non
6425
    // simple translation).
6426
0
    evaluate_motion_mode_for_winner_candidates(
6427
0
        cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
6428
0
        &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
6429
0
        &search_state, &best_inter_yrd);
6430
0
  }
6431
#if CONFIG_COLLECT_COMPONENT_TIMING
6432
  end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6433
#endif
6434
6435
#if CONFIG_COLLECT_COMPONENT_TIMING
6436
  start_timing(cpi, do_tx_search_time);
6437
#endif
6438
0
  if (do_tx_search != 1) {
6439
    // A full tx search has not yet been done, do tx search for
6440
    // top mode candidates
6441
0
    tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
6442
0
                                    yv12_mb, mi_row, mi_col, &search_state,
6443
0
                                    rd_cost, ctx, &best_inter_yrd);
6444
0
  }
6445
#if CONFIG_COLLECT_COMPONENT_TIMING
6446
  end_timing(cpi, do_tx_search_time);
6447
#endif
6448
6449
#if CONFIG_COLLECT_COMPONENT_TIMING
6450
  start_timing(cpi, handle_intra_mode_time);
6451
#endif
6452
  // Gate intra mode evaluation if best of inter is skip except when source
6453
  // variance is extremely low and also based on max intra bsize.
6454
0
  skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost,
6455
0
                                 intra_cost);
6456
6457
0
  const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
6458
0
  search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
6459
0
                                   &sf_args, intra_ref_frame_cost,
6460
0
                                   best_inter_yrd);
6461
#if CONFIG_COLLECT_COMPONENT_TIMING
6462
  end_timing(cpi, handle_intra_mode_time);
6463
#endif
6464
6465
#if CONFIG_COLLECT_COMPONENT_TIMING
6466
  start_timing(cpi, refine_winner_mode_tx_time);
6467
#endif
6468
0
  int winner_mode_count =
6469
0
      sf->winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
6470
  // In effect only when fast tx search speed features are enabled.
6471
0
  refine_winner_mode_tx(
6472
0
      cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
6473
0
      &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
6474
0
      search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
6475
#if CONFIG_COLLECT_COMPONENT_TIMING
6476
  end_timing(cpi, refine_winner_mode_tx_time);
6477
#endif
6478
6479
  // Initialize default mode evaluation params
6480
0
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);
6481
6482
  // Only try palette mode when the best mode so far is an intra mode.
6483
0
  const int try_palette =
6484
0
      cpi->oxcf.tool_cfg.enable_palette &&
6485
0
      av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
6486
0
      !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
6487
0
  RD_STATS this_rd_cost;
6488
0
  int this_skippable = 0;
6489
0
  if (try_palette) {
6490
#if CONFIG_COLLECT_COMPONENT_TIMING
6491
    start_timing(cpi, av1_search_palette_mode_time);
6492
#endif
6493
0
    this_skippable = av1_search_palette_mode(
6494
0
        &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
6495
0
        ctx, &this_rd_cost, search_state.best_rd);
6496
#if CONFIG_COLLECT_COMPONENT_TIMING
6497
    end_timing(cpi, av1_search_palette_mode_time);
6498
#endif
6499
0
    if (this_rd_cost.rdcost < search_state.best_rd) {
6500
0
      search_state.best_mode_index = THR_DC;
6501
0
      mbmi->mv[0].as_int = 0;
6502
0
      rd_cost->rate = this_rd_cost.rate;
6503
0
      rd_cost->dist = this_rd_cost.dist;
6504
0
      rd_cost->rdcost = this_rd_cost.rdcost;
6505
0
      search_state.best_rd = rd_cost->rdcost;
6506
0
      search_state.best_mbmode = *mbmi;
6507
0
      search_state.best_skip2 = 0;
6508
0
      search_state.best_mode_skippable = this_skippable;
6509
0
      av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
6510
0
    }
6511
0
  }
6512
6513
0
  search_state.best_mbmode.skip_mode = 0;
6514
0
  if (cm->current_frame.skip_mode_info.skip_mode_flag &&
6515
0
      cpi->oxcf.algo_cfg.sharpness != 3 && is_comp_ref_allowed(bsize)) {
6516
0
    const struct segmentation *const seg = &cm->seg;
6517
0
    unsigned char segment_id = mbmi->segment_id;
6518
0
    if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
6519
0
      rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
6520
0
    }
6521
0
  }
6522
6523
  // Make sure that the ref_mv_idx is only nonzero when we're
6524
  // using a mode which can support ref_mv_idx
6525
0
  if (search_state.best_mbmode.ref_mv_idx != 0 &&
6526
0
      !(search_state.best_mbmode.mode == NEWMV ||
6527
0
        search_state.best_mbmode.mode == NEW_NEWMV ||
6528
0
        have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
6529
0
    search_state.best_mbmode.ref_mv_idx = 0;
6530
0
  }
6531
6532
0
  if (search_state.best_mode_index == THR_INVALID ||
6533
0
      search_state.best_rd >= best_rd_so_far) {
6534
0
    rd_cost->rate = INT_MAX;
6535
0
    rd_cost->rdcost = INT64_MAX;
6536
0
    return;
6537
0
  }
6538
6539
0
  const InterpFilter interp_filter = features->interp_filter;
6540
0
  assert((interp_filter == SWITCHABLE) ||
6541
0
         (interp_filter ==
6542
0
          search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
6543
0
         !is_inter_block(&search_state.best_mbmode));
6544
0
  assert((interp_filter == SWITCHABLE) ||
6545
0
         (interp_filter ==
6546
0
          search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
6547
0
         !is_inter_block(&search_state.best_mbmode));
6548
6549
0
  if (!cpi->rc.is_src_frame_alt_ref && sf->inter_sf.adaptive_rd_thresh) {
6550
0
    av1_update_rd_thresh_fact(
6551
0
        cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
6552
0
        search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
6553
0
  }
6554
6555
  // macroblock modes
6556
0
  *mbmi = search_state.best_mbmode;
6557
0
  txfm_info->skip_txfm |= search_state.best_skip2;
6558
6559
  // Note: this section is needed since the mode may have been forced to
6560
  // GLOBALMV by the all-zero mode handling of ref-mv.
6561
0
  if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
6562
    // Correct the interp filters for GLOBALMV
6563
0
    if (is_nontrans_global_motion(xd, xd->mi[0])) {
6564
0
      int_interpfilters filters =
6565
0
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
6566
0
      assert(mbmi->interp_filters.as_int == filters.as_int);
6567
0
      (void)filters;
6568
0
    }
6569
0
  }
6570
6571
0
  txfm_info->skip_txfm |= search_state.best_mode_skippable;
6572
6573
0
  assert(search_state.best_mode_index != THR_INVALID);
6574
6575
#if CONFIG_INTERNAL_STATS
6576
  store_coding_context(x, ctx, search_state.best_mode_index,
6577
                       search_state.best_mode_skippable);
6578
#else
6579
0
  store_coding_context(x, ctx, search_state.best_mode_skippable);
6580
0
#endif  // CONFIG_INTERNAL_STATS
6581
6582
0
  if (mbmi->palette_mode_info.palette_size[1] > 0) {
6583
0
    assert(try_palette);
6584
0
    av1_restore_uv_color_map(cpi, x);
6585
0
  }
6586
0
}
6587
6588
// RD "pick" for a block whose segment has the SKIP feature active (asserted
// below): no mode search is performed.  The block is forced to GLOBALMV /
// SIMPLE_TRANSLATION with zero residual, and only the signaling cost
// (interpolation filter + reference-mode + reference-frame) is accumulated
// into rd_cost.  rd_cost->rate is set to INT_MAX if the result is not better
// than best_rd_so_far.
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;  // This path only ever codes single prediction.
  int i;
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  const ModeCosts *mode_costs = &x->mode_costs;
  const int *comp_inter_cost =
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  // Residual coding is skipped entirely, so distortion stays zero.
  const int64_t distortion2 = 0;
  (void)mi_row;
  (void)mi_col;
  (void)tile_data;

  av1_collect_neighbors_ref_counts(xd);

  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  // Force the fixed mode decision for the skip segment.
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  // Use the segment-mandated reference frame if one is set, else LAST_FRAME.
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  // The MV is the global-motion vector of the chosen reference frame.
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           features->allow_high_precision_mv, bsize, mi_col,
                           mi_row, features->cur_frame_force_integer_mv)
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->txfm_search_info.skip_txfm = 1;

  mbmi->ref_mv_idx = 0;

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  av1_count_overlappable_neighbors(cm, xd);
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
    // Select the samples according to motion vector difference
    if (mbmi->num_proj_ref > 1) {
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
                                             mbmi->num_proj_ref, bsize);
    }
  }

  const InterpFilter interp_filter = features->interp_filter;
  set_default_interp_filters(mbmi, interp_filter);

  if (interp_filter != SWITCHABLE) {
    best_filter = interp_filter;
  } else {
    best_filter = EIGHTTAP_REGULAR;
    // With switchable filters, pick the filter that is cheapest to signal
    // (there is no prediction error to trade off against).
    if (av1_is_interp_needed(xd)) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        rs = av1_get_switchable_rate(x, xd, interp_filter,
                                     cm->seq_params->enable_dual_filter);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mbmi->interp_filters.as_filters.y_filter;
        }
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
                                   cm->seq_params->enable_dual_filter);

  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
    rate2 += comp_inter_cost[comp_pred];

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  // NOTE(review): this uses LAST_FRAME's cost even when the segment selected
  // a different reference above — confirm this is intentional.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  // Signal failure to the caller if this does not beat the incoming best RD.
  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((interp_filter == SWITCHABLE) ||
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));

  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
                              THR_GLOBALMV, THR_INTER_MODE_START,
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
  }

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, THR_GLOBALMV, 0);
#else
  store_coding_context(x, ctx, 0);
#endif  // CONFIG_INTERNAL_STATS
}
6719
6720
/*!\cond */
// Context passed through foreach_overlappable_nb_{above,left} to the
// calc_target_weighted_pred_{above,left} callbacks when building the OBMC
// weighted-source and mask buffers (see calc_target_weighted_pred).
struct calc_target_weighted_pred_ctxt {
  const OBMCBuffer *obmc_buffer;  // Destination wsrc/mask buffers.
  const uint8_t *tmp;             // Neighbor predictor (above or left strip).
  int tmp_stride;                 // Stride of 'tmp' in samples.
  int overlap;                    // Overlap extent used to index the 1-D mask.
};
/*!\endcond */
6728
6729
static inline void calc_target_weighted_pred_above(
6730
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6731
0
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6732
0
  (void)nb_mi;
6733
0
  (void)num_planes;
6734
0
  (void)rel_mi_row;
6735
0
  (void)dir;
6736
6737
0
  struct calc_target_weighted_pred_ctxt *ctxt =
6738
0
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6739
6740
0
  const int bw = xd->width << MI_SIZE_LOG2;
6741
0
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6742
6743
0
  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
6744
0
  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
6745
0
  const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
6746
0
  const int is_hbd = is_cur_buf_hbd(xd);
6747
6748
0
  if (!is_hbd) {
6749
0
    for (int row = 0; row < ctxt->overlap; ++row) {
6750
0
      const uint8_t m0 = mask1d[row];
6751
0
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6752
0
      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6753
0
        wsrc[col] = m1 * tmp[col];
6754
0
        mask[col] = m0;
6755
0
      }
6756
0
      wsrc += bw;
6757
0
      mask += bw;
6758
0
      tmp += ctxt->tmp_stride;
6759
0
    }
6760
0
  } else {
6761
0
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6762
6763
0
    for (int row = 0; row < ctxt->overlap; ++row) {
6764
0
      const uint8_t m0 = mask1d[row];
6765
0
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6766
0
      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6767
0
        wsrc[col] = m1 * tmp16[col];
6768
0
        mask[col] = m0;
6769
0
      }
6770
0
      wsrc += bw;
6771
0
      mask += bw;
6772
0
      tmp16 += ctxt->tmp_stride;
6773
0
    }
6774
0
  }
6775
0
}
6776
6777
static inline void calc_target_weighted_pred_left(
6778
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6779
0
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6780
0
  (void)nb_mi;
6781
0
  (void)num_planes;
6782
0
  (void)rel_mi_col;
6783
0
  (void)dir;
6784
6785
0
  struct calc_target_weighted_pred_ctxt *ctxt =
6786
0
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6787
6788
0
  const int bw = xd->width << MI_SIZE_LOG2;
6789
0
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6790
6791
0
  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
6792
0
  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
6793
0
  const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
6794
0
  const int is_hbd = is_cur_buf_hbd(xd);
6795
6796
0
  if (!is_hbd) {
6797
0
    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6798
0
      for (int col = 0; col < ctxt->overlap; ++col) {
6799
0
        const uint8_t m0 = mask1d[col];
6800
0
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6801
0
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6802
0
                    (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6803
0
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6804
0
      }
6805
0
      wsrc += bw;
6806
0
      mask += bw;
6807
0
      tmp += ctxt->tmp_stride;
6808
0
    }
6809
0
  } else {
6810
0
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6811
6812
0
    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6813
0
      for (int col = 0; col < ctxt->overlap; ++col) {
6814
0
        const uint8_t m0 = mask1d[col];
6815
0
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6816
0
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6817
0
                    (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6818
0
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6819
0
      }
6820
0
      wsrc += bw;
6821
0
      mask += bw;
6822
0
      tmp16 += ctxt->tmp_stride;
6823
0
    }
6824
0
  }
6825
0
}
6826
6827
// This function has a structure similar to av1_build_obmc_inter_prediction
6828
//
6829
// The OBMC predictor is computed as:
6830
//
6831
//  PObmc(x,y) =
6832
//    AOM_BLEND_A64(Mh(x),
6833
//                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
6834
//                  PLeft(x, y))
6835
//
6836
// Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
6837
// rounding, this can be written as:
6838
//
6839
//  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
6840
//    Mh(x) * Mv(y) * P(x,y) +
6841
//      Mh(x) * Cv(y) * Pabove(x,y) +
6842
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6843
//
6844
// Where :
6845
//
6846
//  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
6847
//  Ch(y) = AOM_BLEND_A64_MAX_ALPHA - Mh(y)
6848
//
6849
// This function computes 'wsrc' and 'mask' as:
6850
//
6851
//  wsrc(x, y) =
6852
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
6853
//      Mh(x) * Cv(y) * Pabove(x,y) +
6854
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6855
//
6856
//  mask(x, y) = Mh(x) * Mv(y)
6857
//
6858
// These can then be used to efficiently approximate the error for any
6859
// predictor P in the context of the provided neighbouring predictors by
6860
// computing:
6861
//
6862
//  error(x, y) =
6863
//    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
6864
//
6865
// Builds the OBMC weighted-source ('wsrc') and mask buffers for the current
// luma block from the already-built above/left neighbor predictors, per the
// derivation in the comment preceding this function.  The ordering below is
// load-bearing: the above pass writes at one blend-factor scale, both buffers
// are then promoted by AOM_BLEND_A64_MAX_ALPHA, the left pass folds in at the
// promoted scale, and finally wsrc is flipped to src*MAX_ALPHA^2 - wsrc.
static inline void calc_target_weighted_pred(
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
    const uint8_t *above, int above_stride, const uint8_t *left,
    int left_stride) {
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
  const int bw = xd->width << MI_SIZE_LOG2;   // Block width in pixels.
  const int bh = xd->height << MI_SIZE_LOG2;  // Block height in pixels.
  const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
  int32_t *mask_buf = obmc_buffer->mask;
  int32_t *wsrc_buf = obmc_buffer->wsrc;

  const int is_hbd = is_cur_buf_hbd(xd);
  // Source scaling factor: AOM_BLEND_A64_MAX_ALPHA squared.
  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;

  // plane 0 should not be sub-sampled
  assert(xd->plane[0].subsampling_x == 0);
  assert(xd->plane[0].subsampling_y == 0);

  // Start from zero wsrc and a full-alpha mask.
  av1_zero_array(wsrc_buf, bw * bh);
  for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;

  // handle above row
  if (xd->up_available) {
    const int overlap =
        AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
                                                   above_stride, overlap };
    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
                                  max_neighbor_obmc[mi_size_wide_log2[bsize]],
                                  calc_target_weighted_pred_above, &ctxt);
  }

  // Promote the above-pass results by one blend factor before the left pass
  // folds in at the higher scale.
  for (int i = 0; i < bw * bh; ++i) {
    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
  }

  // handle left column
  if (xd->left_available) {
    const int overlap =
        AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
                                                   left_stride, overlap };
    foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
                                 max_neighbor_obmc[mi_size_high_log2[bsize]],
                                 calc_target_weighted_pred_left, &ctxt);
  }

  // Final fold: wsrc = src * MAX_ALPHA^2 - (neighbor contributions).
  if (!is_hbd) {
    const uint8_t *src = x->plane[0].src.buf;

    for (int row = 0; row < bh; ++row) {
      for (int col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += bw;
      src += x->plane[0].src.stride;
    }
  } else {
    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);

    for (int row = 0; row < bh; ++row) {
      for (int col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += bw;
      src += x->plane[0].src.stride;
    }
  }
}