Coverage Report

Created: 2026-04-01 07:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/encoder/rdopt.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <math.h>
14
#include <stdbool.h>
15
#include <stdint.h>
16
#include <string.h>
17
18
#include "config/aom_config.h"
19
#include "config/aom_dsp_rtcd.h"
20
#include "config/av1_rtcd.h"
21
22
#include "aom_dsp/aom_dsp_common.h"
23
#include "aom_dsp/blend.h"
24
#include "aom_mem/aom_mem.h"
25
#include "aom_ports/aom_timer.h"
26
#include "aom_ports/mem.h"
27
28
#include "av1/common/av1_common_int.h"
29
#include "av1/common/cfl.h"
30
#include "av1/common/blockd.h"
31
#include "av1/common/common.h"
32
#include "av1/common/common_data.h"
33
#include "av1/common/entropy.h"
34
#include "av1/common/entropymode.h"
35
#include "av1/common/enums.h"
36
#include "av1/common/idct.h"
37
#include "av1/common/mvref_common.h"
38
#include "av1/common/obmc.h"
39
#include "av1/common/pred_common.h"
40
#include "av1/common/quant_common.h"
41
#include "av1/common/reconinter.h"
42
#include "av1/common/reconintra.h"
43
#include "av1/common/scan.h"
44
#include "av1/common/seg_common.h"
45
#include "av1/common/txb_common.h"
46
#include "av1/common/warped_motion.h"
47
48
#include "av1/encoder/aq_variance.h"
49
#include "av1/encoder/av1_quantize.h"
50
#include "av1/encoder/block.h"
51
#include "av1/encoder/cost.h"
52
#include "av1/encoder/compound_type.h"
53
#include "av1/encoder/encodemb.h"
54
#include "av1/encoder/encodemv.h"
55
#include "av1/encoder/encoder.h"
56
#include "av1/encoder/encodetxb.h"
57
#include "av1/encoder/hybrid_fwd_txfm.h"
58
#include "av1/encoder/interp_search.h"
59
#include "av1/encoder/intra_mode_search.h"
60
#include "av1/encoder/intra_mode_search_utils.h"
61
#include "av1/encoder/mcomp.h"
62
#include "av1/encoder/ml.h"
63
#include "av1/encoder/mode_prune_model_weights.h"
64
#include "av1/encoder/model_rd.h"
65
#include "av1/encoder/motion_search_facade.h"
66
#include "av1/encoder/palette.h"
67
#include "av1/encoder/pustats.h"
68
#include "av1/encoder/random.h"
69
#include "av1/encoder/ratectrl.h"
70
#include "av1/encoder/rd.h"
71
#include "av1/encoder/rdopt.h"
72
#include "av1/encoder/reconinter_enc.h"
73
#include "av1/encoder/tokenize.h"
74
#include "av1/encoder/tpl_model.h"
75
#include "av1/encoder/tx_search.h"
76
#include "av1/encoder/var_based_part.h"
77
78
0
#define LAST_NEW_MV_INDEX 6
79
80
// Mode_threshold multiplication factor table for prune_inter_modes_if_skippable
81
// The values are kept in Q12 format and equation used to derive is
82
// (2.5 - ((float)x->qindex / MAXQ) * 1.5)
83
0
#define MODE_THRESH_QBITS 12
84
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
85
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
86
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
87
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
88
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
89
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
90
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
91
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
92
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
93
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
94
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
95
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
96
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
97
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
98
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
99
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
100
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
101
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
102
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
103
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
104
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
105
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
106
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
107
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
108
  4144,  4120,  4096
109
};
110
111
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
112
  THR_NEARESTMV,
113
  THR_NEARESTL2,
114
  THR_NEARESTL3,
115
  THR_NEARESTB,
116
  THR_NEARESTA2,
117
  THR_NEARESTA,
118
  THR_NEARESTG,
119
120
  THR_NEWMV,
121
  THR_NEWL2,
122
  THR_NEWL3,
123
  THR_NEWB,
124
  THR_NEWA2,
125
  THR_NEWA,
126
  THR_NEWG,
127
128
  THR_NEARMV,
129
  THR_NEARL2,
130
  THR_NEARL3,
131
  THR_NEARB,
132
  THR_NEARA2,
133
  THR_NEARA,
134
  THR_NEARG,
135
136
  THR_GLOBALMV,
137
  THR_GLOBALL2,
138
  THR_GLOBALL3,
139
  THR_GLOBALB,
140
  THR_GLOBALA2,
141
  THR_GLOBALA,
142
  THR_GLOBALG,
143
144
  THR_COMP_NEAREST_NEARESTLA,
145
  THR_COMP_NEAREST_NEARESTL2A,
146
  THR_COMP_NEAREST_NEARESTL3A,
147
  THR_COMP_NEAREST_NEARESTGA,
148
  THR_COMP_NEAREST_NEARESTLB,
149
  THR_COMP_NEAREST_NEARESTL2B,
150
  THR_COMP_NEAREST_NEARESTL3B,
151
  THR_COMP_NEAREST_NEARESTGB,
152
  THR_COMP_NEAREST_NEARESTLA2,
153
  THR_COMP_NEAREST_NEARESTL2A2,
154
  THR_COMP_NEAREST_NEARESTL3A2,
155
  THR_COMP_NEAREST_NEARESTGA2,
156
  THR_COMP_NEAREST_NEARESTLL2,
157
  THR_COMP_NEAREST_NEARESTLL3,
158
  THR_COMP_NEAREST_NEARESTLG,
159
  THR_COMP_NEAREST_NEARESTBA,
160
161
  THR_COMP_NEAR_NEARLB,
162
  THR_COMP_NEW_NEWLB,
163
  THR_COMP_NEW_NEARESTLB,
164
  THR_COMP_NEAREST_NEWLB,
165
  THR_COMP_NEW_NEARLB,
166
  THR_COMP_NEAR_NEWLB,
167
  THR_COMP_GLOBAL_GLOBALLB,
168
169
  THR_COMP_NEAR_NEARLA,
170
  THR_COMP_NEW_NEWLA,
171
  THR_COMP_NEW_NEARESTLA,
172
  THR_COMP_NEAREST_NEWLA,
173
  THR_COMP_NEW_NEARLA,
174
  THR_COMP_NEAR_NEWLA,
175
  THR_COMP_GLOBAL_GLOBALLA,
176
177
  THR_COMP_NEAR_NEARL2A,
178
  THR_COMP_NEW_NEWL2A,
179
  THR_COMP_NEW_NEARESTL2A,
180
  THR_COMP_NEAREST_NEWL2A,
181
  THR_COMP_NEW_NEARL2A,
182
  THR_COMP_NEAR_NEWL2A,
183
  THR_COMP_GLOBAL_GLOBALL2A,
184
185
  THR_COMP_NEAR_NEARL3A,
186
  THR_COMP_NEW_NEWL3A,
187
  THR_COMP_NEW_NEARESTL3A,
188
  THR_COMP_NEAREST_NEWL3A,
189
  THR_COMP_NEW_NEARL3A,
190
  THR_COMP_NEAR_NEWL3A,
191
  THR_COMP_GLOBAL_GLOBALL3A,
192
193
  THR_COMP_NEAR_NEARGA,
194
  THR_COMP_NEW_NEWGA,
195
  THR_COMP_NEW_NEARESTGA,
196
  THR_COMP_NEAREST_NEWGA,
197
  THR_COMP_NEW_NEARGA,
198
  THR_COMP_NEAR_NEWGA,
199
  THR_COMP_GLOBAL_GLOBALGA,
200
201
  THR_COMP_NEAR_NEARL2B,
202
  THR_COMP_NEW_NEWL2B,
203
  THR_COMP_NEW_NEARESTL2B,
204
  THR_COMP_NEAREST_NEWL2B,
205
  THR_COMP_NEW_NEARL2B,
206
  THR_COMP_NEAR_NEWL2B,
207
  THR_COMP_GLOBAL_GLOBALL2B,
208
209
  THR_COMP_NEAR_NEARL3B,
210
  THR_COMP_NEW_NEWL3B,
211
  THR_COMP_NEW_NEARESTL3B,
212
  THR_COMP_NEAREST_NEWL3B,
213
  THR_COMP_NEW_NEARL3B,
214
  THR_COMP_NEAR_NEWL3B,
215
  THR_COMP_GLOBAL_GLOBALL3B,
216
217
  THR_COMP_NEAR_NEARGB,
218
  THR_COMP_NEW_NEWGB,
219
  THR_COMP_NEW_NEARESTGB,
220
  THR_COMP_NEAREST_NEWGB,
221
  THR_COMP_NEW_NEARGB,
222
  THR_COMP_NEAR_NEWGB,
223
  THR_COMP_GLOBAL_GLOBALGB,
224
225
  THR_COMP_NEAR_NEARLA2,
226
  THR_COMP_NEW_NEWLA2,
227
  THR_COMP_NEW_NEARESTLA2,
228
  THR_COMP_NEAREST_NEWLA2,
229
  THR_COMP_NEW_NEARLA2,
230
  THR_COMP_NEAR_NEWLA2,
231
  THR_COMP_GLOBAL_GLOBALLA2,
232
233
  THR_COMP_NEAR_NEARL2A2,
234
  THR_COMP_NEW_NEWL2A2,
235
  THR_COMP_NEW_NEARESTL2A2,
236
  THR_COMP_NEAREST_NEWL2A2,
237
  THR_COMP_NEW_NEARL2A2,
238
  THR_COMP_NEAR_NEWL2A2,
239
  THR_COMP_GLOBAL_GLOBALL2A2,
240
241
  THR_COMP_NEAR_NEARL3A2,
242
  THR_COMP_NEW_NEWL3A2,
243
  THR_COMP_NEW_NEARESTL3A2,
244
  THR_COMP_NEAREST_NEWL3A2,
245
  THR_COMP_NEW_NEARL3A2,
246
  THR_COMP_NEAR_NEWL3A2,
247
  THR_COMP_GLOBAL_GLOBALL3A2,
248
249
  THR_COMP_NEAR_NEARGA2,
250
  THR_COMP_NEW_NEWGA2,
251
  THR_COMP_NEW_NEARESTGA2,
252
  THR_COMP_NEAREST_NEWGA2,
253
  THR_COMP_NEW_NEARGA2,
254
  THR_COMP_NEAR_NEWGA2,
255
  THR_COMP_GLOBAL_GLOBALGA2,
256
257
  THR_COMP_NEAR_NEARLL2,
258
  THR_COMP_NEW_NEWLL2,
259
  THR_COMP_NEW_NEARESTLL2,
260
  THR_COMP_NEAREST_NEWLL2,
261
  THR_COMP_NEW_NEARLL2,
262
  THR_COMP_NEAR_NEWLL2,
263
  THR_COMP_GLOBAL_GLOBALLL2,
264
265
  THR_COMP_NEAR_NEARLL3,
266
  THR_COMP_NEW_NEWLL3,
267
  THR_COMP_NEW_NEARESTLL3,
268
  THR_COMP_NEAREST_NEWLL3,
269
  THR_COMP_NEW_NEARLL3,
270
  THR_COMP_NEAR_NEWLL3,
271
  THR_COMP_GLOBAL_GLOBALLL3,
272
273
  THR_COMP_NEAR_NEARLG,
274
  THR_COMP_NEW_NEWLG,
275
  THR_COMP_NEW_NEARESTLG,
276
  THR_COMP_NEAREST_NEWLG,
277
  THR_COMP_NEW_NEARLG,
278
  THR_COMP_NEAR_NEWLG,
279
  THR_COMP_GLOBAL_GLOBALLG,
280
281
  THR_COMP_NEAR_NEARBA,
282
  THR_COMP_NEW_NEWBA,
283
  THR_COMP_NEW_NEARESTBA,
284
  THR_COMP_NEAREST_NEWBA,
285
  THR_COMP_NEW_NEARBA,
286
  THR_COMP_NEAR_NEWBA,
287
  THR_COMP_GLOBAL_GLOBALBA,
288
289
  THR_DC,
290
  THR_PAETH,
291
  THR_SMOOTH,
292
  THR_SMOOTH_V,
293
  THR_SMOOTH_H,
294
  THR_H_PRED,
295
  THR_V_PRED,
296
  THR_D135_PRED,
297
  THR_D203_PRED,
298
  THR_D157_PRED,
299
  THR_D67_PRED,
300
  THR_D113_PRED,
301
  THR_D45_PRED,
302
};
303
304
/*!\cond */
305
typedef struct SingleInterModeState {
306
  int64_t rd;
307
  MV_REFERENCE_FRAME ref_frame;
308
  int valid;
309
} SingleInterModeState;
310
311
typedef struct InterModeSearchState {
312
  int64_t best_rd;
313
  int64_t best_skip_rd[2];
314
  MB_MODE_INFO best_mbmode;
315
  int best_rate_y;
316
  int best_rate_uv;
317
  int best_mode_skippable;
318
  int best_skip2;
319
  THR_MODES best_mode_index;
320
  int num_available_refs;
321
  int64_t dist_refs[REF_FRAMES];
322
  int dist_order_refs[REF_FRAMES];
323
  int64_t mode_threshold[MAX_MODES];
324
  int64_t best_intra_rd;
325
  unsigned int best_pred_sse;
326
327
  /*!
328
   * \brief Keep track of best intra rd for use in compound mode.
329
   */
330
  int64_t best_pred_rd[REFERENCE_MODES];
331
  // Save a set of single_newmv for each checked ref_mv.
332
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
333
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
334
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
335
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
336
  // The rd of simple translation in single inter modes
337
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
338
  int64_t best_single_rd[REF_FRAMES];
339
  PREDICTION_MODE best_single_mode[REF_FRAMES];
340
341
  // Single search results by [directions][modes][reference frames]
342
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
343
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
344
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
345
                                            [FWD_REFS];
346
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
347
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
348
  IntraModeSearchState intra_search_state;
349
  RD_STATS best_y_rdcost;
350
} InterModeSearchState;
351
/*!\endcond */
352
353
0
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
354
0
  for (int i = 0; i < BLOCK_SIZES_ALL; ++i) {
355
0
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[i];
356
0
    md->ready = 0;
357
0
    md->num = 0;
358
0
    md->dist_sum = 0;
359
0
    md->ld_sum = 0;
360
0
    md->sse_sum = 0;
361
0
    md->sse_sse_sum = 0;
362
0
    md->sse_ld_sum = 0;
363
0
  }
364
0
}
365
366
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
367
                             int64_t sse, int *est_residue_cost,
368
0
                             int64_t *est_dist) {
369
0
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
370
0
  if (md->ready) {
371
0
    if (sse < md->dist_mean) {
372
0
      *est_residue_cost = 0;
373
0
      *est_dist = sse;
374
0
    } else {
375
0
      *est_dist = (int64_t)round(md->dist_mean);
376
0
      const double est_ld = md->a * sse + md->b;
377
      // Clamp estimated rate cost by INT_MAX / 2.
378
      // TODO(angiebird@google.com): find better solution than clamping.
379
0
      if (fabs(est_ld) < 1e-2) {
380
0
        *est_residue_cost = INT_MAX / 2;
381
0
      } else {
382
0
        double est_residue_cost_dbl = ((sse - md->dist_mean) / est_ld);
383
0
        if (est_residue_cost_dbl < 0) {
384
0
          *est_residue_cost = 0;
385
0
        } else {
386
0
          *est_residue_cost =
387
0
              (int)AOMMIN((int64_t)round(est_residue_cost_dbl), INT_MAX / 2);
388
0
        }
389
0
      }
390
0
      if (*est_residue_cost <= 0) {
391
0
        *est_residue_cost = 0;
392
0
        *est_dist = sse;
393
0
      }
394
0
    }
395
0
    return 1;
396
0
  }
397
0
  return 0;
398
0
}
399
400
0
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
401
0
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
402
0
    const int block_idx = inter_mode_data_block_idx(bsize);
403
0
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
404
0
    if (block_idx == -1) continue;
405
0
    if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64)) {
406
0
      continue;
407
0
    } else {
408
0
      if (md->ready == 0) {
409
0
        md->dist_mean = md->dist_sum / md->num;
410
0
        md->ld_mean = md->ld_sum / md->num;
411
0
        md->sse_mean = md->sse_sum / md->num;
412
0
        md->sse_sse_mean = md->sse_sse_sum / md->num;
413
0
        md->sse_ld_mean = md->sse_ld_sum / md->num;
414
0
      } else {
415
0
        const double factor = 3;
416
0
        md->dist_mean =
417
0
            (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
418
0
        md->ld_mean =
419
0
            (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
420
0
        md->sse_mean =
421
0
            (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
422
0
        md->sse_sse_mean =
423
0
            (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
424
0
            (factor + 1);
425
0
        md->sse_ld_mean =
426
0
            (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
427
0
            (factor + 1);
428
0
      }
429
430
0
      const double my = md->ld_mean;
431
0
      const double mx = md->sse_mean;
432
0
      const double dx = sqrt(md->sse_sse_mean);
433
0
      const double dxy = md->sse_ld_mean;
434
435
0
      md->a = (dxy - mx * my) / (dx * dx - mx * mx);
436
0
      md->b = my - md->a * mx;
437
0
      md->ready = 1;
438
439
0
      md->num = 0;
440
0
      md->dist_sum = 0;
441
0
      md->ld_sum = 0;
442
0
      md->sse_sum = 0;
443
0
      md->sse_sse_sum = 0;
444
0
      md->sse_ld_sum = 0;
445
0
    }
446
0
    (void)rdmult;
447
0
  }
448
0
}
449
450
static inline void inter_mode_data_push(TileDataEnc *tile_data,
451
                                        BLOCK_SIZE bsize, int64_t sse,
452
0
                                        int64_t dist, int residue_cost) {
453
0
  if (residue_cost == 0 || sse == dist) return;
454
0
  const int block_idx = inter_mode_data_block_idx(bsize);
455
0
  if (block_idx == -1) return;
456
0
  InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
457
0
  if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
458
0
    const double ld = (sse - dist) * 1. / residue_cost;
459
0
    ++rd_model->num;
460
0
    rd_model->dist_sum += dist;
461
0
    rd_model->ld_sum += ld;
462
0
    rd_model->sse_sum += sse;
463
0
    rd_model->sse_sse_sum += (double)sse * (double)sse;
464
0
    rd_model->sse_ld_sum += sse * ld;
465
0
  }
466
0
}
467
468
static inline void inter_modes_info_push(InterModesInfo *inter_modes_info,
469
                                         int mode_rate, int64_t sse, int64_t rd,
470
                                         RD_STATS *rd_cost, RD_STATS *rd_cost_y,
471
                                         RD_STATS *rd_cost_uv,
472
0
                                         const MB_MODE_INFO *mbmi) {
473
0
  const int num = inter_modes_info->num;
474
0
  assert(num < MAX_INTER_MODES);
475
0
  inter_modes_info->mbmi_arr[num] = *mbmi;
476
0
  inter_modes_info->mode_rate_arr[num] = mode_rate;
477
0
  inter_modes_info->sse_arr[num] = sse;
478
0
  inter_modes_info->est_rd_arr[num] = rd;
479
0
  inter_modes_info->rd_cost_arr[num] = *rd_cost;
480
0
  inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
481
0
  inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
482
0
  ++inter_modes_info->num;
483
0
}
484
485
0
static int compare_rd_idx_pair(const void *a, const void *b) {
486
0
  if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
487
    // To avoid inconsistency in qsort() ordering when two elements are equal,
488
    // using idx as tie breaker. Refer aomedia:2928
489
0
    if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
490
0
      return 0;
491
0
    else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
492
0
      return 1;
493
0
    else
494
0
      return -1;
495
0
  } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
496
0
    return 1;
497
0
  } else {
498
0
    return -1;
499
0
  }
500
0
}
501
502
static inline void inter_modes_info_sort(const InterModesInfo *inter_modes_info,
503
0
                                         RdIdxPair *rd_idx_pair_arr) {
504
0
  if (inter_modes_info->num == 0) {
505
0
    return;
506
0
  }
507
0
  for (int i = 0; i < inter_modes_info->num; ++i) {
508
0
    rd_idx_pair_arr[i].idx = i;
509
0
    rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
510
0
  }
511
0
  qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
512
0
        compare_rd_idx_pair);
513
0
}
514
515
// Similar to get_horver_correlation, but also takes into account first
516
// row/column, when computing horizontal/vertical correlation.
517
void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
518
                                       int width, int height, float *hcorr,
519
0
                                       float *vcorr) {
520
  // The following notation is used:
521
  // x - current pixel
522
  // y - left neighbor pixel
523
  // z - top neighbor pixel
524
0
  int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
525
0
  int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
526
0
  int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
527
528
  // First, process horizontal correlation on just the first row
529
0
  x_sum += diff[0];
530
0
  x2_sum += diff[0] * diff[0];
531
0
  x_firstrow += diff[0];
532
0
  x2_firstrow += diff[0] * diff[0];
533
0
  for (int j = 1; j < width; ++j) {
534
0
    const int16_t x = diff[j];
535
0
    const int16_t y = diff[j - 1];
536
0
    x_sum += x;
537
0
    x_firstrow += x;
538
0
    x2_sum += x * x;
539
0
    x2_firstrow += x * x;
540
0
    xy_sum += x * y;
541
0
  }
542
543
  // Process vertical correlation in the first column
544
0
  x_firstcol += diff[0];
545
0
  x2_firstcol += diff[0] * diff[0];
546
0
  for (int i = 1; i < height; ++i) {
547
0
    const int16_t x = diff[i * stride];
548
0
    const int16_t z = diff[(i - 1) * stride];
549
0
    x_sum += x;
550
0
    x_firstcol += x;
551
0
    x2_sum += x * x;
552
0
    x2_firstcol += x * x;
553
0
    xz_sum += x * z;
554
0
  }
555
556
  // Now process horiz and vert correlation through the rest unit
557
0
  for (int i = 1; i < height; ++i) {
558
0
    for (int j = 1; j < width; ++j) {
559
0
      const int16_t x = diff[i * stride + j];
560
0
      const int16_t y = diff[i * stride + j - 1];
561
0
      const int16_t z = diff[(i - 1) * stride + j];
562
0
      x_sum += x;
563
0
      x2_sum += x * x;
564
0
      xy_sum += x * y;
565
0
      xz_sum += x * z;
566
0
    }
567
0
  }
568
569
0
  for (int j = 0; j < width; ++j) {
570
0
    x_finalrow += diff[(height - 1) * stride + j];
571
0
    x2_finalrow +=
572
0
        diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
573
0
  }
574
0
  for (int i = 0; i < height; ++i) {
575
0
    x_finalcol += diff[i * stride + width - 1];
576
0
    x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
577
0
  }
578
579
0
  int64_t xhor_sum = x_sum - x_finalcol;
580
0
  int64_t xver_sum = x_sum - x_finalrow;
581
0
  int64_t y_sum = x_sum - x_firstcol;
582
0
  int64_t z_sum = x_sum - x_firstrow;
583
0
  int64_t x2hor_sum = x2_sum - x2_finalcol;
584
0
  int64_t x2ver_sum = x2_sum - x2_finalrow;
585
0
  int64_t y2_sum = x2_sum - x2_firstcol;
586
0
  int64_t z2_sum = x2_sum - x2_firstrow;
587
588
0
  const float num_hor = (float)(height * (width - 1));
589
0
  const float num_ver = (float)((height - 1) * width);
590
591
0
  const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
592
0
  const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
593
594
0
  const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
595
0
  const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
596
597
0
  const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
598
0
  const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
599
600
0
  if (xhor_var_n > 0 && y_var_n > 0) {
601
0
    *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
602
0
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
603
0
  } else {
604
0
    *hcorr = 1.0;
605
0
  }
606
0
  if (xver_var_n > 0 && z_var_n > 0) {
607
0
    *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
608
0
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
609
0
  } else {
610
0
    *vcorr = 1.0;
611
0
  }
612
0
}
613
614
static void get_variance_stats_hbd(const MACROBLOCK *x, int64_t *src_var,
615
0
                                   int64_t *rec_var) {
616
0
  const MACROBLOCKD *xd = &x->e_mbd;
617
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
618
0
  const struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
619
0
  const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
620
621
0
  BLOCK_SIZE bsize = mbmi->bsize;
622
0
  int bw = block_size_wide[bsize];
623
0
  int bh = block_size_high[bsize];
624
625
0
  static const int gau_filter[3][3] = {
626
0
    { 1, 2, 1 },
627
0
    { 2, 4, 2 },
628
0
    { 1, 2, 1 },
629
0
  };
630
631
0
  DECLARE_ALIGNED(16, uint16_t, dclevel[(MAX_SB_SIZE + 2) * (MAX_SB_SIZE + 2)]);
632
633
0
  uint16_t *pred_ptr = &dclevel[bw + 1];
634
0
  int pred_stride = xd->plane[0].dst.stride;
635
636
0
  for (int idy = -1; idy < bh + 1; ++idy) {
637
0
    for (int idx = -1; idx < bw + 1; ++idx) {
638
0
      int offset_idy = idy;
639
0
      int offset_idx = idx;
640
0
      if (idy == -1) offset_idy = 0;
641
0
      if (idy == bh) offset_idy = bh - 1;
642
0
      if (idx == -1) offset_idx = 0;
643
0
      if (idx == bw) offset_idx = bw - 1;
644
645
0
      int offset = offset_idy * pred_stride + offset_idx;
646
0
      pred_ptr[idy * bw + idx] = CONVERT_TO_SHORTPTR(pd->dst.buf)[offset];
647
0
    }
648
0
  }
649
650
0
  *rec_var = 0;
651
0
  for (int idy = 0; idy < bh; ++idy) {
652
0
    for (int idx = 0; idx < bw; ++idx) {
653
0
      int sum = 0;
654
0
      for (int iy = 0; iy < 3; ++iy)
655
0
        for (int ix = 0; ix < 3; ++ix)
656
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
657
0
                 gau_filter[iy][ix];
658
659
0
      sum = sum >> 4;
660
661
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
662
0
      *rec_var += diff * diff;
663
0
    }
664
0
  }
665
0
  *rec_var <<= 4;
666
667
0
  int src_stride = p->src.stride;
668
0
  for (int idy = -1; idy < bh + 1; ++idy) {
669
0
    for (int idx = -1; idx < bw + 1; ++idx) {
670
0
      int offset_idy = idy;
671
0
      int offset_idx = idx;
672
0
      if (idy == -1) offset_idy = 0;
673
0
      if (idy == bh) offset_idy = bh - 1;
674
0
      if (idx == -1) offset_idx = 0;
675
0
      if (idx == bw) offset_idx = bw - 1;
676
677
0
      int offset = offset_idy * src_stride + offset_idx;
678
0
      pred_ptr[idy * bw + idx] = CONVERT_TO_SHORTPTR(p->src.buf)[offset];
679
0
    }
680
0
  }
681
682
0
  *src_var = 0;
683
0
  for (int idy = 0; idy < bh; ++idy) {
684
0
    for (int idx = 0; idx < bw; ++idx) {
685
0
      int sum = 0;
686
0
      for (int iy = 0; iy < 3; ++iy)
687
0
        for (int ix = 0; ix < 3; ++ix)
688
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
689
0
                 gau_filter[iy][ix];
690
691
0
      sum = sum >> 4;
692
693
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
694
0
      *src_var += diff * diff;
695
0
    }
696
0
  }
697
0
  *src_var <<= 4;
698
0
}
699
700
static void get_variance_stats(const MACROBLOCK *x, int64_t *src_var,
701
0
                               int64_t *rec_var) {
702
0
  const MACROBLOCKD *xd = &x->e_mbd;
703
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
704
0
  const struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
705
0
  const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
706
707
0
  BLOCK_SIZE bsize = mbmi->bsize;
708
0
  int bw = block_size_wide[bsize];
709
0
  int bh = block_size_high[bsize];
710
711
0
  static const int gau_filter[3][3] = {
712
0
    { 1, 2, 1 },
713
0
    { 2, 4, 2 },
714
0
    { 1, 2, 1 },
715
0
  };
716
717
0
  DECLARE_ALIGNED(16, uint8_t, dclevel[(MAX_SB_SIZE + 2) * (MAX_SB_SIZE + 2)]);
718
719
0
  uint8_t *pred_ptr = &dclevel[bw + 1];
720
0
  int pred_stride = xd->plane[0].dst.stride;
721
722
0
  for (int idy = -1; idy < bh + 1; ++idy) {
723
0
    for (int idx = -1; idx < bw + 1; ++idx) {
724
0
      int offset_idy = idy;
725
0
      int offset_idx = idx;
726
0
      if (idy == -1) offset_idy = 0;
727
0
      if (idy == bh) offset_idy = bh - 1;
728
0
      if (idx == -1) offset_idx = 0;
729
0
      if (idx == bw) offset_idx = bw - 1;
730
731
0
      int offset = offset_idy * pred_stride + offset_idx;
732
0
      pred_ptr[idy * bw + idx] = pd->dst.buf[offset];
733
0
    }
734
0
  }
735
736
0
  *rec_var = 0;
737
0
  for (int idy = 0; idy < bh; ++idy) {
738
0
    for (int idx = 0; idx < bw; ++idx) {
739
0
      int sum = 0;
740
0
      for (int iy = 0; iy < 3; ++iy)
741
0
        for (int ix = 0; ix < 3; ++ix)
742
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
743
0
                 gau_filter[iy][ix];
744
745
0
      sum = sum >> 4;
746
747
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
748
0
      *rec_var += diff * diff;
749
0
    }
750
0
  }
751
0
  *rec_var <<= 4;
752
753
0
  int src_stride = p->src.stride;
754
0
  for (int idy = -1; idy < bh + 1; ++idy) {
755
0
    for (int idx = -1; idx < bw + 1; ++idx) {
756
0
      int offset_idy = idy;
757
0
      int offset_idx = idx;
758
0
      if (idy == -1) offset_idy = 0;
759
0
      if (idy == bh) offset_idy = bh - 1;
760
0
      if (idx == -1) offset_idx = 0;
761
0
      if (idx == bw) offset_idx = bw - 1;
762
763
0
      int offset = offset_idy * src_stride + offset_idx;
764
0
      pred_ptr[idy * bw + idx] = p->src.buf[offset];
765
0
    }
766
0
  }
767
768
0
  *src_var = 0;
769
0
  for (int idy = 0; idy < bh; ++idy) {
770
0
    for (int idx = 0; idx < bw; ++idx) {
771
0
      int sum = 0;
772
0
      for (int iy = 0; iy < 3; ++iy)
773
0
        for (int ix = 0; ix < 3; ++ix)
774
0
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
775
0
                 gau_filter[iy][ix];
776
777
0
      sum = sum >> 4;
778
779
0
      int64_t diff = pred_ptr[idy * bw + idx] - sum;
780
0
      *src_var += diff * diff;
781
0
    }
782
0
  }
783
0
  *src_var <<= 4;
784
0
}
785
786
static void adjust_rdcost(const AV1_COMP *cpi, const MACROBLOCK *x,
787
0
                          RD_STATS *rd_cost) {
788
0
  if (cpi->oxcf.algo_cfg.sharpness != 3) return;
789
790
0
  if (frame_is_kf_gf_arf(cpi)) return;
791
792
0
  int64_t src_var, rec_var;
793
794
0
  const bool is_hbd = is_cur_buf_hbd(&x->e_mbd);
795
0
  if (is_hbd)
796
0
    get_variance_stats_hbd(x, &src_var, &rec_var);
797
0
  else
798
0
    get_variance_stats(x, &src_var, &rec_var);
799
800
0
  if (src_var <= rec_var) return;
801
802
0
  int64_t var_offset = src_var - rec_var;
803
804
0
  rd_cost->dist += var_offset;
805
806
0
  rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
807
0
}
808
809
static void adjust_cost(const AV1_COMP *cpi, const MACROBLOCK *x,
810
0
                        int64_t *rd_cost) {
811
0
  if (cpi->oxcf.algo_cfg.sharpness != 3) return;
812
813
0
  if (frame_is_kf_gf_arf(cpi)) return;
814
815
0
  int64_t src_var, rec_var;
816
0
  const bool is_hbd = is_cur_buf_hbd(&x->e_mbd);
817
818
0
  if (is_hbd)
819
0
    get_variance_stats_hbd(x, &src_var, &rec_var);
820
0
  else
821
0
    get_variance_stats(x, &src_var, &rec_var);
822
823
0
  if (src_var <= rec_var) return;
824
825
0
  int64_t var_offset = src_var - rec_var;
826
827
0
  *rd_cost += RDCOST(x->rdmult, 0, var_offset);
828
0
}
829
830
static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
831
0
                       int64_t *sse_y) {
832
0
  const AV1_COMMON *cm = &cpi->common;
833
0
  const int num_planes = av1_num_planes(cm);
834
0
  const MACROBLOCKD *xd = &x->e_mbd;
835
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
836
0
  int64_t total_sse = 0;
837
0
  for (int plane = 0; plane < num_planes; ++plane) {
838
0
    if (plane && !xd->is_chroma_ref) break;
839
0
    const struct macroblock_plane *const p = &x->plane[plane];
840
0
    const struct macroblockd_plane *const pd = &xd->plane[plane];
841
0
    const BLOCK_SIZE bs =
842
0
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
843
0
    unsigned int sse;
844
845
0
    cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
846
0
                            pd->dst.stride, &sse);
847
0
    total_sse += sse;
848
0
    if (!plane && sse_y) *sse_y = sse;
849
0
  }
850
0
  total_sse <<= 4;
851
0
  return total_sse;
852
0
}
853
854
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
855
0
                          intptr_t block_size, int64_t *ssz) {
856
0
  int i;
857
0
  int64_t error = 0, sqcoeff = 0;
858
859
0
  for (i = 0; i < block_size; i++) {
860
0
    const int diff = coeff[i] - dqcoeff[i];
861
0
    error += diff * diff;
862
0
    sqcoeff += coeff[i] * coeff[i];
863
0
  }
864
865
0
  *ssz = sqcoeff;
866
0
  return error;
867
0
}
868
869
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
870
0
                             intptr_t block_size) {
871
0
  int64_t error = 0;
872
873
0
  for (int i = 0; i < block_size; i++) {
874
0
    const int diff = coeff[i] - dqcoeff[i];
875
0
    error += diff * diff;
876
0
  }
877
878
0
  return error;
879
0
}
880
881
#if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth block error: sum of squared coefficient differences and sum
// of squared coefficients, both rescaled back to the 8-bit domain by
// 2*(bd-8) bits with round-to-nearest.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = (1 << shift) >> 1;
  int64_t error = 0;
  int64_t sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    // Widen before multiplying so the squared terms cannot overflow.
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }

  // Normalize both sums to the 8-bit scale.
  *ssz = (sqcoeff + rounding) >> shift;
  return (error + rounding) >> shift;
}
#endif  // CONFIG_AV1_HIGHBITDEPTH
902
903
// Returns 1 when a diagonal intra mode can be skipped because neither of its
// two neighboring directional modes was chosen as the best intra mode so far;
// returns 0 otherwise.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
919
920
// Returns the rate cost (in bits << AV1_PROB_COST_SHIFT) of signaling the
// given inter prediction mode under the supplied mode context.
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  // Compound modes use a single joint symbol.
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  // Single-reference modes are coded as a cascade of binary decisions:
  // NEWMV vs. rest, then GLOBALMV vs. rest, then NEARESTMV vs. NEARMV.
  const int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[newmv_ctx][0];

  int cost = mode_costs->newmv_mode_cost[newmv_ctx][1];
  const int16_t zeromv_ctx =
      (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV)
    return cost + mode_costs->zeromv_mode_cost[zeromv_ctx][0];

  cost += mode_costs->zeromv_mode_cost[zeromv_ctx][1];
  const int16_t refmv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  return cost + mode_costs->refmv_mode_cost[refmv_ctx][mode != NEARESTMV];
}
950
951
// Maps a (possibly compound) prediction mode to the single-reference mode of
// its first (ref_idx == 0) or second (ref_idx == 1) reference.
static inline PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
                                              int ref_idx) {
  if (ref_idx) return compound_ref1_mode(this_mode);
  return compound_ref0_mode(this_mode);
}
956
957
// Fills in the signaling cost of every reference-frame choice for the current
// block: ref_costs_single[ref] for single-reference prediction and
// ref_costs_comp[ref0][ref1] for compound prediction. Costs mirror the
// bitstream's tree of binary reference-frame symbols, so each entry is the
// sum of the per-level binary costs along the path to that reference.
static inline void estimate_ref_frame_costs(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
    int segment_id, unsigned int *ref_costs_single,
    unsigned int (*ref_costs_comp)[REF_FRAMES]) {
  int seg_ref_active =
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    // The segment fixes the reference frame, so nothing is signaled and all
    // costs are zero.
    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
    int ref_frame;
    for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
      memset(ref_costs_comp[ref_frame], 0,
             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
  } else {
    // Every inter reference first pays the intra-vs-inter symbol cost.
    int intra_inter_ctx = av1_get_intra_inter_context(xd);
    ref_costs_single[INTRA_FRAME] =
        mode_costs->intra_inter_cost[intra_inter_ctx][0];
    unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];

    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
      ref_costs_single[i] = base_cost;

    const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
    const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
    const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
    const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
    const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
    const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);

    // Determine cost of a single ref frame, where frame types are represented
    // by a tree:
    // Level 0: add cost whether this ref is a forward or backward ref
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];

    // Level 1: if this ref is forward ref,
    // add cost whether it is last/last2 or last3/golden
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];

    // Level 1: if this ref is backward ref
    // then add cost whether this ref is altref or backward ref
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];

    // Level 2: further add cost whether this ref is last or last2
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];

    // Level 2: last3 or golden
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];

    // Level 2: bwdref or altref2
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p6][5][1];

    if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
      // Similar to single ref, determine cost of compound ref frames.
      // cost_compound_refs = cost_first_ref + cost_second_ref
      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);

      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };

      // Bidirectional compound: forward refs pay the "bidirectional"
      // comp-ref-type cost; backward refs contribute only their own symbols.
      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
              base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
      ref_bicomp_costs[ALTREF_FRAME] = 0;

      // cost of first ref frame
      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];

      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];

      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];

      // cost of second ref frame
      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];

      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];

      // cost: if one ref frame is forward ref, the other ref is backward ref
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
          ref_costs_comp[ref0][ref1] =
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
        }
      }

      // cost: if both ref frames are the same side.
      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
    } else {
      // Compound prediction is disabled for this frame; fill the compound
      // table with a fixed placeholder cost (512) so lookups stay defined.
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
          ref_costs_comp[ref0][ref1] = 512;
      }
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
    }
  }
}
1118
1119
// Saves the chosen mode's coding state (skip flags, mode info, and the
// per-reference MV context) into ctx so it can be restored later if this
// candidate is ultimately selected for encoding.
static inline void store_coding_context(
#if CONFIG_INTERNAL_STATS
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
#else
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
#endif  // CONFIG_INTERNAL_STATS
    int skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;

  // Take a snapshot of the coding context so it can be
  // restored if we decide to encode this way
  ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
  ctx->skippable = skippable;
#if CONFIG_INTERNAL_STATS
  // Recorded only for internal statistics builds.
  ctx->best_mode_index = mode_index;
#endif  // CONFIG_INTERNAL_STATS
  // Copy the full mode info of the current block.
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
}
1139
1140
// Prepares per-reference-frame prediction buffers and candidate motion
// vectors for inter search: sets up the prediction block pointers in
// yv12_mb[ref_frame], gathers MV reference candidates from neighbors, and
// runs the encoder-side MV prediction refinement. Note the strict ordering:
// the (possibly scaled) pred block must be set up before av1_mv_pred(), and
// is restored to the unscaled reference afterwards.
static inline void setup_buffer_ref_mvs_inter(
    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
    BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref_frame);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const struct scale_factors *const sf =
      get_ref_scale_factors_const(cm, ref_frame);
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
  assert(yv12 != NULL);

  if (scaled_ref_frame) {
    // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
    // support scaling.
    av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
                         num_planes);
  } else {
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
  }

  // Gets an initial list of candidate vectors from neighbours and orders them
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  // Further refinement that is encode side only to test the top few candidates
  // in full and choose the best as the center point for subsequent searches.
  // The current implementation doesn't support scaling.
  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
              ref_frame, block_size);

  // Go back to unscaled reference.
  if (scaled_ref_frame) {
    // We had temporarily setup pred block based on scaled reference above. Go
    // back to unscaled reference now, for subsequent use.
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
  }
}
1184
1185
0
#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
1186
0
#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
1187
1188
// TODO(jingning): this mv clamping function should be block size dependent.
1189
0
static inline void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
1190
0
  const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
1191
0
                                     xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
1192
0
                                     xd->mb_to_top_edge - LEFT_TOP_MARGIN,
1193
0
                                     xd->mb_to_bottom_edge +
1194
0
                                         RIGHT_BOTTOM_MARGIN };
1195
0
  clamp_mv(mv, &mv_limits);
1196
0
}
1197
1198
/* If the current mode shares the same mv with other modes with higher cost,
1199
 * skip this mode. */
1200
static int skip_repeated_mv(const AV1_COMMON *const cm,
1201
                            const MACROBLOCK *const x,
1202
                            PREDICTION_MODE this_mode,
1203
                            const MV_REFERENCE_FRAME ref_frames[2],
1204
0
                            InterModeSearchState *search_state) {
1205
0
  const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
1206
0
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
1207
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1208
0
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1209
0
  PREDICTION_MODE compare_mode = MB_MODE_COUNT;
1210
0
  if (!is_comp_pred) {
1211
0
    if (this_mode == NEARMV) {
1212
0
      if (ref_mv_count == 0) {
1213
        // NEARMV has the same motion vector as NEARESTMV
1214
0
        compare_mode = NEARESTMV;
1215
0
      }
1216
0
      if (ref_mv_count == 1 &&
1217
0
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1218
        // NEARMV has the same motion vector as GLOBALMV
1219
0
        compare_mode = GLOBALMV;
1220
0
      }
1221
0
    }
1222
0
    if (this_mode == GLOBALMV) {
1223
0
      if (ref_mv_count == 0 &&
1224
0
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1225
        // GLOBALMV has the same motion vector as NEARESTMV
1226
0
        compare_mode = NEARESTMV;
1227
0
      }
1228
0
      if (ref_mv_count == 1) {
1229
        // GLOBALMV has the same motion vector as NEARMV
1230
0
        compare_mode = NEARMV;
1231
0
      }
1232
0
    }
1233
1234
0
    if (compare_mode != MB_MODE_COUNT) {
1235
      // Use modelled_rd to check whether compare mode was searched
1236
0
      if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
1237
0
          INT64_MAX) {
1238
0
        const int16_t mode_ctx =
1239
0
            av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
1240
0
        const int compare_cost =
1241
0
            cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
1242
0
        const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);
1243
1244
        // Only skip if the mode cost is larger than compare mode cost
1245
0
        if (this_cost > compare_cost) {
1246
0
          search_state->modelled_rd[this_mode][0][ref_frames[0]] =
1247
0
              search_state->modelled_rd[compare_mode][0][ref_frames[0]];
1248
0
          return 1;
1249
0
        }
1250
0
      }
1251
0
    }
1252
0
  }
1253
0
  return 0;
1254
0
}
1255
1256
static inline int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1257
                                     const AV1_COMMON *cm,
1258
0
                                     const MACROBLOCK *x) {
1259
0
  const MACROBLOCKD *const xd = &x->e_mbd;
1260
0
  *out_mv = in_mv;
1261
0
  lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1262
0
                     cm->features.cur_frame_force_integer_mv);
1263
0
  clamp_mv2(&out_mv->as_mv, xd);
1264
0
  return av1_is_fullmv_in_range(&x->mv_limits,
1265
0
                                get_fullmv_from_mv(&out_mv->as_mv));
1266
0
}
1267
1268
// To use single newmv directly for compound modes, need to clamp the mv to the
1269
// valid mv range. Without this, encoder would generate out of range mv, and
1270
// this is seen in 8k encoding.
1271
static inline void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1272
0
                                     int ref_idx) {
1273
0
  const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1274
0
  SubpelMvLimits mv_limits;
1275
1276
0
  av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1277
0
  clamp_mv(&mv->as_mv, &mv_limits);
1278
0
}
1279
1280
// Determines the NEWMV motion vector(s) for the current mode and their rate
// cost. For compound modes it reuses MVs cached from single-reference NEWMV
// searches (args->single_newmv*) where valid; for single-reference modes it
// runs a motion search and caches the result. Returns 0 on success, or
// INT64_MAX when the mode should be skipped (invalid MV or a pruned
// ref_mv_idx).
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
                            int *const rate_mv, HandleInterModeArgs *const args,
                            inter_mode_info *mode_info) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // refs[1] is forced to 0 for single-reference modes so it indexes safely.
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  const int ref_mv_idx = mbmi->ref_mv_idx;

  if (is_comp_pred) {
    // Seed each NEWMV component from the cached single-reference search
    // result when available, clamped back into the valid MV range.
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
    if (this_mode == NEW_NEWMV) {
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      // Both components are NEWMV: rate is the sum of both MV costs.
      *rate_mv = 0;
      for (int i = 0; i < 2; ++i) {
        const int_mv ref_mv = av1_get_ref_mv(x, i);
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
                                    x->mv_costs->nmv_joint_cost,
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only the second component is NEWMV.
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    } else {
      // Only the first component is NEWMV.
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    }
  } else {
    // Single ref case.
    const int ref_idx = 0;
    int search_range = INT_MAX;

    // Speed feature: for secondary ref_mv candidates, bound the motion search
    // range using the closest previously searched ref MV and its result.
    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
      int min_mv_diff = INT_MAX;
      int best_match = -1;
      MV prev_ref_mv[2] = { { 0 } };
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
                                                     idx, &x->mbmi_ext)
                               .as_mv;
        // Chebyshev (max-component) distance between ref MVs, in 1/8 pel.
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
                                       abs(ref_mv.col - prev_ref_mv[idx].col));

        if (min_mv_diff > ref_mv_diff) {
          min_mv_diff = ref_mv_diff;
          best_match = idx;
        }
      }

      // Only restrict the range when the closest ref MV is within 16 pels.
      if (min_mv_diff < (16 << 3)) {
        if (args->single_newmv_valid[best_match][refs[0]]) {
          search_range = min_mv_diff;
          // Add how far the previous search moved away from its ref MV.
          search_range +=
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
                         prev_ref_mv[best_match].row),
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
                         prev_ref_mv[best_match].col));
          // Get full pixel search range.
          search_range = (search_range + 4) >> 3;
        }
      }
    }

    int_mv best_mv;
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
                             mode_info, &best_mv, args);
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;

    // Cache the result so later compound modes can reuse it.
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
    cur_mv[0].as_int = best_mv.as_int;

    // Return after single_newmv is set.
    if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
  }

  return 0;
}
1384
1385
static inline void update_mode_start_end_index(
1386
    const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
1387
    int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
1388
0
    int interintra_allowed, int eval_motion_mode) {
1389
0
  *mode_index_start = (int)SIMPLE_TRANSLATION;
1390
0
  *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1391
0
  if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1392
0
    if (!eval_motion_mode) {
1393
0
      *mode_index_end = (int)SIMPLE_TRANSLATION;
1394
0
    } else {
1395
      // Set the start index appropriately to process motion modes other than
1396
      // simple translation
1397
0
      *mode_index_start = 1;
1398
0
    }
1399
0
  }
1400
0
  if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
1401
0
    *mode_index_end = SIMPLE_TRANSLATION;
1402
0
}
1403
1404
// Increase rd cost of warp mode for low complexity decoding.
1405
static inline void increase_warp_mode_rd(const MB_MODE_INFO *const best_mbmi,
1406
                                         const MB_MODE_INFO *const this_mbmi,
1407
                                         int64_t *const best_scaled_rd,
1408
                                         int64_t *const this_scaled_rd,
1409
0
                                         int rd_bias_scale_pct) {
1410
  // Check rd bias percentage is non-zero.
1411
0
  if (!rd_bias_scale_pct) return;
1412
0
  if (*best_scaled_rd == INT64_MAX || *this_scaled_rd == INT64_MAX) return;
1413
1414
  // Experiments have been performed with increasing the RD cost of warp mode at
1415
  // the below locations of inter mode evaluation.
1416
  // (1). Inter mode evaluation loop in av1_rd_pick_inter_mode().
1417
  // (2). Motion mode evaluation during handle_inter_mode() call.
1418
  // (3). Motion mode evaluation for winner motion modes.
1419
  // (4). Tx search for best inter candidates.
1420
  // Based on the speed quality trade-off results of this speed feature, the rd
1421
  // bias logic is enabled only at (2), (3) and (4).
1422
0
  const double rd_bias_scale = rd_bias_scale_pct / 100.0;
1423
0
  if (best_mbmi->motion_mode == WARPED_CAUSAL)
1424
0
    *best_scaled_rd += (int64_t)(rd_bias_scale * *best_scaled_rd);
1425
0
  if (this_mbmi->motion_mode == WARPED_CAUSAL)
1426
0
    *this_scaled_rd += (int64_t)(rd_bias_scale * *this_scaled_rd);
1427
0
}
1428
1429
/*!\brief AV1 motion mode search
1430
 *
1431
 * \ingroup inter_mode_search
1432
 * Function to search over and determine the motion mode. It will update
1433
 * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1434
 * WARPED_CAUSAL and determine any necessary side information for the selected
1435
 * motion mode. It will also perform the full transform search, unless the
1436
 * input parameter do_tx_search indicates to do an estimation of the RD rather
1437
 * than an RD corresponding to a full transform search. It will return the
1438
 * RD for the final motion_mode.
1439
 * Do the RD search for a given inter mode and compute all information relevant
1440
 * to the input mode. It will compute the best MV,
1441
 * compound parameters (if the mode is a compound mode) and interpolation filter
1442
 * parameters.
1443
 *
1444
 * \param[in]     cpi               Top-level encoder structure.
1445
 * \param[in]     tile_data         Pointer to struct holding adaptive
1446
 *                                  data/contexts/models for the tile during
1447
 *                                  encoding.
1448
 * \param[in]     x                 Pointer to struct holding all the data for
1449
 *                                  the current macroblock.
1450
 * \param[in]     bsize             Current block size.
1451
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
1452
 *                                  information.
1453
 * \param[in,out] rd_stats_y        Struct to keep track of the RD information
1454
 *                                  for only the Y plane.
1455
 * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
1456
 *                                  for only the UV planes.
1457
 * \param[in]     args              HandleInterModeArgs struct holding
1458
 *                                  miscellaneous arguments for inter mode
1459
 *                                  search. See the documentation for this
1460
 *                                  struct for a description of each member.
1461
 * \param[in]     ref_best_rd       Best RD found so far for this block.
1462
 *                                  It is used for early termination of this
1463
 *                                  search if the RD exceeds this value.
1464
 * \param[in,out] ref_skip_rd       A length 2 array, where skip_rd[0] is the
1465
 *                                  best total RD for a skip mode so far, and
1466
 *                                  skip_rd[1] is the best RD for a skip mode so
1467
 *                                  far in luma. This is used as a speed feature
1468
 *                                  to skip the transform search if the computed
1469
 *                                  skip RD for the current mode is not better
1470
 *                                  than the best skip_rd so far.
1471
 * \param[in,out] rate_mv           The rate associated with the motion vectors.
1472
 *                                  This will be modified if a motion search is
1473
 *                                  done in the motion mode search.
1474
 * \param[in,out] orig_dst          A prediction buffer to hold a computed
1475
 *                                  prediction. This will eventually hold the
1476
 *                                  final prediction, and the tmp_dst info will
1477
 *                                  be copied here.
1478
 * \param[in,out] best_est_rd       Estimated RD for motion mode search if
1479
 *                                  do_tx_search (see below) is 0.
1480
 * \param[in]     do_tx_search      Parameter to indicate whether or not to do
1481
 *                                  a full transform search. This will compute
1482
 *                                  an estimated RD for the modes without the
1483
 *                                  transform search and later perform the full
1484
 *                                  transform search on the best candidates.
1485
 * \param[in]     inter_modes_info  InterModesInfo struct to hold inter mode
1486
 *                                  information to perform a full transform
1487
 *                                  search only on winning candidates searched
1488
 *                                  with an estimate for transform coding RD.
1489
 * \param[in]     eval_motion_mode  Boolean whether or not to evaluate motion
1490
 *                                  motion modes other than SIMPLE_TRANSLATION.
1491
 * \param[out]    yrd               Stores the rdcost corresponding to encoding
1492
 *                                  the luma plane.
1493
 * \return Returns INT64_MAX if the determined motion mode is invalid and the
1494
 * current motion mode being tested should be skipped. It returns 0 if the
1495
 * motion mode search is a success.
1496
 */
1497
static int64_t motion_mode_rd(
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
    int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
    int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
    int eval_motion_mode, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // Mode rate passed in by the caller, before any residual coding cost.
  const int rate2_nocoeff = rd_stats->rate;
  // skip_txfm flag of the best motion mode found so far.
  int best_xskip_txfm = 0;
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  // MV rate as computed by the caller; restored for each candidate mode.
  const int rate_mv0 = *rate_mv;
  const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
                                 is_interintra_allowed(mbmi) &&
                                 mbmi->compound_idx;
  WARP_SAMPLE_INFO *const warp_sample_info =
      &x->warp_sample_info[mbmi->ref_frame[0]];
  int *pts0 = warp_sample_info->pts;
  int *pts_inref0 = warp_sample_info->pts_inref;

  assert(mbmi->ref_frame[1] != INTRA_FRAME);
  const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
  av1_invalid_rd_stats(&best_rd_stats);
  mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
  *yrd = INT64_MAX;
  if (features->switchable_motion_mode) {
    // Determine which motion modes to search if more than SIMPLE_TRANSLATION
    // is allowed.
    last_motion_mode_allowed = motion_mode_allowed(
        xd->global_motion, xd, mbmi, features->allow_warped_motion);
  }

  if (last_motion_mode_allowed == WARPED_CAUSAL) {
    // Collect projection samples used in least squares approximation of
    // the warped motion parameters if WARPED_CAUSAL is going to be searched.
    // A negative count means the samples have not been gathered yet (cached
    // per reference frame in warp_sample_info).
    if (warp_sample_info->num < 0) {
      warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
    }
    mbmi->num_proj_ref = warp_sample_info->num;
  }
  const int total_samples = mbmi->num_proj_ref;
  if (total_samples == 0) {
    // Do not search WARPED_CAUSAL if there are no samples to use to determine
    // warped parameters.
    last_motion_mode_allowed = OBMC_CAUSAL;
  }

  // Snapshot of the mode info; restored at the top of every loop iteration so
  // each motion mode candidate starts from identical state.
  const MB_MODE_INFO base_mbmi = *mbmi;
  MB_MODE_INFO best_mbmi;
  const int interp_filter = features->interp_filter;
  const int switchable_rate =
      av1_is_interp_needed(xd)
          ? av1_get_switchable_rate(x, xd, interp_filter,
                                    cm->seq_params->enable_dual_filter)
          : 0;
  int64_t best_rd = INT64_MAX;
  int best_rate_mv = rate_mv0;
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int mode_index_start, mode_index_end;
  const int txfm_rd_gate_level =
      get_txfm_rd_gate_level(cm->seq_params->enable_masked_compound,
                             cpi->sf.inter_sf.txfm_rd_gate_level, bsize,
                             TX_SEARCH_MOTION_MODE, eval_motion_mode);

  // Modify the start and end index according to speed features. For example,
  // if SIMPLE_TRANSLATION has already been searched according to
  // the motion_mode_for_winner_cand speed feature, update the mode_index_start
  // to avoid searching it again.
  update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
                              last_motion_mode_allowed, interintra_allowed,
                              eval_motion_mode);
  // Main function loop. This loops over all of the possible motion modes and
  // computes RD to determine the best one. This process includes computing
  // any necessary side information for the motion mode and performing the
  // transform search.
  for (int mode_index = mode_index_start; mode_index <= mode_index_end;
       mode_index++) {
    if (args->skip_motion_mode && mode_index) continue;
    int tmp_rate2 = rate2_nocoeff;
    // Indices past last_motion_mode_allowed encode the interintra candidates.
    const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
    int tmp_rate_mv = rate_mv0;

    *mbmi = base_mbmi;
    if (is_interintra_mode) {
      // Only use SIMPLE_TRANSLATION for interintra
      mbmi->motion_mode = SIMPLE_TRANSLATION;
    } else {
      mbmi->motion_mode = (MOTION_MODE)mode_index;
      assert(mbmi->ref_frame[1] != INTRA_FRAME);
    }

    if (cpi->oxcf.algo_cfg.sharpness == 3 &&
        (mbmi->motion_mode == OBMC_CAUSAL ||
         mbmi->motion_mode == WARPED_CAUSAL))
      continue;

    // Do not search OBMC if the probability of selecting it is below a
    // predetermined threshold for this update_type and block size.
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    int use_actual_frame_probs = 1;
    // Note: prune_obmc is assigned on every reachable path below (either the
    // FPMT-test branch or the use_actual_frame_probs branch).
    int prune_obmc;
#if CONFIG_FPMT_TEST
    use_actual_frame_probs =
        (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
    if (!use_actual_frame_probs) {
      prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
#endif
    if (use_actual_frame_probs) {
      prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
    if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
        mbmi->motion_mode == OBMC_CAUSAL)
      continue;

    if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
      // SIMPLE_TRANSLATION mode: no need to recalculate.
      // The prediction is calculated before motion_mode_rd() is called in
      // handle_inter_mode()
    } else if (mbmi->motion_mode == OBMC_CAUSAL) {
      const uint32_t cur_mv = mbmi->mv[0].as_int;
      // OBMC_CAUSAL not allowed for compound prediction
      assert(!is_comp_pred);
      if (have_newmv_in_inter_mode(this_mode)) {
        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
                                 &mbmi->mv[0], NULL);
        // Replace the caller's MV rate with the rate of the refined MV.
        tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
      }
      if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
        // Build the predictor according to the current motion vector if it has
        // not already been built
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                      0, av1_num_planes(cm) - 1);
      }
      // Build the inter predictor by blending the predictor corresponding to
      // this MV, and the neighboring blocks using the OBMC model
      av1_build_obmc_inter_prediction(
          cm, xd, args->above_pred_buf, args->above_pred_stride,
          args->left_pred_buf, args->left_pred_stride);
#if !CONFIG_REALTIME_ONLY
    } else if (mbmi->motion_mode == WARPED_CAUSAL) {
      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
      mbmi->motion_mode = WARPED_CAUSAL;
      mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
      mbmi->interp_filters =
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));

      // Work on local copies so the cached samples stay intact for other modes.
      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
      // Select the samples according to motion vector difference
      if (mbmi->num_proj_ref > 1) {
        mbmi->num_proj_ref = av1_selectSamples(
            &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
      }

      // Compute the warped motion parameters with a least squares fit
      //  using the collected samples
      if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
                               mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
                               &mbmi->wm_params, mi_row, mi_col)) {
        assert(!is_comp_pred);
        if (have_newmv_in_inter_mode(this_mode)) {
          // Refine MV for NEWMV mode
          const int_mv mv0 = mbmi->mv[0];
          const WarpedMotionParams wm_params0 = mbmi->wm_params;
          const int num_proj_ref0 = mbmi->num_proj_ref;

          const int_mv ref_mv = av1_get_ref_mv(x, 0);
          SUBPEL_MOTION_SEARCH_PARAMS ms_params;
          av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
                                            &ref_mv.as_mv, NULL);

          // Refine MV in a small range.
          av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
                               total_samples, cpi->sf.mv_sf.warp_search_method,
                               cpi->sf.mv_sf.warp_search_iters);

          if (mv0.as_int != mbmi->mv[0].as_int) {
            // Keep the refined MV and WM parameters.
            tmp_rate_mv = av1_mv_bit_cost(
                &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
                x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
          } else {
            // Restore the old MV and WM parameters.
            mbmi->mv[0] = mv0;
            mbmi->wm_params = wm_params0;
            mbmi->num_proj_ref = num_proj_ref0;
          }
        }

        // Build the warped predictor
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                      av1_num_planes(cm) - 1);
      } else {
        // Least-squares fit failed; skip WARPED_CAUSAL for this block.
        continue;
      }
#endif  // !CONFIG_REALTIME_ONLY
    } else if (is_interintra_mode) {
      const int ret =
          av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
                                      &tmp_rate_mv, &tmp_rate2, orig_dst);
      if (ret < 0) continue;
    }

    // If we are searching newmv and the mv is the same as refmv, skip the
    // current mode
    if (!av1_check_newmv_joint_nonzero(cm, x)) continue;

    // Update rd_stats for the current motion mode
    txfm_info->skip_txfm = 0;
    rd_stats->dist = 0;
    rd_stats->sse = 0;
    rd_stats->skip_txfm = 1;
    rd_stats->rate = tmp_rate2;
    const ModeCosts *mode_costs = &x->mode_costs;
    if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
    if (interintra_allowed) {
      rd_stats->rate +=
          mode_costs->interintra_cost[size_group_lookup[bsize]]
                                     [mbmi->ref_frame[1] == INTRA_FRAME];
    }
    if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
        (mbmi->ref_frame[1] != INTRA_FRAME)) {
      // The motion-mode syntax element uses a different cost table depending
      // on whether WARPED_CAUSAL is a possible choice.
      if (last_motion_mode_allowed == WARPED_CAUSAL) {
        rd_stats->rate +=
            mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
      } else {
        rd_stats->rate +=
            mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
      }
    }

    int64_t this_yrd = INT64_MAX;

    if (!do_tx_search) {
      // Avoid doing a transform search here to speed up the overall mode
      // search. It will be done later in the mode search if the current
      // motion mode seems promising.
      int64_t curr_sse = -1;
      int64_t sse_y = -1;
      int est_residue_cost = 0;
      int64_t est_dist = 0;
      int64_t est_rd = 0;
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        curr_sse = get_sse(cpi, x, &sse_y);
        const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
                                                 &est_residue_cost, &est_dist);
        (void)has_est_rd;
        assert(has_est_rd);
      } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
                 cpi->sf.rt_sf.use_nonrd_pick_mode) {
        model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
            cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
            NULL, &curr_sse, NULL, NULL, NULL);
        sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
      }
      est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
      // Skip this mode if its estimated RD, even discounted by 20%, is still
      // worse than the best estimate seen so far.
      if (est_rd * 0.80 > *best_est_rd) {
        // Restore the second reference before bailing out of this iteration.
        mbmi->ref_frame[1] = ref_frame_1;
        continue;
      }
      const int mode_rate = rd_stats->rate;
      rd_stats->rate += est_residue_cost;
      rd_stats->dist = est_dist;
      rd_stats->rdcost = est_rd;
      if (rd_stats->rdcost < *best_est_rd) {
        *best_est_rd = rd_stats->rdcost;
        assert(sse_y >= 0);
        // (sse_y << 4) matches the distortion scaling used by RDCOST
        // elsewhere in this file.
        ref_skip_rd[1] = txfm_rd_gate_level
                             ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
                             : INT64_MAX;
      }
      if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
        if (!is_comp_pred) {
          assert(curr_sse >= 0);
          inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                                rd_stats->rdcost, rd_stats, rd_stats_y,
                                rd_stats_uv, mbmi);
        }
      } else {
        assert(curr_sse >= 0);
        inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                              rd_stats->rdcost, rd_stats, rd_stats_y,
                              rd_stats_uv, mbmi);
      }
      // The actual skip decision is deferred to the later full tx search.
      mbmi->skip_txfm = 0;
    } else {
      // Perform full transform search
      int64_t skip_rd = INT64_MAX;
      int64_t skip_rdy = INT64_MAX;
      if (txfm_rd_gate_level) {
        // Check if the mode is good enough based on skip RD
        int64_t sse_y = INT64_MAX;
        int64_t curr_sse = get_sse(cpi, x, &sse_y);
        skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
        skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
        int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
                                        txfm_rd_gate_level, 0);
        if (!eval_txfm) continue;
      }

      // Do transform search
      const int mode_rate = rd_stats->rate;
      if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
                           rd_stats->rate, ref_best_rd)) {
        // A failed search on the first candidate invalidates the whole call.
        if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
          return INT64_MAX;
        }
        continue;
      }
      const int skip_ctx = av1_get_skip_txfm_context(xd);
      const int y_rate =
          rd_stats->skip_txfm
              ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);

      const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      if (curr_rd < ref_best_rd) {
        // Tighten the pruning references for subsequent candidates.
        ref_best_rd = curr_rd;
        ref_skip_rd[0] = skip_rd;
        ref_skip_rd[1] = skip_rdy;
      }
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
            rd_stats_y->rate + rd_stats_uv->rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }

    if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
      if (is_nontrans_global_motion(xd, xd->mi[0])) {
        mbmi->interp_filters =
            av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
      }
    }

    adjust_cost(cpi, x, &this_yrd);
    adjust_rdcost(cpi, x, rd_stats);
    adjust_rdcost(cpi, x, rd_stats_y);

    const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
    if (mode_index == 0) {
      args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
    }
    // Optionally bias the comparison against warped mode (speed feature);
    // best_rd itself is stored unscaled.
    int64_t best_scaled_rd = best_rd;
    int64_t this_scaled_rd = tmp_rd;
    if (mode_index != 0)
      increase_warp_mode_rd(&best_mbmi, mbmi, &best_scaled_rd, &this_scaled_rd,
                            cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct);

    if (mode_index == 0 || this_scaled_rd < best_scaled_rd) {
      // Update best_rd data if this is the best motion mode so far
      best_mbmi = *mbmi;
      best_rd = tmp_rd;
      best_rd_stats = *rd_stats;
      best_rd_stats_y = *rd_stats_y;
      best_rate_mv = tmp_rate_mv;
      *yrd = this_yrd;
      if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
      best_xskip_txfm = mbmi->skip_txfm;
    }
  }
  // Update RD and mbmi stats for selected motion mode
  mbmi->ref_frame[1] = ref_frame_1;
  *rate_mv = best_rate_mv;
  if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
    av1_invalid_rd_stats(rd_stats);
    restore_dst_buf(xd, *orig_dst, num_planes);
    return INT64_MAX;
  }
  *mbmi = best_mbmi;
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
  txfm_info->skip_txfm = best_xskip_txfm;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1899
1900
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
1901
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
1902
0
                            const BUFFER_SET *const orig_dst, int64_t best_rd) {
1903
0
  assert(bsize < BLOCK_SIZES_ALL);
1904
0
  const AV1_COMMON *cm = &cpi->common;
1905
0
  const int num_planes = av1_num_planes(cm);
1906
0
  MACROBLOCKD *const xd = &x->e_mbd;
1907
0
  const int mi_row = xd->mi_row;
1908
0
  const int mi_col = xd->mi_col;
1909
0
  int64_t total_sse = 0;
1910
0
  int64_t this_rd = INT64_MAX;
1911
0
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
1912
0
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];
1913
1914
0
  for (int plane = 0; plane < num_planes; ++plane) {
1915
    // Call av1_enc_build_inter_predictor() for one plane at a time.
1916
0
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1917
0
                                  plane, plane);
1918
0
    const struct macroblockd_plane *const pd = &xd->plane[plane];
1919
0
    const BLOCK_SIZE plane_bsize =
1920
0
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
1921
1922
0
    av1_subtract_plane(x, plane_bsize, plane);
1923
1924
0
    int64_t sse =
1925
0
        av1_pixel_diff_dist(x, plane, 0, 0, plane_bsize, plane_bsize, NULL);
1926
0
    if (is_cur_buf_hbd(xd)) sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
1927
0
    sse <<= 4;
1928
0
    total_sse += sse;
1929
    // When current rd cost is more than the best rd, skip evaluation of
1930
    // remaining planes.
1931
0
    this_rd = RDCOST(x->rdmult, rd_stats->rate, total_sse);
1932
0
    if (this_rd > best_rd) break;
1933
0
  }
1934
1935
0
  rd_stats->dist = rd_stats->sse = total_sse;
1936
0
  rd_stats->rdcost = this_rd;
1937
1938
0
  restore_dst_buf(xd, *orig_dst, num_planes);
1939
0
  return 0;
1940
0
}
1941
1942
// Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the relevant
1943
// mode
1944
// Note(rachelbarker): This speed feature currently does not interact correctly
1945
// with global motion. The issue is that, when global motion is used, GLOBALMV
1946
// produces a different prediction to NEARESTMV/NEARMV even if the motion
1947
// vectors are the same. Thus GLOBALMV should not be pruned in this case.
1948
static inline int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
1949
                                      int ref_idx,
1950
                                      const MV_REFERENCE_FRAME *ref_frame,
1951
0
                                      PREDICTION_MODE single_mode) {
1952
0
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1953
0
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1954
0
  assert(single_mode != NEWMV);
1955
0
  if (single_mode == NEARESTMV) {
1956
0
    return 0;
1957
0
  } else if (single_mode == NEARMV) {
1958
    // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
1959
    // when ref_mv_count = 1, NEARMV is same as GLOBALMV
1960
0
    if (ref_mv_count < 2) return 1;
1961
0
  } else if (single_mode == GLOBALMV) {
1962
    // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
1963
0
    if (ref_mv_count == 0) return 1;
1964
    // when ref_mv_count == 1, NEARMV is same as GLOBALMV
1965
0
    else if (ref_mv_count == 1)
1966
0
      return 0;
1967
1968
0
    int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
1969
    // Check GLOBALMV is matching with any mv in ref_mv_stack
1970
0
    for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
1971
0
      int_mv this_mv;
1972
1973
0
      if (ref_idx == 0)
1974
0
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
1975
0
      else
1976
0
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
1977
1978
0
      if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
1979
0
        return 1;
1980
0
    }
1981
0
  }
1982
0
  return 0;
1983
0
}
1984
1985
static inline int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
1986
                              int ref_idx, int ref_mv_idx,
1987
                              int skip_repeated_ref_mv,
1988
                              const MV_REFERENCE_FRAME *ref_frame,
1989
0
                              const MB_MODE_INFO_EXT *mbmi_ext) {
1990
0
  const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1991
0
  assert(is_inter_singleref_mode(single_mode));
1992
0
  if (single_mode == NEWMV) {
1993
0
    this_mv->as_int = INVALID_MV;
1994
0
  } else if (single_mode == GLOBALMV) {
1995
0
    if (skip_repeated_ref_mv &&
1996
0
        check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1997
0
      return 0;
1998
0
    *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1999
0
  } else {
2000
0
    assert(single_mode == NEARMV || single_mode == NEARESTMV);
2001
0
    const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
2002
0
    const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
2003
0
    if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
2004
0
      assert(ref_mv_offset >= 0);
2005
0
      if (ref_idx == 0) {
2006
0
        *this_mv =
2007
0
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
2008
0
      } else {
2009
0
        *this_mv =
2010
0
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
2011
0
      }
2012
0
    } else {
2013
0
      if (skip_repeated_ref_mv &&
2014
0
          check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
2015
0
        return 0;
2016
0
      *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
2017
0
    }
2018
0
  }
2019
0
  return 1;
2020
0
}
2021
2022
// Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
2023
// population
2024
static inline int skip_nearest_near_mv_using_refmv_weight(
2025
    const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
2026
0
    const int8_t ref_frame_type, PREDICTION_MODE best_mode) {
2027
0
  if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
2028
  // Do not skip the mode if the current block has not yet obtained a valid
2029
  // inter mode.
2030
0
  if (!is_inter_mode(best_mode)) return 0;
2031
2032
0
  const MACROBLOCKD *xd = &x->e_mbd;
2033
  // Do not skip the mode if both the top and left neighboring blocks are not
2034
  // available.
2035
0
  if (!xd->left_available || !xd->up_available) return 0;
2036
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2037
0
  const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
2038
0
  const int ref_mv_count =
2039
0
      AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
2040
2041
0
  if (ref_mv_count == 0) return 0;
2042
  // If ref mv list has at least one nearest candidate do not prune NEARESTMV
2043
0
  if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;
2044
2045
  // Count number of ref mvs populated from nearest candidates
2046
0
  int nearest_refmv_count = 0;
2047
0
  for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
2048
0
    if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
2049
0
  }
2050
2051
  // nearest_refmv_count indicates the closeness of block motion characteristics
2052
  // with respect to its spatial neighbor. Smaller value of nearest_refmv_count
2053
  // w.r.t to ref_mv_count means less correlation with its spatial neighbors.
2054
  // Hence less possibility for NEARESTMV and NEARMV modes becoming the best
2055
  // mode since these modes work well for blocks that shares similar motion
2056
  // characteristics with its neighbor. Thus, NEARMV mode is pruned when
2057
  // nearest_refmv_count is relatively smaller than ref_mv_count and NEARESTMV
2058
  // mode is pruned if none of the ref mvs are populated from nearest candidate.
2059
0
  const int prune_thresh = 1 + (ref_mv_count >= 2);
2060
0
  if (nearest_refmv_count < prune_thresh) return 1;
2061
0
  return 0;
2062
0
}
2063
2064
// This function update the non-new mv for the current prediction mode
2065
static inline int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
2066
                               const AV1_COMMON *cm, const MACROBLOCK *x,
2067
0
                               int skip_repeated_ref_mv) {
2068
0
  const MACROBLOCKD *xd = &x->e_mbd;
2069
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
2070
0
  const int is_comp_pred = has_second_ref(mbmi);
2071
2072
0
  int ret = 1;
2073
0
  for (int i = 0; i < is_comp_pred + 1; ++i) {
2074
0
    int_mv this_mv;
2075
0
    this_mv.as_int = INVALID_MV;
2076
0
    ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
2077
0
                      skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
2078
0
    if (!ret) return 0;
2079
0
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
2080
0
    if (single_mode == NEWMV) {
2081
0
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2082
0
      cur_mv[i] =
2083
0
          (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
2084
0
                         .this_mv
2085
0
                   : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
2086
0
                         .comp_mv;
2087
0
    } else {
2088
0
      ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
2089
0
    }
2090
0
  }
2091
0
  return ret;
2092
0
}
2093
2094
static inline int get_drl_cost(const MB_MODE_INFO *mbmi,
2095
                               const MB_MODE_INFO_EXT *mbmi_ext,
2096
                               const int (*const drl_mode_cost0)[2],
2097
0
                               int8_t ref_frame_type) {
2098
0
  int cost = 0;
2099
0
  if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
2100
0
    for (int idx = 0; idx < 2; ++idx) {
2101
0
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
2102
0
        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
2103
0
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
2104
0
        if (mbmi->ref_mv_idx == idx) return cost;
2105
0
      }
2106
0
    }
2107
0
    return cost;
2108
0
  }
2109
2110
0
  if (have_nearmv_in_inter_mode(mbmi->mode)) {
2111
0
    for (int idx = 1; idx < 3; ++idx) {
2112
0
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
2113
0
        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
2114
0
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
2115
0
        if (mbmi->ref_mv_idx == (idx - 1)) return cost;
2116
0
      }
2117
0
    }
2118
0
    return cost;
2119
0
  }
2120
0
  return cost;
2121
0
}
2122
2123
static inline int is_single_newmv_valid(const HandleInterModeArgs *const args,
2124
                                        const MB_MODE_INFO *const mbmi,
2125
0
                                        PREDICTION_MODE this_mode) {
2126
0
  for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
2127
0
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
2128
0
    const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
2129
0
    if (single_mode == NEWMV &&
2130
0
        args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
2131
0
      return 0;
2132
0
    }
2133
0
  }
2134
0
  return 1;
2135
0
}
2136
2137
static int get_drl_refmv_count(const MACROBLOCK *const x,
2138
                               const MV_REFERENCE_FRAME *ref_frame,
2139
0
                               PREDICTION_MODE mode) {
2140
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2141
0
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
2142
0
  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
2143
0
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
2144
0
  const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);
2145
0
  const int has_drl =
2146
0
      (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1);
2147
0
  const int ref_set =
2148
0
      has_drl ? AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv) : 1;
2149
2150
0
  return ref_set;
2151
0
}
2152
2153
// Checks if particular ref_mv_idx should be pruned.
2154
static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
2155
                                         const int qindex,
2156
0
                                         const int ref_mv_idx) {
2157
0
  if (reduce_inter_modes >= 3) return 1;
2158
  // Q-index logic based pruning is enabled only for
2159
  // reduce_inter_modes = 2.
2160
0
  assert(reduce_inter_modes == 2);
2161
  // When reduce_inter_modes=2, pruning happens as below based on q index.
2162
  // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
2163
  // For q index range between 86 and 170: prune if ref_mv_idx == 2.
2164
  // For q index range between 171 and 255: no pruning.
2165
0
  const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
2166
0
  return (ref_mv_idx >= min_prune_ref_mv_idx);
2167
0
}
2168
2169
// Whether this reference motion vector can be skipped, based on initial
2170
// heuristics.
2171
static bool ref_mv_idx_early_breakout(
2172
    const SPEED_FEATURES *const sf,
2173
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
2174
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
2175
0
    int ref_mv_idx) {
2176
0
  MACROBLOCKD *xd = &x->e_mbd;
2177
0
  MB_MODE_INFO *mbmi = xd->mi[0];
2178
0
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2179
0
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2180
0
  const int is_comp_pred = has_second_ref(mbmi);
2181
0
  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
2182
0
    if (mbmi->ref_frame[0] == LAST2_FRAME ||
2183
0
        mbmi->ref_frame[0] == LAST3_FRAME ||
2184
0
        mbmi->ref_frame[1] == LAST2_FRAME ||
2185
0
        mbmi->ref_frame[1] == LAST3_FRAME) {
2186
0
      const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
2187
0
      if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
2188
0
          REF_CAT_LEVEL) {
2189
0
        return true;
2190
0
      }
2191
0
    }
2192
    // TODO(any): Experiment with reduce_inter_modes for compound prediction
2193
0
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
2194
0
        have_newmv_in_inter_mode(mbmi->mode)) {
2195
0
      if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
2196
0
          mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
2197
0
        const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
2198
0
        const int do_prune = prune_ref_mv_idx_using_qindex(
2199
0
            sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
2200
0
        if (do_prune &&
2201
0
            (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
2202
0
             REF_CAT_LEVEL)) {
2203
0
          return true;
2204
0
        }
2205
0
      }
2206
0
    }
2207
0
  }
2208
2209
0
  mbmi->ref_mv_idx = ref_mv_idx;
2210
0
  if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
2211
0
    return true;
2212
0
  }
2213
0
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
2214
0
  const int drl_cost = get_drl_cost(
2215
0
      mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
2216
0
  est_rd_rate += drl_cost;
2217
0
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
2218
0
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
2219
0
    return true;
2220
0
  }
2221
0
  return false;
2222
0
}
2223
2224
// Compute the estimated RD cost for the motion vector with simple translation.
2225
static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
2226
                                          RD_STATS *rd_stats,
2227
                                          HandleInterModeArgs *args,
2228
                                          int ref_mv_idx, int64_t ref_best_rd,
2229
0
                                          BLOCK_SIZE bsize) {
2230
0
  MACROBLOCKD *xd = &x->e_mbd;
2231
0
  MB_MODE_INFO *mbmi = xd->mi[0];
2232
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2233
0
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2234
0
  const AV1_COMMON *cm = &cpi->common;
2235
0
  const int is_comp_pred = has_second_ref(mbmi);
2236
0
  const ModeCosts *mode_costs = &x->mode_costs;
2237
2238
0
  struct macroblockd_plane *p = xd->plane;
2239
0
  const BUFFER_SET orig_dst = {
2240
0
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
2241
0
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
2242
0
  };
2243
0
  av1_init_rd_stats(rd_stats);
2244
2245
0
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2246
0
  mbmi->comp_group_idx = 0;
2247
0
  mbmi->compound_idx = 1;
2248
0
  if (mbmi->ref_frame[1] == INTRA_FRAME) {
2249
0
    mbmi->ref_frame[1] = NONE_FRAME;
2250
0
  }
2251
0
  int16_t mode_ctx =
2252
0
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
2253
2254
0
  mbmi->num_proj_ref = 0;
2255
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
2256
0
  mbmi->ref_mv_idx = ref_mv_idx;
2257
2258
0
  rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
2259
0
  const int drl_cost =
2260
0
      get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
2261
0
  rd_stats->rate += drl_cost;
2262
2263
0
  int_mv cur_mv[2];
2264
0
  if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
2265
0
    return INT64_MAX;
2266
0
  }
2267
0
  assert(have_nearmv_in_inter_mode(mbmi->mode));
2268
0
  for (int i = 0; i < is_comp_pred + 1; ++i) {
2269
0
    mbmi->mv[i].as_int = cur_mv[i].as_int;
2270
0
  }
2271
0
  const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
2272
0
  rd_stats->rate += ref_mv_cost;
2273
2274
0
  if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
2275
0
    return INT64_MAX;
2276
0
  }
2277
2278
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
2279
0
  mbmi->num_proj_ref = 0;
2280
0
  if (is_comp_pred) {
2281
    // Only compound_average
2282
0
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2283
0
    mbmi->comp_group_idx = 0;
2284
0
    mbmi->compound_idx = 1;
2285
0
  }
2286
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
2287
2288
0
  const int mi_row = xd->mi_row;
2289
0
  const int mi_col = xd->mi_col;
2290
0
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
2291
0
                                AOM_PLANE_Y, AOM_PLANE_Y);
2292
0
  int est_rate;
2293
0
  int64_t est_dist;
2294
0
  model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
2295
0
                                  NULL, NULL, NULL, NULL, NULL);
2296
0
  return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
2297
0
}
2298
2299
// Represents a set of integers, from 0 to sizeof(int) * 8, as bits in
2300
// an integer. 0 for the i-th bit means that integer is excluded, 1 means
2301
// it is included.
2302
0
static inline void mask_set_bit(int *mask, int index) { *mask |= (1 << index); }
2303
2304
0
static inline bool mask_check_bit(int mask, int index) {
2305
0
  return (mask >> index) & 0x1;
2306
0
}
2307
2308
// Before performing the full MV search in handle_inter_mode, do a simple
2309
// translation search and see if we can eliminate any motion vectors.
2310
// Returns an integer where, if the i-th bit is set, it means that the i-th
2311
// motion vector should be searched. This is only set for NEAR_MV.
2312
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
2313
                                RD_STATS *rd_stats,
2314
                                HandleInterModeArgs *const args,
2315
                                int64_t ref_best_rd, BLOCK_SIZE bsize,
2316
0
                                const int ref_set) {
2317
  // If the number of ref mv count is equal to 1, do not prune the same. It
2318
  // is better to evaluate the same than to prune it.
2319
0
  if (ref_set == 1) return 1;
2320
0
  AV1_COMMON *const cm = &cpi->common;
2321
0
  const MACROBLOCKD *const xd = &x->e_mbd;
2322
0
  const MB_MODE_INFO *const mbmi = xd->mi[0];
2323
0
  const PREDICTION_MODE this_mode = mbmi->mode;
2324
2325
  // Only search indices if they have some chance of being good.
2326
0
  int good_indices = 0;
2327
0
  for (int i = 0; i < ref_set; ++i) {
2328
0
    if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
2329
0
                                  ref_best_rd, i)) {
2330
0
      continue;
2331
0
    }
2332
0
    mask_set_bit(&good_indices, i);
2333
0
  }
2334
2335
  // Only prune in NEARMV mode, if the speed feature is set, and the block size
2336
  // is large enough. If these conditions are not met, return all good indices
2337
  // found so far.
2338
0
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
2339
0
    return good_indices;
2340
0
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
2341
0
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
2342
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
2343
  // so b/2384 can be resolved.
2344
0
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
2345
0
      (mbmi->ref_frame[1] > 0 &&
2346
0
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
2347
0
    return good_indices;
2348
0
  }
2349
2350
  // Calculate the RD cost for the motion vectors using simple translation.
2351
0
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
2352
0
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
2353
    // If this index is bad, ignore it.
2354
0
    if (!mask_check_bit(good_indices, ref_mv_idx)) {
2355
0
      continue;
2356
0
    }
2357
0
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
2358
0
        cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize);
2359
0
  }
2360
  // Find the index with the best RD cost.
2361
0
  int best_idx = 0;
2362
0
  for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
2363
0
    if (idx_rdcost[i] < idx_rdcost[best_idx]) {
2364
0
      best_idx = i;
2365
0
    }
2366
0
  }
2367
  // Only include indices that are good and within a % of the best.
2368
0
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
2369
  // If the simple translation cost is not within this multiple of the
2370
  // best RD, skip it. Note that the cutoff is derived experimentally.
2371
0
  const double ref_dth = 5;
2372
0
  int result = 0;
2373
0
  for (int i = 0; i < ref_set; ++i) {
2374
0
    if (mask_check_bit(good_indices, i) &&
2375
0
        (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
2376
0
        (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
2377
0
      mask_set_bit(&result, i);
2378
0
    }
2379
0
  }
2380
0
  return result;
2381
0
}
2382
2383
/*!\brief Motion mode information for inter mode search speedup.
2384
 *
2385
 * Used in a speed feature to search motion modes other than
2386
 * SIMPLE_TRANSLATION only on winning candidates.
2387
 */
2388
typedef struct motion_mode_candidate {
2389
  /*!
2390
   * Mode info for the motion mode candidate.
2391
   */
2392
  MB_MODE_INFO mbmi;
2393
  /*!
2394
   * Rate describing the cost of the motion vectors for this candidate.
2395
   */
2396
  int rate_mv;
2397
  /*!
2398
   * Rate before motion mode search and transform coding is applied.
2399
   */
2400
  int rate2_nocoeff;
2401
  /*!
2402
   * An integer value 0 or 1 which indicates whether or not to skip the motion
2403
   * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
2404
   * candidate.
2405
   */
2406
  int skip_motion_mode;
2407
  /*!
2408
   * Total RD cost for this candidate.
2409
   */
2410
  int64_t rd_cost;
2411
} motion_mode_candidate;
2412
2413
/*!\cond */
2414
typedef struct motion_mode_best_st_candidate {
2415
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
2416
  int num_motion_mode_cand;
2417
} motion_mode_best_st_candidate;
2418
2419
// Checks if the current reference frame matches with neighbouring block's
2420
// (top/left) reference frames
2421
static inline int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
2422
0
                                               MB_MODE_INFO *nb_mbmi) {
2423
0
  MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
2424
0
                                          nb_mbmi->ref_frame[1] };
2425
0
  MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
2426
0
                                           cur_mbmi->ref_frame[1] };
2427
0
  const int is_cur_comp_pred = has_second_ref(cur_mbmi);
2428
0
  int match_found = 0;
2429
2430
0
  for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
2431
0
    if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
2432
0
        (cur_ref_frames[i] == nb_ref_frames[1]))
2433
0
      match_found = 1;
2434
0
  }
2435
0
  return match_found;
2436
0
}
2437
2438
static inline int find_ref_match_in_above_nbs(const int total_mi_cols,
2439
0
                                              MACROBLOCKD *xd) {
2440
0
  if (!xd->up_available) return 1;
2441
0
  const int mi_col = xd->mi_col;
2442
0
  MB_MODE_INFO **cur_mbmi = xd->mi;
2443
  // prev_row_mi points into the mi array, starting at the beginning of the
2444
  // previous row.
2445
0
  MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
2446
0
  const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
2447
0
  uint8_t mi_step;
2448
0
  for (int above_mi_col = mi_col; above_mi_col < end_col;
2449
0
       above_mi_col += mi_step) {
2450
0
    MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
2451
0
    mi_step = mi_size_wide[above_mi[0]->bsize];
2452
0
    int match_found = 0;
2453
0
    if (is_inter_block(*above_mi))
2454
0
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
2455
0
    if (match_found) return 1;
2456
0
  }
2457
0
  return 0;
2458
0
}
2459
2460
static inline int find_ref_match_in_left_nbs(const int total_mi_rows,
2461
0
                                             MACROBLOCKD *xd) {
2462
0
  if (!xd->left_available) return 1;
2463
0
  const int mi_row = xd->mi_row;
2464
0
  MB_MODE_INFO **cur_mbmi = xd->mi;
2465
  // prev_col_mi points into the mi array, starting at the top of the
2466
  // previous column
2467
0
  MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
2468
0
  const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
2469
0
  uint8_t mi_step;
2470
0
  for (int left_mi_row = mi_row; left_mi_row < end_row;
2471
0
       left_mi_row += mi_step) {
2472
0
    MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
2473
0
    mi_step = mi_size_high[left_mi[0]->bsize];
2474
0
    int match_found = 0;
2475
0
    if (is_inter_block(*left_mi))
2476
0
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
2477
0
    if (match_found) return 1;
2478
0
  }
2479
0
  return 0;
2480
0
}
2481
/*!\endcond */
2482
2483
/*! \brief Struct used to hold TPL data to
2484
 * narrow down parts of the inter mode search.
2485
 */
2486
typedef struct {
2487
  /*!
2488
   * The best inter cost out of all of the reference frames.
2489
   */
2490
  int64_t best_inter_cost;
2491
  /*!
2492
   * The inter cost for each reference frame.
2493
   */
2494
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
2495
} PruneInfoFromTpl;
2496
2497
#if !CONFIG_REALTIME_ONLY
2498
// TODO(Remya): Check if get_tpl_stats_b() can be reused
2499
static inline void get_block_level_tpl_stats(
2500
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
2501
0
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
2502
0
  AV1_COMMON *const cm = &cpi->common;
2503
2504
0
  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
2505
0
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
2506
0
  const int tpl_idx = cpi->gf_frame_index;
2507
0
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
2508
0
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
2509
0
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
2510
0
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
2511
0
  const int mi_wide = mi_size_wide[bsize];
2512
0
  const int mi_high = mi_size_high[bsize];
2513
0
  const int tpl_stride = tpl_frame->stride;
2514
0
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
2515
0
  const int mi_col_sr =
2516
0
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
2517
0
  const int mi_col_end_sr =
2518
0
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
2519
0
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
2520
2521
0
  const int row_step = step;
2522
0
  const int col_step_sr =
2523
0
      coded_to_superres_mi(step, cm->superres_scale_denominator);
2524
0
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
2525
0
       row += row_step) {
2526
0
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
2527
0
         col += col_step_sr) {
2528
0
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
2529
0
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];
2530
2531
      // Sums up the inter cost of corresponding ref frames
2532
0
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2533
0
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
2534
0
            this_stats->pred_error[ref_idx];
2535
0
      }
2536
0
    }
2537
0
  }
2538
2539
  // Computes the best inter cost (minimum inter_cost)
2540
0
  int64_t best_inter_cost = INT64_MAX;
2541
0
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2542
0
    const int64_t cur_inter_cost =
2543
0
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
2544
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
2545
    // calculating the minimum inter_cost
2546
0
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
2547
0
        valid_refs[ref_idx])
2548
0
      best_inter_cost = cur_inter_cost;
2549
0
  }
2550
0
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
2551
0
}
2552
#endif
2553
2554
static inline int prune_modes_based_on_tpl_stats(
2555
    PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
2556
0
    const PREDICTION_MODE this_mode, int prune_mode_level) {
2557
0
  const int have_newmv = have_newmv_in_inter_mode(this_mode);
2558
0
  if ((prune_mode_level < 2) && have_newmv) return 0;
2559
2560
0
  const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
2561
0
  if (best_inter_cost == INT64_MAX) return 0;
2562
2563
0
  const int prune_level = prune_mode_level - 1;
2564
0
  int64_t cur_inter_cost;
2565
2566
0
  const int is_globalmv =
2567
0
      (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
2568
0
  const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
2569
2570
  // Thresholds used for pruning:
2571
  // Lower value indicates aggressive pruning and higher value indicates
2572
  // conservative pruning which is set based on ref_mv_idx and speed feature.
2573
  // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. prune_index
2574
  // 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
2575
0
  static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
2576
0
    { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
2577
0
  };
2578
2579
0
  const int is_comp_pred = (refs[1] > INTRA_FRAME);
2580
0
  if (!is_comp_pred) {
2581
0
    cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2582
0
  } else {
2583
0
    const int64_t inter_cost_ref0 =
2584
0
        inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2585
0
    const int64_t inter_cost_ref1 =
2586
0
        inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
2587
    // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
2588
    // more aggressive pruning
2589
0
    cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
2590
0
  }
2591
2592
  // Prune the mode if cur_inter_cost is greater than threshold times
2593
  // best_inter_cost
2594
0
  if (cur_inter_cost >
2595
0
      ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
2596
0
        best_inter_cost) >>
2597
0
       2))
2598
0
    return 1;
2599
0
  return 0;
2600
0
}
2601
2602
/*!\brief High level function to select parameters for compound mode.
2603
 *
2604
 * \ingroup inter_mode_search
2605
 * The main search functionality is done in the call to av1_compound_type_rd().
2606
 *
2607
 * \param[in]     cpi               Top-level encoder structure.
2608
 * \param[in]     x                 Pointer to struct holding all the data for
2609
 *                                  the current macroblock.
2610
 * \param[in]     args              HandleInterModeArgs struct holding
2611
 *                                  miscellaneous arguments for inter mode
2612
 *                                  search. See the documentation for this
2613
 *                                  struct for a description of each member.
2614
 * \param[in]     ref_best_rd       Best RD found so far for this block.
2615
 *                                  It is used for early termination of this
2616
 *                                  search if the RD exceeds this value.
2617
 * \param[in,out] cur_mv            Current motion vector.
2618
 * \param[in]     bsize             Current block size.
2619
 * \param[in,out] compmode_interinter_cost  RD of the selected interinter
2620
                                    compound mode.
2621
 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2622
 *                                  allocated buffers for the compound
2623
 *                                  predictors and masks in the compound type
2624
 *                                  search.
2625
 * \param[in,out] orig_dst          A prediction buffer to hold a computed
2626
 *                                  prediction. This will eventually hold the
2627
 *                                  final prediction, and the tmp_dst info will
2628
 *                                  be copied here.
2629
 * \param[in]     tmp_dst           A temporary prediction buffer to hold a
2630
 *                                  computed prediction.
2631
 * \param[in,out] rate_mv           The rate associated with the motion vectors.
2632
 *                                  This will be modified if a motion search is
2633
 *                                  done in the motion mode search.
2634
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
2635
 *                                  information.
2636
 * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is the
2637
 *                                  best total RD for a skip mode so far, and
2638
 *                                  skip_rd[1] is the best RD for a skip mode so
2639
 *                                  far in luma. This is used as a speed feature
2640
 *                                  to skip the transform search if the computed
2641
 *                                  skip RD for the current mode is not better
2642
 *                                  than the best skip_rd so far.
2643
 * \param[in,out] skip_build_pred   Indicates whether or not to build the inter
2644
 *                                  predictor. If this is 0, the inter predictor
2645
 *                                  has already been built and thus we can avoid
2646
 *                                  repeating computation.
2647
 * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2648
 * a viable candidate.
2649
 */
2650
static int process_compound_inter_mode(
2651
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
2652
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
2653
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
2654
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
2655
0
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
2656
0
  MACROBLOCKD *xd = &x->e_mbd;
2657
0
  MB_MODE_INFO *mbmi = xd->mi[0];
2658
0
  const AV1_COMMON *cm = &cpi->common;
2659
0
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
2660
0
                                   cm->seq_params->enable_masked_compound;
2661
0
  int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
2662
0
                         (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);
2663
2664
0
  const int num_planes = av1_num_planes(cm);
2665
0
  const int mi_row = xd->mi_row;
2666
0
  const int mi_col = xd->mi_col;
2667
0
  int is_luma_interp_done = 0;
2668
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
2669
2670
0
  int64_t best_rd_compound;
2671
0
  int64_t rd_thresh;
2672
0
  const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
2673
0
  const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
2674
0
  rd_thresh = get_rd_thresh_from_best_rd(ref_best_rd, (1 << comp_type_rd_shift),
2675
0
                                         comp_type_rd_scale);
2676
  // Select compound type and any parameters related to that type
2677
  // (for example, the mask parameters if it is a masked mode) and compute
2678
  // the RD
2679
0
  *compmode_interinter_cost = av1_compound_type_rd(
2680
0
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
2681
0
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
2682
0
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
2683
0
  if (ref_best_rd < INT64_MAX &&
2684
0
      (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
2685
0
          ref_best_rd) {
2686
0
    restore_dst_buf(xd, *orig_dst, num_planes);
2687
0
    return 1;
2688
0
  }
2689
2690
  // Build only uv predictor for COMPOUND_AVERAGE.
2691
  // Note there is no need to call av1_enc_build_inter_predictor
2692
  // for luma if COMPOUND_AVERAGE is selected because it is the first
2693
  // candidate in av1_compound_type_rd, which means it used the dst_buf
2694
  // rather than the tmp_buf.
2695
0
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
2696
0
    if (num_planes > 1) {
2697
0
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
2698
0
                                    AOM_PLANE_U, num_planes - 1);
2699
0
    }
2700
0
    *skip_build_pred = 1;
2701
0
  }
2702
0
  return 0;
2703
0
}
2704
2705
// Speed feature to prune out MVs that are similar to previous MVs if they
2706
// don't achieve the best RD advantage.
2707
static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2708
                                   int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2709
0
                                   MB_MODE_INFO *mbmi, int pruning_factor) {
2710
0
  int i;
2711
0
  const int is_comp_pred = has_second_ref(mbmi);
2712
0
  const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
2713
2714
  // Skip the evaluation if an MV match is found.
2715
0
  if (ref_mv_idx > 0) {
2716
0
    for (int idx = 0; idx < ref_mv_idx; ++idx) {
2717
0
      if (save_mv[idx][0].as_int == INVALID_MV) continue;
2718
2719
0
      int mv_diff = 0;
2720
0
      for (i = 0; i < 1 + is_comp_pred; ++i) {
2721
0
        mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2722
0
                   abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2723
0
      }
2724
2725
      // If this mode is not the best one, and current MV is similar to
2726
      // previous stored MV, terminate this ref_mv_idx evaluation.
2727
0
      if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2728
0
    }
2729
0
  }
2730
2731
0
  if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2732
0
    for (i = 0; i < is_comp_pred + 1; ++i)
2733
0
      save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2734
0
  }
2735
2736
0
  return 0;
2737
0
}
2738
2739
/*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2740
 *
2741
 * \ingroup inter_mode_search
2742
 *
2743
 * Compares the sse of zero mv and the best sse found in single new_mv. If the
2744
 * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
2745
 * Else returns 0.
2746
 *
2747
 * Note that the sse of here comes from single_motion_search. So it is
2748
 * interpolated with the filter in motion search, not the actual interpolation
2749
 * filter used in encoding.
2750
 *
2751
 * \param[in]     fn_ptr            A table of function pointers to compute SSE.
2752
 * \param[in]     x                 Pointer to struct holding all the data for
2753
 *                                  the current macroblock.
2754
 * \param[in]     bsize             The current block_size.
2755
 * \param[in]     args              The args to handle_inter_mode, used to track
2756
 *                                  the best SSE.
2757
 * \param[in]    prune_zero_mv_with_sse  The argument holds speed feature
2758
 *                                       prune_zero_mv_with_sse value
2759
 * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2760
 */
2761
static inline int prune_zero_mv_with_sse(const aom_variance_fn_ptr_t *fn_ptr,
2762
                                         const MACROBLOCK *x, BLOCK_SIZE bsize,
2763
                                         const HandleInterModeArgs *args,
2764
0
                                         int prune_zero_mv_with_sse) {
2765
0
  const MACROBLOCKD *xd = &x->e_mbd;
2766
0
  const MB_MODE_INFO *mbmi = xd->mi[0];
2767
2768
0
  const int is_comp_pred = has_second_ref(mbmi);
2769
0
  const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
2770
2771
0
  for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2772
0
    if (xd->global_motion[refs[idx]].wmtype != IDENTITY) {
2773
      // Pruning logic only works for IDENTITY type models
2774
      // Note: In theory we could apply similar logic for TRANSLATION
2775
      // type models, but we do not code these due to a spec bug
2776
      // (see comments in gm_get_motion_vector() in av1/common/mv.h)
2777
0
      assert(xd->global_motion[refs[idx]].wmtype != TRANSLATION);
2778
0
      return 0;
2779
0
    }
2780
2781
    // Don't prune if we have invalid data
2782
0
    assert(mbmi->mv[idx].as_int == 0);
2783
0
    if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
2784
0
      return 0;
2785
0
    }
2786
0
  }
2787
2788
  // Sum up the sse of ZEROMV and best NEWMV
2789
0
  unsigned int this_sse_sum = 0;
2790
0
  unsigned int best_sse_sum = 0;
2791
0
  for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2792
0
    const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
2793
0
    const struct macroblockd_plane *pd = xd->plane;
2794
0
    const struct buf_2d *src_buf = &p->src;
2795
0
    const struct buf_2d *ref_buf = &pd->pre[idx];
2796
0
    const uint8_t *src = src_buf->buf;
2797
0
    const uint8_t *ref = ref_buf->buf;
2798
0
    const int src_stride = src_buf->stride;
2799
0
    const int ref_stride = ref_buf->stride;
2800
2801
0
    unsigned int this_sse;
2802
0
    fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
2803
0
    this_sse_sum += this_sse;
2804
2805
0
    const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
2806
0
    best_sse_sum += best_sse;
2807
0
  }
2808
2809
0
  const double mul = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25;
2810
0
  if ((double)this_sse_sum > (mul * (double)best_sse_sum)) {
2811
0
    return 1;
2812
0
  }
2813
2814
0
  return 0;
2815
0
}
2816
2817
/*!\brief Searches for interpolation filter in realtime mode during winner eval
 *
 * \ingroup inter_mode_search
 *
 * Does a simple interpolation filter search during winner mode evaluation. This
 * is currently only used by realtime mode as \ref
 * av1_interpolation_filter_search is not called during realtime encoding.
 *
 * This function only searches over two possible filters. EIGHTTAP_REGULAR is
 * always searched. For low-res clips (<= 240p), MULTITAP_SHARP is also
 * searched. For higher-res clips (> 240p), EIGHTTAP_SMOOTH is also searched.
 *
 * \param[in]     cpi               Pointer to the compressor. Used for feature
 *                                  flags.
 * \param[in,out] x                 Pointer to macroblock. This is primarily
 *                                  used to access the buffers.
 * \param[in]     mi_row            The current row in mi unit (4X4 pixels).
 * \param[in]     mi_col            The current col in mi unit (4X4 pixels).
 * \param[in]     bsize             The current block_size.
 * \return Returns true if a predictor is built in xd->dst, false otherwise.
 */
static inline bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
                                      int mi_row, int mi_col,
                                      BLOCK_SIZE bsize) {
  // Candidate filter pairs; each entry is applied to both x and y directions.
  static const InterpFilters filters_ref_set[3] = {
    { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
    { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
    { MULTITAP_SHARP, MULTITAP_SHARP }
  };

  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mi = xd->mi[0];
  int64_t best_cost = INT64_MAX;
  int best_filter_index = -1;
  const int num_planes = av1_num_planes(cm);
  // Resolution decides which of the two non-REGULAR filters gets searched.
  const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
  assert(is_inter_mode(mi->mode));
  assert(mi->motion_mode == SIMPLE_TRANSLATION);
  assert(!is_inter_compound_mode(mi->mode));

  // With a full-pel MV no interpolation is applied, so there is nothing to
  // search and no predictor is built here.
  if (!av1_is_interp_needed(xd)) {
    return false;
  }

  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
  // dst_bufs[0] stores the new predictor, and dst_bufs[1] stores the best
  // predictor found so far; swap_dst_buf exchanges the two.
  const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };

  for (int i = 0; i < 3; ++i) {
    // Search only two of the three candidates, picked by resolution.
    if (is_240p_or_lesser) {
      if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
        continue;
      }
    } else {
      if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
        continue;
      }
    }
    int64_t cost;
    RD_STATS tmp_rd = { 0 };

    mi->interp_filters.as_filters = filters_ref_set[i];
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);

    // Model the RD of the luma predictor rather than doing a full txfm search.
    model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
                       ? MODELRD_LEGACY
                       : MODELRD_TYPE_INTERP_FILTER](
        cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
        &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);

    tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
                                           cm->seq_params->enable_dual_filter);
    cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
    if (cost < best_cost) {
      best_filter_index = i;
      best_cost = cost;
      // Keep the new best predictor; future predictions go to the other buffer.
      swap_dst_buf(xd, dst_bufs, num_planes);
    }
  }
  assert(best_filter_index >= 0);

  mi->interp_filters.as_filters = filters_ref_set[best_filter_index];

  const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];

  if (is_best_pred_in_orig) {
    swap_dst_buf(xd, dst_bufs, num_planes);
  } else {
    // Note that xd->pd's buffers are kept in sync with dst_bufs[0]. So if
    // is_best_pred_in_orig is false, that means the current buffer is the
    // original one.
    assert(&orig_dst == dst_bufs[0]);
    assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
    const int width = block_size_wide[bsize];
    const int height = block_size_high[bsize];
#if CONFIG_AV1_HIGHBITDEPTH
    const bool is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd) {
      aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
                               tmp_dst.stride[AOM_PLANE_Y],
                               CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
                               orig_dst.stride[AOM_PLANE_Y], width, height);
    } else {
      aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                        orig_dst.plane[AOM_PLANE_Y],
                        orig_dst.stride[AOM_PLANE_Y], width, height);
    }
#else
    aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                      orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
                      width, height);
#endif
  }

  // Build the YUV predictor.
  if (num_planes > 1) {
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
                                  AOM_PLANE_U, AOM_PLANE_V);
  }

  return true;
}
2948
2949
/*!\brief AV1 inter mode RD computation
 *
 * \ingroup inter_mode_search
 * Do the RD search for a given inter mode and compute all information relevant
 * to the input mode. It will compute the best MV,
 * compound parameters (if the mode is a compound mode) and interpolation filter
 * parameters.
 *
 * \param[in]     cpi               Top-level encoder structure.
 * \param[in]     tile_data         Pointer to struct holding adaptive
 *                                  data/contexts/models for the tile during
 *                                  encoding.
 * \param[in]     x                 Pointer to structure holding all the data
 *                                  for the current macroblock.
 * \param[in]     bsize             Current block size.
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
 *                                  information.
 * \param[in,out] rd_stats_y        Struct to keep track of the RD information
 *                                  for only the Y plane.
 * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
 *                                  for only the UV planes.
 * \param[in]     args              HandleInterModeArgs struct holding
 *                                  miscellaneous arguments for inter mode
 *                                  search. See the documentation for this
 *                                  struct for a description of each member.
 * \param[in]     ref_best_rd       Best RD found so far for this block.
 *                                  It is used for early termination of this
 *                                  search if the RD exceeds this value.
 * \param[in]     tmp_buf           Temporary buffer used to hold predictors
 *                                  built in this search.
 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
 *                                  allocated buffers for the compound
 *                                  predictors and masks in the compound type
 *                                  search.
 * \param[in,out] best_est_rd       Estimated RD for motion mode search if
 *                                  do_tx_search (see below) is 0.
 * \param[in]     do_tx_search      Parameter to indicate whether or not to do
 *                                  a full transform search. This will compute
 *                                  an estimated RD for the modes without the
 *                                  transform search and later perform the full
 *                                  transform search on the best candidates.
 * \param[in,out] inter_modes_info  InterModesInfo struct to hold inter mode
 *                                  information to perform a full transform
 *                                  search only on winning candidates searched
 *                                  with an estimate for transform coding RD.
 * \param[in,out] motion_mode_cand  A motion_mode_candidate struct to store
 *                                  motion mode information used in a speed
 *                                  feature to search motion modes other than
 *                                  SIMPLE_TRANSLATION only on winning
 *                                  candidates.
 * \param[in,out] skip_rd           A length 2 array, where skip_rd[0] is the
 *                                  best total RD for a skip mode so far, and
 *                                  skip_rd[1] is the best RD for a skip mode so
 *                                  far in luma. This is used as a speed feature
 *                                  to skip the transform search if the computed
 *                                  skip RD for the current mode is not better
 *                                  than the best skip_rd so far.
 * \param[in]     inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
 *                                         narrow down the search based on data
 *                                         collected in the TPL model.
 * \param[out]    yrd               Stores the rdcost corresponding to encoding
 *                                  the luma plane.
 *
 * \return The RD cost for the mode being searched.
 */
static int64_t handle_inter_mode(
    AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
    uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
    int64_t *best_est_rd, const int do_tx_search,
    InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
    int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
    int64_t *yrd) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;

#if CONFIG_REALTIME_ONLY
  const int prune_modes_based_on_tpl = 0;
#else   // CONFIG_REALTIME_ONLY
  // TPL-based pruning requires TPL stats to be ready for this GF frame.
  const TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const int prune_modes_based_on_tpl =
      cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
      av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
#endif  // CONFIG_REALTIME_ONLY
  int i;
  // Reference frames for this mode
  const int refs[2] = { mbmi->ref_frame[0],
                        (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  int rate_mv = 0;
  int64_t rd = INT64_MAX;
  // Do first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };

  int64_t ret_val = INT64_MAX;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  // Best-so-far state across all ref_mv_idx candidates; restored at the end.
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  int64_t best_rd = INT64_MAX;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  int64_t best_yrd = INT64_MAX;
  MB_MODE_INFO best_mbmi = *mbmi;
  int best_xskip_txfm = 0;
  int64_t newmv_ret_val = INT64_MAX;
  inter_mode_info mode_info[MAX_REF_MV_SEARCH];

  // Do not prune the mode based on inter cost from tpl if the current ref frame
  // is the winner ref in neighbouring blocks.
  int ref_match_found_in_above_nb = 0;
  int ref_match_found_in_left_nb = 0;
  if (prune_modes_based_on_tpl) {
    ref_match_found_in_above_nb =
        find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
    ref_match_found_in_left_nb =
        find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
  }

  // First, perform a simple translation search for each of the indices. If
  // an index performs well, it will be fully searched in the main loop
  // of this function.
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
  // Save MV results from first 2 ref_mv_idx.
  int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
  int best_ref_mv_idx = -1;
  // Bitmask of ref_mv_idx values worth a full search.
  const int idx_mask =
      ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set);
  const int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
  const ModeCosts *mode_costs = &x->mode_costs;
  const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
  // Signaling cost common to every ref_mv_idx of this mode.
  const int base_rate =
      args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;

  for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
    save_mv[i][0].as_int = INVALID_MV;
    save_mv[i][1].as_int = INVALID_MV;
  }
  args->start_mv_cnt = 0;

  // Main loop of this function. This will  iterate over all of the ref mvs
  // in the dynamic reference list and do the following:
  //    1.) Get the current MV. Create newmv MV if necessary
  //    2.) Search compound type and parameters if applicable
  //    3.) Do interpolation filter search
  //    4.) Build the inter predictor
  //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
  //        WARPED_CAUSAL)
  //    6.) Update stats if best so far
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    mbmi->ref_mv_idx = ref_mv_idx;

    mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
    mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
    const int drl_cost = get_drl_cost(
        mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
    mode_info[ref_mv_idx].drl_cost = drl_cost;
    mode_info[ref_mv_idx].skip = 0;

    if (!mask_check_bit(idx_mask, ref_mv_idx)) {
      // MV did not perform well in simple translation search. Skip it.
      continue;
    }
    if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
        !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
      // Skip mode if TPL model indicates it will not be beneficial.
      if (prune_modes_based_on_tpl_stats(
              inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
              cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
        continue;
    }
    av1_init_rd_stats(rd_stats);

    // Initialize compound mode data
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
    if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;

    mbmi->num_proj_ref = 0;
    mbmi->motion_mode = SIMPLE_TRANSLATION;

    // Compute cost for signalling this DRL index
    rd_stats->rate = base_rate;
    rd_stats->rate += drl_cost;

    int rs = 0;
    int compmode_interinter_cost = 0;

    int_mv cur_mv[2];

    // TODO(Cherma): Extend this speed feature to support compound mode
    int skip_repeated_ref_mv =
        is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
    // Generate the current mv according to the prediction mode
    if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
      continue;
    }

    // The above call to build_cur_mv does not handle NEWMV modes. Build
    // the mv here if we have NEWMV for any predictors.
    if (have_newmv_in_inter_mode(this_mode)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, handle_newmv_time);
#endif
      newmv_ret_val =
          handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, handle_newmv_time);
#endif

      // Non-zero return means the NEWMV search failed; skip this candidate.
      if (newmv_ret_val != 0) continue;

      if (is_inter_singleref_mode(this_mode) &&
          cur_mv[0].as_int != INVALID_MV) {
        const MV_REFERENCE_FRAME ref = refs[0];
        const unsigned int this_sse = x->pred_sse[ref];
        // Track the lowest prediction SSE seen for this reference frame.
        if (this_sse < args->best_single_sse_in_refs[ref]) {
          args->best_single_sse_in_refs[ref] = this_sse;
        }

        if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
          const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1;
          const int pix_idx = num_pels_log2_lookup[bsize] - 4;
          // Thresholds indexed by [speed-feature level][log2(pixel count)-4].
          const double scale_factor[3][11] = {
            { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
            { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
            { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
          };
          assert(pix_idx >= 0);
          assert(th_idx <= 2);
          if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse)
            continue;
        }
      }

      rd_stats->rate += rate_mv;
    }
    // Copy the motion vector for this mode into mbmi struct
    for (i = 0; i < is_comp_pred + 1; ++i) {
      mbmi->mv[i].as_int = cur_mv[i].as_int;
    }

    // Early-out on rate cost alone, except for the NEAREST modes which are
    // always evaluated.
    if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
        mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
      continue;
    }

    // Skip the rest of the search if prune_ref_mv_idx_search speed feature
    // is enabled, and the current MV is similar to a previous one.
    if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
        prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
                                cpi->sf.inter_sf.prune_ref_mv_idx_search))
      continue;

    if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
        (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
      if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args,
                                 cpi->sf.gm_sf.prune_zero_mv_with_sse)) {
        continue;
      }
    }

    int skip_build_pred = 0;
    const int mi_row = xd->mi_row;
    const int mi_col = xd->mi_col;

    // Handle a compound predictor, continue if it is determined this
    // cannot be the best compound mode
    if (is_comp_pred) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, compound_type_rd_time);
#endif
      const int not_best_mode = process_compound_inter_mode(
          cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
          rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
          &skip_build_pred);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, compound_type_rd_time);
#endif
      if (not_best_mode) continue;
    }

    if (!args->skip_ifs) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, interpolation_filter_search_time);
#endif
      // Determine the interpolation filter for this mode
      ret_val = av1_interpolation_filter_search(
          x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
          &skip_build_pred, args, ref_best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, interpolation_filter_search_time);
#endif
      // Record the modelled RD for single-ref modes so compound modes can
      // compare against it below.
      if (args->modelled_rd != NULL && !is_comp_pred) {
        args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
      }
      if (ret_val != 0) {
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
                 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      }

      // Compute modelled RD if enabled
      if (args->modelled_rd != NULL) {
        if (is_comp_pred) {
          const int mode0 = compound_ref0_mode(this_mode);
          const int mode1 = compound_ref1_mode(this_mode);
          const int64_t mrd =
              AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
                     args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
          // Prune the compound mode if its modelled RD is much worse than the
          // better of its single-ref component modes.
          if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
            restore_dst_buf(xd, orig_dst, num_planes);
            continue;
          }
        }
      }
    }

    rd_stats->rate += compmode_interinter_cost;
    if (skip_build_pred != 1) {
      // Build this inter predictor if it has not been previously built
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, motion_mode_rd_time);
#endif
    int rate2_nocoeff = rd_stats->rate;
    // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
    // OBMC_CAUSAL or WARPED_CAUSAL
    int64_t this_yrd;
    ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
                             rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
                             &orig_dst, best_est_rd, do_tx_search,
                             inter_modes_info, 0, &this_yrd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, motion_mode_rd_time);
#endif
    assert(
        IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));

    if (ret_val != INT64_MAX) {
      int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
                              rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
                              cpi->sf.winner_mode_sf.multi_winner_mode_type,
                              do_tx_search);
      if (tmp_rd < best_rd) {
        best_yrd = this_yrd;
        // Update the best rd stats if we found the best mode so far
        best_rd_stats = *rd_stats;
        best_rd_stats_y = *rd_stats_y;
        best_rd_stats_uv = *rd_stats_uv;
        best_rd = tmp_rd;
        best_mbmi = *mbmi;
        best_xskip_txfm = txfm_info->skip_txfm;
        memcpy(best_blk_skip, txfm_info->blk_skip,
               sizeof(best_blk_skip[0]) * xd->height * xd->width);
        av1_copy_array(best_tx_type_map, xd->tx_type_map,
                       xd->height * xd->width);
        motion_mode_cand->rate_mv = rate_mv;
        motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
      }

      // Tighten the pruning threshold for the remaining ref_mv_idx iterations.
      if (tmp_rd < ref_best_rd) {
        ref_best_rd = tmp_rd;
        best_ref_mv_idx = ref_mv_idx;
      }
    }
    // Point xd's dst buffers back at the original destination for the next
    // iteration.
    restore_dst_buf(xd, orig_dst, num_planes);
  }

  if (best_rd == INT64_MAX) return INT64_MAX;

  // re-instate status of the best choice
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  *rd_stats_uv = best_rd_stats_uv;
  *yrd = best_yrd;
  *mbmi = best_mbmi;
  txfm_info->skip_txfm = best_xskip_txfm;
  assert(IMPLIES(mbmi->comp_group_idx == 1,
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);

  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  return rd_stats->rdcost;
}
3364
3365
/*!\brief Search for the best intrabc predictor
3366
 *
3367
 * \ingroup intra_mode_search
3368
 * \callergraph
3369
 * This function performs a motion search to find the best intrabc predictor.
3370
 *
3371
 * \returns Returns the best overall rdcost (including the non-intrabc modes
3372
 * search before this function).
3373
 */
3374
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
3375
                                       PICK_MODE_CONTEXT *ctx,
3376
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
3377
0
                                       int64_t best_rd) {
3378
0
  const AV1_COMMON *const cm = &cpi->common;
3379
0
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
3380
0
      !cpi->sf.mv_sf.use_intrabc || cpi->sf.rt_sf.use_nonrd_pick_mode)
3381
0
    return INT64_MAX;
3382
0
  if (cpi->sf.mv_sf.intrabc_search_level >= 1 && bsize != BLOCK_4X4 &&
3383
0
      bsize != BLOCK_8X8 && bsize != BLOCK_16X16) {
3384
0
    return INT64_MAX;
3385
0
  }
3386
0
  const int num_planes = av1_num_planes(cm);
3387
3388
0
  MACROBLOCKD *const xd = &x->e_mbd;
3389
0
  const TileInfo *tile = &xd->tile;
3390
0
  MB_MODE_INFO *mbmi = xd->mi[0];
3391
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3392
3393
0
  const int mi_row = xd->mi_row;
3394
0
  const int mi_col = xd->mi_col;
3395
0
  const int w = block_size_wide[bsize];
3396
0
  const int h = block_size_high[bsize];
3397
0
  const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
3398
0
  const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
3399
3400
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3401
0
  const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
3402
0
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
3403
0
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3404
0
                   mbmi_ext->mode_context);
3405
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3406
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3407
0
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
3408
0
  int_mv nearestmv, nearmv;
3409
0
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
3410
0
                                   0);
3411
3412
0
  if (nearestmv.as_int == INVALID_MV) {
3413
0
    nearestmv.as_int = 0;
3414
0
  }
3415
0
  if (nearmv.as_int == INVALID_MV) {
3416
0
    nearmv.as_int = 0;
3417
0
  }
3418
3419
0
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
3420
0
  if (dv_ref.as_int == 0) {
3421
0
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
3422
0
  }
3423
  // Ref DV should not have sub-pel.
3424
0
  assert((dv_ref.as_mv.col & 7) == 0);
3425
0
  assert((dv_ref.as_mv.row & 7) == 0);
3426
0
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
3427
3428
0
  struct buf_2d yv12_mb[MAX_MB_PLANE];
3429
0
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
3430
0
  for (int i = 0; i < num_planes; ++i) {
3431
0
    xd->plane[i].pre[0] = yv12_mb[i];
3432
0
  }
3433
3434
0
  enum IntrabcMotionDirection {
3435
0
    IBC_MOTION_ABOVE,
3436
0
    IBC_MOTION_LEFT,
3437
0
    IBC_MOTION_DIRECTIONS
3438
0
  };
3439
3440
0
  MB_MODE_INFO best_mbmi = *mbmi;
3441
0
  RD_STATS best_rdstats = *rd_stats;
3442
0
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
3443
0
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
3444
0
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3445
3446
0
  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
3447
0
  const SEARCH_METHODS search_method =
3448
0
      av1_get_default_mv_search_method(x, &cpi->sf.mv_sf, bsize);
3449
0
  const search_site_config *lookahead_search_sites =
3450
0
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
3451
0
  const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
3452
0
  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
3453
0
                                     &dv_ref.as_mv, start_mv,
3454
0
                                     lookahead_search_sites, search_method,
3455
0
                                     /*fine_search_interval=*/0);
3456
0
  const IntraBCMVCosts *const dv_costs = x->dv_costs;
3457
0
  av1_set_ms_to_intra_mode(&fullms_params, dv_costs);
3458
3459
0
  const enum IntrabcMotionDirection max_dir = cpi->sf.mv_sf.intrabc_search_level
3460
0
                                                  ? IBC_MOTION_LEFT
3461
0
                                                  : IBC_MOTION_DIRECTIONS;
3462
3463
0
  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE; dir < max_dir;
3464
0
       ++dir) {
3465
0
    switch (dir) {
3466
0
      case IBC_MOTION_ABOVE:
3467
0
        fullms_params.mv_limits.col_min =
3468
0
            (tile->mi_col_start - mi_col) * MI_SIZE;
3469
0
        fullms_params.mv_limits.col_max =
3470
0
            (tile->mi_col_end - mi_col) * MI_SIZE - w;
3471
0
        fullms_params.mv_limits.row_min =
3472
0
            (tile->mi_row_start - mi_row) * MI_SIZE;
3473
0
        fullms_params.mv_limits.row_max =
3474
0
            (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
3475
0
        break;
3476
0
      case IBC_MOTION_LEFT:
3477
0
        fullms_params.mv_limits.col_min =
3478
0
            (tile->mi_col_start - mi_col) * MI_SIZE;
3479
0
        fullms_params.mv_limits.col_max =
3480
0
            (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
3481
        // TODO(aconverse@google.com): Minimize the overlap between above and
3482
        // left areas.
3483
0
        fullms_params.mv_limits.row_min =
3484
0
            (tile->mi_row_start - mi_row) * MI_SIZE;
3485
0
        int bottom_coded_mi_edge =
3486
0
            AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
3487
0
        fullms_params.mv_limits.row_max =
3488
0
            (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
3489
0
        break;
3490
0
      default: assert(0);
3491
0
    }
3492
0
    assert(fullms_params.mv_limits.col_min >= fullms_params.mv_limits.col_min);
3493
0
    assert(fullms_params.mv_limits.col_max <= fullms_params.mv_limits.col_max);
3494
0
    assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min);
3495
0
    assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max);
3496
3497
0
    av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);
3498
3499
0
    if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
3500
0
        fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
3501
0
      continue;
3502
0
    }
3503
3504
0
    const int step_param = cpi->mv_search_params.mv_step_param;
3505
0
    IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
3506
0
    int_mv best_mv;
3507
0
    FULLPEL_MV_STATS best_mv_stats;
3508
0
    int bestsme = INT_MAX;
3509
3510
    // Perform a hash search first, and see if we get any matches.
3511
0
    if (!cpi->sf.mv_sf.hash_max_8x8_intrabc_blocks || bsize <= BLOCK_8X8) {
3512
0
      bestsme = av1_intrabc_hash_search(cpi, xd, &fullms_params,
3513
0
                                        intrabc_hash_info, &best_mv.as_fullmv);
3514
0
    }
3515
3516
    // If intrabc_search_level is not 0 and we found a hash search match, do
3517
    // not proceed with pixel search as the hash match is very likely to be the
3518
    // best intrabc candidate anyway.
3519
0
    if (bestsme == INT_MAX || cpi->sf.mv_sf.intrabc_search_level == 0) {
3520
0
      int_mv best_pixel_mv;
3521
0
      const int pixelsme =
3522
0
          av1_full_pixel_search(start_mv, &fullms_params, step_param, NULL,
3523
0
                                &best_pixel_mv.as_fullmv, &best_mv_stats, NULL);
3524
0
      if (pixelsme < bestsme) {
3525
0
        bestsme = pixelsme;
3526
0
        best_mv = best_pixel_mv;
3527
0
      }
3528
0
    }
3529
0
    if (bestsme == INT_MAX) continue;
3530
0
    const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
3531
0
    if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
3532
0
                                get_fullmv_from_mv(&dv)))
3533
0
      continue;
3534
0
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
3535
0
                         cm->seq_params->mib_size_log2))
3536
0
      continue;
3537
3538
    // DV should not have sub-pel.
3539
0
    assert((dv.col & 7) == 0);
3540
0
    assert((dv.row & 7) == 0);
3541
0
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
3542
0
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
3543
0
    mbmi->use_intrabc = 1;
3544
0
    mbmi->mode = DC_PRED;
3545
0
    mbmi->uv_mode = UV_DC_PRED;
3546
0
    mbmi->motion_mode = SIMPLE_TRANSLATION;
3547
0
    mbmi->mv[0].as_mv = dv;
3548
0
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
3549
0
    mbmi->skip_txfm = 0;
3550
0
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3551
0
                                  av1_num_planes(cm) - 1);
3552
3553
    // TODO(aconverse@google.com): The full motion field defining discount
3554
    // in MV_COST_WEIGHT is too large. Explore other values.
3555
0
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
3556
0
                                        dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
3557
0
    const int rate_mode = x->mode_costs.intrabc_cost[1];
3558
0
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
3559
0
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
3560
0
                         &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
3561
0
      continue;
3562
0
    rd_stats_yuv.rdcost =
3563
0
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
3564
0
    if (rd_stats_yuv.rdcost < best_rd) {
3565
0
      best_rd = rd_stats_yuv.rdcost;
3566
0
      best_mbmi = *mbmi;
3567
0
      best_rdstats = rd_stats_yuv;
3568
0
      memcpy(best_blk_skip, txfm_info->blk_skip,
3569
0
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3570
0
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
3571
0
    }
3572
0
  }
3573
0
  *mbmi = best_mbmi;
3574
0
  *rd_stats = best_rdstats;
3575
0
  memcpy(txfm_info->blk_skip, best_blk_skip,
3576
0
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3577
0
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
3578
#if CONFIG_RD_DEBUG
3579
  mbmi->rd_stats = *rd_stats;
3580
#endif
3581
0
  return best_rd;
3582
0
}
3583
3584
// TODO(chiyotsai@google.com): We are using struct $struct_name instead of their
3585
// typedef here because Doxygen doesn't know about the typedefs yet. So using
3586
// the typedef will prevent doxygen from finding this function and generating
3587
// the callgraph. Once documents for AV1_COMP and MACROBLOCK are added to
3588
// doxygen, we can revert back to using the typedefs.
3589
// Picks the best intra prediction mode (and, when allowed, intra block copy)
// for the whole block. On success the winning mode is left in xd->mi[0] and
// its stats in 'rd_cost' / 'ctx'; if nothing beats 'best_rd',
// rd_cost->rate stays INT_MAX.
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  uint8_t y_skip_txfm = 0, uv_skip_txfm = 0;
  int64_t dist_y = 0, dist_uv = 0;

  // Start from a plain intra configuration: no intrabc, zero MV, no skip mode.
  ctx->rd_stats.skip_txfm = 0;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->skip_mode = 0;

  // Luma intra mode search; returns the best luma-only RD cost.
  const int64_t intra_yrd =
      av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
                                 &y_skip_txfm, bsize, best_rd, ctx);

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  if (intra_yrd < best_rd) {
    // Search intra modes for uv planes if needed
    if (num_planes > 1) {
      // Set up the tx variables for reproducing the y predictions in case we
      // need it for chroma-from-luma.
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
        memcpy(txfm_info->blk_skip, ctx->blk_skip,
               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
      }
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
                                  &dist_uv, &uv_skip_txfm, bsize,
                                  max_uv_tx_size);
    }

    // Intra block is always coded as non-skip, so the skip_txfm flag's
    // "not skipped" rate is added to the total.
    rd_cost->rate =
        rate_y + rate_uv +
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
    rd_cost->dist = dist_y + dist_uv;
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
    rd_cost->skip_txfm = 0;
  } else {
    // Sentinel: no intra mode beat the caller's threshold.
    rd_cost->rate = INT_MAX;
  }

  // Tighten the threshold before trying intra block copy, so intrabc only
  // wins if it beats the best regular-intra result found above.
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  // rd_pick_intrabc_mode_sb overwrites mbmi / rd_cost when it finds a better
  // candidate; mirror its skip decision into the pick-mode context.
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
    memcpy(ctx->blk_skip, txfm_info->blk_skip,
           sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
    assert(rd_cost->rate != INT_MAX);
  }
  if (rd_cost->rate == INT_MAX) return;

  // Record the winning mode info for the caller.
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
3657
3658
static inline void calc_target_weighted_pred(
3659
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
3660
    const uint8_t *above, int above_stride, const uint8_t *left,
3661
    int left_stride);
3662
3663
static inline void rd_pick_skip_mode(
3664
    RD_STATS *rd_cost, InterModeSearchState *search_state,
3665
    const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3666
0
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3667
0
  const AV1_COMMON *const cm = &cpi->common;
3668
0
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3669
0
  const int num_planes = av1_num_planes(cm);
3670
0
  MACROBLOCKD *const xd = &x->e_mbd;
3671
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
3672
3673
0
  x->compound_idx = 1;  // COMPOUND_AVERAGE
3674
0
  RD_STATS skip_mode_rd_stats;
3675
0
  av1_invalid_rd_stats(&skip_mode_rd_stats);
3676
3677
0
  if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3678
0
      skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3679
0
    return;
3680
0
  }
3681
3682
0
  const MV_REFERENCE_FRAME ref_frame =
3683
0
      LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3684
0
  const MV_REFERENCE_FRAME second_ref_frame =
3685
0
      LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3686
0
  const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3687
0
  const THR_MODES mode_index =
3688
0
      get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3689
3690
0
  if (mode_index == THR_INVALID) {
3691
0
    return;
3692
0
  }
3693
3694
0
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3695
0
       cpi->sf.inter_sf.disable_onesided_comp) &&
3696
0
      cpi->all_one_sided_refs) {
3697
0
    return;
3698
0
  }
3699
3700
0
  mbmi->mode = this_mode;
3701
0
  mbmi->uv_mode = UV_DC_PRED;
3702
0
  mbmi->ref_frame[0] = ref_frame;
3703
0
  mbmi->ref_frame[1] = second_ref_frame;
3704
0
  const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3705
0
  if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3706
0
    MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3707
0
    if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3708
0
        mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3709
0
      return;
3710
0
    }
3711
0
    av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3712
0
                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3713
0
                     mbmi_ext->mode_context);
3714
    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3715
    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3716
0
    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3717
0
  }
3718
3719
0
  assert(this_mode == NEAREST_NEARESTMV);
3720
0
  if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3721
0
    return;
3722
0
  }
3723
3724
0
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
3725
0
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3726
0
  mbmi->comp_group_idx = 0;
3727
0
  mbmi->compound_idx = x->compound_idx;
3728
0
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3729
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
3730
0
  mbmi->ref_mv_idx = 0;
3731
0
  mbmi->skip_mode = mbmi->skip_txfm = 1;
3732
0
  mbmi->palette_mode_info.palette_size[0] = 0;
3733
0
  mbmi->palette_mode_info.palette_size[1] = 0;
3734
3735
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
3736
3737
0
  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3738
0
  for (int i = 0; i < num_planes; i++) {
3739
0
    xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3740
0
    xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3741
0
  }
3742
3743
0
  BUFFER_SET orig_dst;
3744
0
  for (int i = 0; i < num_planes; i++) {
3745
0
    orig_dst.plane[i] = xd->plane[i].dst.buf;
3746
0
    orig_dst.stride[i] = xd->plane[i].dst.stride;
3747
0
  }
3748
3749
  // Compare the use of skip_mode with the best intra/inter mode obtained.
3750
0
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3751
0
  int64_t best_intra_inter_mode_cost = INT64_MAX;
3752
0
  if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3753
0
    const ModeCosts *mode_costs = &x->mode_costs;
3754
0
    best_intra_inter_mode_cost = RDCOST(
3755
0
        x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3756
0
        rd_cost->dist);
3757
    // Account for non-skip mode rate in total rd stats
3758
0
    rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3759
0
    av1_rd_cost_update(x->rdmult, rd_cost);
3760
0
  }
3761
3762
  // Obtain the rdcost for skip_mode.
3763
0
  skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst,
3764
0
               best_intra_inter_mode_cost);
3765
3766
0
  if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3767
0
      (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3768
0
    assert(mode_index != THR_INVALID);
3769
0
    search_state->best_mbmode.skip_mode = 1;
3770
0
    search_state->best_mbmode = *mbmi;
3771
0
    memset(search_state->best_mbmode.inter_tx_size,
3772
0
           search_state->best_mbmode.tx_size,
3773
0
           sizeof(search_state->best_mbmode.inter_tx_size));
3774
0
    set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3775
0
                  search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3776
0
                  xd);
3777
0
    search_state->best_mode_index = mode_index;
3778
3779
    // Update rd_cost
3780
0
    rd_cost->rate = skip_mode_rd_stats.rate;
3781
0
    rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3782
0
    rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3783
3784
0
    search_state->best_rd = rd_cost->rdcost;
3785
0
    search_state->best_skip2 = 1;
3786
0
    search_state->best_mode_skippable = 1;
3787
3788
0
    x->txfm_search_info.skip_txfm = 1;
3789
0
  }
3790
0
}
3791
3792
// Get winner mode stats of given mode index
3793
static inline MB_MODE_INFO *get_winner_mode_stats(
3794
    MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3795
    int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3796
    RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3797
    THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3798
0
    int mode_idx) {
3799
0
  MB_MODE_INFO *winner_mbmi;
3800
0
  if (multi_winner_mode_type) {
3801
0
    assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3802
0
    WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3803
0
    winner_mbmi = &winner_mode_stat->mbmi;
3804
3805
0
    *winner_rd_cost = &winner_mode_stat->rd_cost;
3806
0
    *winner_rate_y = winner_mode_stat->rate_y;
3807
0
    *winner_rate_uv = winner_mode_stat->rate_uv;
3808
0
    *winner_mode_index = winner_mode_stat->mode_index;
3809
0
  } else {
3810
0
    winner_mbmi = best_mbmode;
3811
0
    *winner_rd_cost = best_rd_cost;
3812
0
    *winner_rate_y = best_rate_y;
3813
0
    *winner_rate_uv = best_rate_uv;
3814
0
    *winner_mode_index = *best_mode_index;
3815
0
  }
3816
0
  return winner_mbmi;
3817
0
}
3818
3819
// speed feature: fast intra/inter transform type search
3820
// Used for speed >= 2
3821
// When this speed feature is on, in rd mode search, only DCT is used.
3822
// After the mode is determined, this function is called, to select
3823
// transform types and get accurate rdcost.
3824
// Re-evaluates each winner-mode candidate with the full (accurate) transform
// type search and accepts the candidate if its refined RD cost beats the
// current best. Updates 'rd_cost', 'best_mbmode', 'best_mode_index',
// 'best_skip2' and the pick-mode context in place.
static inline void refine_winner_mode_tx(
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int64_t best_rd;
  const int num_planes = av1_num_planes(cm);

  if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
                                         rd_cost->skip_txfm))
    return;

  // Set params for winner mode evaluation
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);

  // No best mode identified so far
  if (*best_mode_index == THR_INVALID) return;

  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
    RD_STATS *winner_rd_stats = NULL;
    int winner_rate_y = 0, winner_rate_uv = 0;
    THR_MODES winner_mode_index = 0;

    // TODO(any): Combine best mode and multi-winner mode processing paths
    // Get winner mode stats for current mode index
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);

    // Refinement is skipped for lossless segments and invalid candidates.
    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
        winner_mode_index != THR_INVALID &&
        is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
                                          rd_cost->skip_txfm)) {
      RD_STATS rd_stats = *winner_rd_stats;
      int skip_blk = 0;
      RD_STATS rd_stats_y, rd_stats_uv;
      const int skip_ctx = av1_get_skip_txfm_context(xd);

      // Install the candidate's mode info as the working mode.
      *mbmi = *winner_mbmi;

      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

      // Select prediction reference frames.
      for (int i = 0; i < num_planes; i++) {
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
        if (has_second_ref(mbmi))
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
      }

      if (is_inter_mode(mbmi->mode)) {
        const int mi_row = xd->mi_row;
        const int mi_col = xd->mi_col;
        bool is_predictor_built = false;
        const PREDICTION_MODE prediction_mode = mbmi->mode;
        // Do interpolation filter search for realtime mode if applicable.
        if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
            cpi->oxcf.mode == REALTIME &&
            cm->current_frame.reference_mode == SINGLE_REFERENCE &&
            is_inter_mode(prediction_mode) &&
            mbmi->motion_mode == SIMPLE_TRANSLATION &&
            !is_inter_compound_mode(prediction_mode)) {
          is_predictor_built =
              fast_interp_search(cpi, x, mi_row, mi_col, bsize);
        }
        // fast_interp_search may have built the predictor already; otherwise
        // build the inter predictor for all planes here.
        if (!is_predictor_built) {
          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                        av1_num_planes(cm) - 1);
        }
        if (mbmi->motion_mode == OBMC_CAUSAL)
          av1_build_obmc_inter_predictors_sb(cm, xd);

        av1_subtract_plane(x, bsize, 0);
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
            !xd->lossless[mbmi->segment_id]) {
          // Full recursive tx-size / tx-type search for the luma plane.
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                              INT64_MAX);
          assert(rd_stats_y.rate != INT_MAX);
        } else {
          // Uniform tx size: replicate the single tx size / skip decision
          // across all sub-blocks.
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                            INT64_MAX);
          memset(mbmi->inter_tx_size, mbmi->tx_size,
                 sizeof(mbmi->inter_tx_size));
          for (int i = 0; i < xd->height * xd->width; ++i)
            set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
        }
      } else {
        // Intra candidate: luma tx search only (prediction already encoded
        // in the mode info).
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                          INT64_MAX);
      }

      if (num_planes > 1) {
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
      } else {
        av1_init_rd_stats(&rd_stats_uv);
      }

      const int comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;

      // Decide whether coding the block as all-skip (no residual) is cheaper
      // than coding the residual; adjust the y/uv rate/dist accordingly.
      const ModeCosts *mode_costs = &x->mode_costs;
      if (is_inter_mode(mbmi->mode) &&
          (!cpi->oxcf.algo_cfg.sharpness || !comp_pred) &&
          RDCOST(x->rdmult,
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
                     rd_stats_uv.rate,
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
        skip_blk = 1;
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
        rd_stats_uv.rate = 0;
        rd_stats_y.dist = rd_stats_y.sse;
        rd_stats_uv.dist = rd_stats_uv.sse;
      } else {
        skip_blk = 0;
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
      }
      // Replace the candidate's original y/uv rates with the refined ones.
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
                      winner_rate_y - winner_rate_uv;
      int64_t this_rd =
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
      if (best_rd > this_rd) {
        *best_mbmode = *mbmi;
        *best_mode_index = winner_mode_index;
        av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
        rd_cost->rate = this_rate;
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        *best_skip2 = skip_blk;
      }
    }
  }
}
3966
3967
/*!\cond */
3968
// Per-block masks describing which prediction modes and reference-frame
// combinations should be skipped during the inter mode search (built by
// init_mode_skip_mask / default_skip_mask).
typedef struct {
  // Mask for each reference frame, specifying which prediction modes to NOT try
  // during search. Bit (1 << mode) set means that mode is skipped for the ref.
  uint32_t pred_modes[REF_FRAMES];
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
  // reference frames (i, j).
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
  // (NONE_FRAME).
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
} mode_skip_mask_t;
3978
/*!\endcond */
3979
3980
// Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
3981
static inline void disable_reference(
3982
0
    MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3983
0
  for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3984
0
    ref_combo[ref][ref2 + 1] = true;
3985
0
  }
3986
0
}
3987
3988
// Update 'ref_combo' mask to disable all inter references except ALTREF.
3989
static inline void disable_inter_references_except_altref(
3990
0
    bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3991
0
  disable_reference(LAST_FRAME, ref_combo);
3992
0
  disable_reference(LAST2_FRAME, ref_combo);
3993
0
  disable_reference(LAST3_FRAME, ref_combo);
3994
0
  disable_reference(GOLDEN_FRAME, ref_combo);
3995
0
  disable_reference(BWDREF_FRAME, ref_combo);
3996
0
  disable_reference(ALTREF2_FRAME, ref_combo);
3997
0
}
3998
3999
// Reduced set of {first ref, second ref} combinations searched when the
// reduced reference set is enabled; NONE_FRAME as the second entry denotes a
// single-reference mode (see default_skip_mask / REF_SET_REDUCED).
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
};
4009
4010
// Which reference-frame working set to search: everything, the reduced table
// (reduced_ref_combos), or the real-time table (real_time_ref_combos).
typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
4011
4012
0
static inline void default_skip_mask(mode_skip_mask_t *mask, REF_SET ref_set) {
4013
0
  if (ref_set == REF_SET_FULL) {
4014
    // Everything available by default.
4015
0
    memset(mask, 0, sizeof(*mask));
4016
0
  } else {
4017
    // All modes available by default.
4018
0
    memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
4019
    // All references disabled first.
4020
0
    for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
4021
0
      for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
4022
0
        mask->ref_combo[ref1][ref2 + 1] = true;
4023
0
      }
4024
0
    }
4025
0
    const MV_REFERENCE_FRAME(*ref_set_combos)[2];
4026
0
    int num_ref_combos;
4027
4028
    // Then enable reduced set of references explicitly.
4029
0
    switch (ref_set) {
4030
0
      case REF_SET_REDUCED:
4031
0
        ref_set_combos = reduced_ref_combos;
4032
0
        num_ref_combos =
4033
0
            (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
4034
0
        break;
4035
0
      case REF_SET_REALTIME:
4036
0
        ref_set_combos = real_time_ref_combos;
4037
0
        num_ref_combos =
4038
0
            (int)sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]);
4039
0
        break;
4040
0
      default: assert(0); num_ref_combos = 0;
4041
0
    }
4042
4043
0
    for (int i = 0; i < num_ref_combos; ++i) {
4044
0
      const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
4045
0
      mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
4046
0
    }
4047
0
  }
4048
0
}
4049
4050
static inline void init_mode_skip_mask(mode_skip_mask_t *mask,
4051
                                       const AV1_COMP *cpi, MACROBLOCK *x,
4052
0
                                       BLOCK_SIZE bsize) {
4053
0
  const AV1_COMMON *const cm = &cpi->common;
4054
0
  const struct segmentation *const seg = &cm->seg;
4055
0
  MACROBLOCKD *const xd = &x->e_mbd;
4056
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
4057
0
  unsigned char segment_id = mbmi->segment_id;
4058
0
  const SPEED_FEATURES *const sf = &cpi->sf;
4059
0
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
4060
0
  REF_SET ref_set = REF_SET_FULL;
4061
4062
0
  if (sf->rt_sf.use_real_time_ref_set)
4063
0
    ref_set = REF_SET_REALTIME;
4064
0
  else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
4065
0
    ref_set = REF_SET_REDUCED;
4066
4067
0
  default_skip_mask(mask, ref_set);
4068
4069
0
  int min_pred_mv_sad = INT_MAX;
4070
0
  MV_REFERENCE_FRAME ref_frame;
4071
0
  if (ref_set == REF_SET_REALTIME) {
4072
    // For real-time encoding, we only look at a subset of ref frames. So the
4073
    // threshold for pruning should be computed from this subset as well.
4074
0
    const int num_rt_refs =
4075
0
        sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
4076
0
    for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
4077
0
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
4078
0
      if (ref != INTRA_FRAME) {
4079
0
        min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
4080
0
      }
4081
0
    }
4082
0
  } else {
4083
0
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
4084
0
      min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
4085
0
  }
4086
4087
0
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4088
0
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
4089
      // Skip checking missing reference in both single and compound reference
4090
      // modes.
4091
0
      disable_reference(ref_frame, mask->ref_combo);
4092
0
    } else {
4093
      // Skip fixed mv modes for poor references
4094
0
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
4095
0
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
4096
0
      }
4097
0
    }
4098
0
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
4099
0
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
4100
      // Reference not used for the segment.
4101
0
      disable_reference(ref_frame, mask->ref_combo);
4102
0
    }
4103
0
  }
4104
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
4105
  // is disabled for this segment. This is to prevent the possibility that we
4106
  // end up unable to pick any mode.
4107
0
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
4108
    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
4109
    // unless ARNR filtering is enabled in which case we want
4110
    // an unfiltered alternative. We allow near/nearest as well
4111
    // because they may result in zero-zero MVs but be cheaper.
4112
0
    if (cpi->rc.is_src_frame_alt_ref &&
4113
0
        (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
4114
0
      disable_inter_references_except_altref(mask->ref_combo);
4115
4116
0
      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
4117
0
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
4118
0
      int_mv near_mv, nearest_mv, global_mv;
4119
0
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
4120
0
                  &x->mbmi_ext);
4121
0
      get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
4122
0
      get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
4123
4124
0
      if (near_mv.as_int != global_mv.as_int)
4125
0
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
4126
0
      if (nearest_mv.as_int != global_mv.as_int)
4127
0
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
4128
0
    }
4129
0
  }
4130
4131
0
  if (cpi->rc.is_src_frame_alt_ref) {
4132
0
    if (inter_sf->alt_ref_search_fp &&
4133
0
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
4134
0
      mask->pred_modes[ALTREF_FRAME] = 0;
4135
0
      disable_inter_references_except_altref(mask->ref_combo);
4136
0
      disable_reference(INTRA_FRAME, mask->ref_combo);
4137
0
    }
4138
0
  }
4139
4140
0
  if (inter_sf->alt_ref_search_fp) {
4141
0
    if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
4142
0
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
4143
      // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
4144
      // those are past frames
4145
0
      MV_REFERENCE_FRAME start_frame =
4146
0
          inter_sf->alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
4147
0
      for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
4148
0
        if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
4149
0
            0) {
4150
          // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
4151
          // to the relative dist of LAST_FRAME.
4152
0
          if (inter_sf->alt_ref_search_fp == 1 &&
4153
0
              (abs(cpi->ref_frame_dist_info
4154
0
                       .ref_relative_dist[ref_frame - LAST_FRAME]) >
4155
0
               1.5 * abs(cpi->ref_frame_dist_info
4156
0
                             .ref_relative_dist[LAST_FRAME - LAST_FRAME]))) {
4157
0
            continue;
4158
0
          }
4159
0
          if (x->pred_mv_sad[ref_frame] > sad_thresh)
4160
0
            mask->pred_modes[ref_frame] |= INTER_ALL;
4161
0
        }
4162
0
      }
4163
0
    }
4164
0
  }
4165
4166
0
  if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
4167
0
    if (x->best_pred_mv_sad[0] < INT_MAX) {
4168
0
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
4169
0
      const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };
4170
4171
      // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
4172
0
      for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
4173
0
        ref_frame = prune_ref_list[ref_idx];
4174
0
        if (x->pred_mv_sad[ref_frame] > sad_thresh)
4175
0
          mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
4176
0
      }
4177
0
    }
4178
0
  }
4179
4180
0
  if (bsize > sf->part_sf.max_intra_bsize) {
4181
0
    disable_reference(INTRA_FRAME, mask->ref_combo);
4182
0
  }
4183
4184
0
  if (!cpi->oxcf.tool_cfg.enable_global_motion) {
4185
0
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4186
0
      mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
4187
0
      mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
4188
0
    }
4189
0
  }
4190
4191
0
  mask->pred_modes[INTRA_FRAME] |=
4192
0
      ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
4193
4194
  // Prune reference frames which are not the closest to the current
4195
  // frame and with large pred_mv_sad.
4196
0
  if (inter_sf->prune_single_ref) {
4197
0
    assert(inter_sf->prune_single_ref > 0 && inter_sf->prune_single_ref < 3);
4198
0
    const double prune_threshes[2] = { 1.20, 1.05 };
4199
4200
0
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4201
0
      const RefFrameDistanceInfo *const ref_frame_dist_info =
4202
0
          &cpi->ref_frame_dist_info;
4203
0
      const int is_closest_ref =
4204
0
          (ref_frame == ref_frame_dist_info->nearest_past_ref) ||
4205
0
          (ref_frame == ref_frame_dist_info->nearest_future_ref);
4206
4207
0
      if (!is_closest_ref) {
4208
0
        const int dir =
4209
0
            (ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] < 0)
4210
0
                ? 0
4211
0
                : 1;
4212
0
        if (x->best_pred_mv_sad[dir] < INT_MAX &&
4213
0
            x->pred_mv_sad[ref_frame] >
4214
0
                prune_threshes[inter_sf->prune_single_ref - 1] *
4215
0
                    x->best_pred_mv_sad[dir])
4216
0
          mask->pred_modes[ref_frame] |= INTER_SINGLE_ALL;
4217
0
      }
4218
0
    }
4219
0
  }
4220
0
}
4221
4222
static inline void init_neighbor_pred_buf(const OBMCBuffer *const obmc_buffer,
4223
                                          HandleInterModeArgs *const args,
4224
0
                                          int is_hbd) {
4225
0
  if (is_hbd) {
4226
0
    const int len = sizeof(uint16_t);
4227
0
    args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
4228
0
    args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
4229
0
                                                 (MAX_SB_SQUARE >> 1) * len);
4230
0
    args->above_pred_buf[2] =
4231
0
        CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
4232
0
    args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
4233
0
    args->left_pred_buf[1] =
4234
0
        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
4235
0
    args->left_pred_buf[2] =
4236
0
        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
4237
0
  } else {
4238
0
    args->above_pred_buf[0] = obmc_buffer->above_pred;
4239
0
    args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
4240
0
    args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
4241
0
    args->left_pred_buf[0] = obmc_buffer->left_pred;
4242
0
    args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
4243
0
    args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
4244
0
  }
4245
0
}
4246
4247
static inline int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
4248
0
                                  MV_REFERENCE_FRAME ref_frame) {
4249
0
  const AV1_COMMON *const cm = &cpi->common;
4250
0
  MV_REFERENCE_FRAME rf[2];
4251
0
  av1_set_ref_frame(rf, ref_frame);
4252
4253
0
  if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
4254
4255
0
  if (prune_ref_by_selective_ref_frame(cpi, x, rf,
4256
0
                                       cm->cur_frame->ref_display_order_hint)) {
4257
0
    return 1;
4258
0
  }
4259
4260
0
  return 0;
4261
0
}
4262
4263
static inline int is_ref_frame_used_by_compound_ref(int ref_frame,
4264
0
                                                    int skip_ref_frame_mask) {
4265
0
  for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
4266
0
    if (!(skip_ref_frame_mask & (1 << r))) {
4267
0
      const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
4268
0
      if (rf[0] == ref_frame || rf[1] == ref_frame) {
4269
0
        return 1;
4270
0
      }
4271
0
    }
4272
0
  }
4273
0
  return 0;
4274
0
}
4275
4276
static inline int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
4277
0
                                             const MB_MODE_INFO *mi_cache) {
4278
0
  if (!mi_cache) {
4279
0
    return 0;
4280
0
  }
4281
4282
0
  if (ref_frame < REF_FRAMES) {
4283
0
    return (ref_frame == mi_cache->ref_frame[0] ||
4284
0
            ref_frame == mi_cache->ref_frame[1]);
4285
0
  }
4286
4287
  // if we are here, then the current mode is compound.
4288
0
  MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
4289
0
  return ref_frame == cached_ref_type;
4290
0
}
4291
4292
// Please add/modify parameter setting in this function, making it consistent
4293
// and easy to read and maintain.
4294
static inline void set_params_rd_pick_inter_mode(
4295
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
4296
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
4297
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
4298
0
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
4299
0
  const AV1_COMMON *const cm = &cpi->common;
4300
0
  MACROBLOCKD *const xd = &x->e_mbd;
4301
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
4302
0
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
4303
0
  unsigned char segment_id = mbmi->segment_id;
4304
4305
0
  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
4306
0
  av1_collect_neighbors_ref_counts(xd);
4307
0
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
4308
0
                           ref_costs_comp);
4309
4310
0
  const int mi_row = xd->mi_row;
4311
0
  const int mi_col = xd->mi_col;
4312
0
  x->best_pred_mv_sad[0] = INT_MAX;
4313
0
  x->best_pred_mv_sad[1] = INT_MAX;
4314
4315
0
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
4316
0
       ++ref_frame) {
4317
0
    x->pred_mv_sad[ref_frame] = INT_MAX;
4318
0
    mbmi_ext->mode_context[ref_frame] = 0;
4319
0
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
4320
0
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
4321
      // Skip the ref frame if the mask says skip and the ref is not used by
4322
      // compound ref.
4323
0
      if (skip_ref_frame_mask & (1 << ref_frame) &&
4324
0
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
4325
0
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4326
0
        continue;
4327
0
      }
4328
0
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
4329
0
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
4330
0
    }
4331
0
    if (cpi->sf.inter_sf.alt_ref_search_fp ||
4332
0
        cpi->sf.inter_sf.prune_single_ref ||
4333
0
        cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
4334
      // Store the best pred_mv_sad across all past frames
4335
0
      if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
4336
0
          0)
4337
0
        x->best_pred_mv_sad[0] =
4338
0
            AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
4339
0
      else
4340
        // Store the best pred_mv_sad across all future frames
4341
0
        x->best_pred_mv_sad[1] =
4342
0
            AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
4343
0
    }
4344
0
  }
4345
4346
0
  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
4347
    // No second reference on RT ref set, so no need to initialize
4348
0
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
4349
0
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
4350
0
      mbmi_ext->mode_context[ref_frame] = 0;
4351
0
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
4352
0
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
4353
0
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
4354
0
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
4355
0
        continue;
4356
0
      }
4357
4358
0
      if (skip_ref_frame_mask & (1 << ref_frame) &&
4359
0
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4360
0
        continue;
4361
0
      }
4362
      // Ref mv list population is not required, when compound references are
4363
      // pruned.
4364
0
      if (prune_ref_frame(cpi, x, ref_frame)) continue;
4365
4366
0
      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
4367
0
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
4368
0
                       mbmi_ext->mode_context);
4369
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
4370
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
4371
0
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
4372
0
    }
4373
0
  }
4374
4375
0
  av1_count_overlappable_neighbors(cm, xd);
4376
0
  const FRAME_UPDATE_TYPE update_type =
4377
0
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
4378
0
  int use_actual_frame_probs = 1;
4379
0
  int prune_obmc;
4380
#if CONFIG_FPMT_TEST
4381
  use_actual_frame_probs =
4382
      (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
4383
  if (!use_actual_frame_probs) {
4384
    prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
4385
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
4386
  }
4387
#endif
4388
0
  if (use_actual_frame_probs) {
4389
0
    prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
4390
0
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
4391
0
  }
4392
0
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
4393
0
    if (check_num_overlappable_neighbors(mbmi) &&
4394
0
        is_motion_variation_allowed_bsize(bsize)) {
4395
0
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4396
0
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4397
0
                                       MAX_SB_SIZE >> 1 };
4398
0
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4399
0
                                        MAX_SB_SIZE >> 1 };
4400
0
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4401
0
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
4402
0
                                          dst_width1, dst_height1,
4403
0
                                          args->above_pred_stride);
4404
0
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
4405
0
                                         dst_width2, dst_height2,
4406
0
                                         args->left_pred_stride);
4407
0
      const int num_planes = av1_num_planes(cm);
4408
0
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
4409
0
                           mi_col, 0, num_planes);
4410
0
      calc_target_weighted_pred(
4411
0
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
4412
0
          args->left_pred_buf[0], args->left_pred_stride[0]);
4413
0
    }
4414
0
  }
4415
4416
0
  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);
4417
4418
  // Set params for mode evaluation
4419
0
  set_mode_eval_params(cpi, x, MODE_EVAL);
4420
4421
0
  x->comp_rd_stats_idx = 0;
4422
4423
0
  for (int idx = 0; idx < REF_FRAMES; idx++) {
4424
0
    args->best_single_sse_in_refs[idx] = INT32_MAX;
4425
0
  }
4426
0
}
4427
4428
static inline void init_single_inter_mode_search_state(
4429
0
    InterModeSearchState *search_state) {
4430
0
  for (int dir = 0; dir < 2; ++dir) {
4431
0
    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4432
0
      for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
4433
0
        SingleInterModeState *state;
4434
4435
0
        state = &search_state->single_state[dir][mode][ref_frame];
4436
0
        state->ref_frame = NONE_FRAME;
4437
0
        state->rd = INT64_MAX;
4438
4439
0
        state = &search_state->single_state_modelled[dir][mode][ref_frame];
4440
0
        state->ref_frame = NONE_FRAME;
4441
0
        state->rd = INT64_MAX;
4442
4443
0
        search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
4444
0
      }
4445
0
    }
4446
0
  }
4447
4448
0
  for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4449
0
    search_state->best_single_rd[ref_frame] = INT64_MAX;
4450
0
    search_state->best_single_mode[ref_frame] = PRED_MODE_INVALID;
4451
0
  }
4452
0
  av1_zero(search_state->single_state_cnt);
4453
0
  av1_zero(search_state->single_state_modelled_cnt);
4454
0
}
4455
4456
static inline void init_inter_mode_search_state(
4457
    InterModeSearchState *search_state, const AV1_COMP *cpi,
4458
0
    const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
4459
0
  init_intra_mode_search_state(&search_state->intra_search_state);
4460
0
  av1_invalid_rd_stats(&search_state->best_y_rdcost);
4461
4462
0
  search_state->best_rd = best_rd_so_far;
4463
0
  search_state->best_skip_rd[0] = INT64_MAX;
4464
0
  search_state->best_skip_rd[1] = INT64_MAX;
4465
4466
0
  av1_zero(search_state->best_mbmode);
4467
4468
0
  search_state->best_rate_y = INT_MAX;
4469
4470
0
  search_state->best_rate_uv = INT_MAX;
4471
4472
0
  search_state->best_mode_skippable = 0;
4473
4474
0
  search_state->best_skip2 = 0;
4475
4476
0
  search_state->best_mode_index = THR_INVALID;
4477
4478
0
  const MACROBLOCKD *const xd = &x->e_mbd;
4479
0
  const MB_MODE_INFO *const mbmi = xd->mi[0];
4480
0
  const unsigned char segment_id = mbmi->segment_id;
4481
4482
0
  search_state->num_available_refs = 0;
4483
0
  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
4484
0
  memset(search_state->dist_order_refs, -1,
4485
0
         sizeof(search_state->dist_order_refs));
4486
4487
0
  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
4488
0
    search_state->mode_threshold[i] = 0;
4489
0
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
4490
0
  for (int i = LAST_NEW_MV_INDEX + 1; i < SINGLE_REF_MODE_END; ++i)
4491
0
    search_state->mode_threshold[i] =
4492
0
        ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4493
0
        RD_THRESH_FAC_FRAC_BITS;
4494
4495
0
  search_state->best_intra_rd = INT64_MAX;
4496
4497
0
  search_state->best_pred_sse = UINT_MAX;
4498
4499
0
  av1_zero(search_state->single_newmv);
4500
0
  av1_zero(search_state->single_newmv_rate);
4501
0
  av1_zero(search_state->single_newmv_valid);
4502
0
  for (int i = SINGLE_INTER_MODE_START; i < SINGLE_INTER_MODE_END; ++i) {
4503
0
    for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4504
0
      for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4505
0
        search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4506
0
        search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4507
0
      }
4508
0
    }
4509
0
  }
4510
4511
0
  for (int i = 0; i < REFERENCE_MODES; ++i) {
4512
0
    search_state->best_pred_rd[i] = INT64_MAX;
4513
0
  }
4514
4515
0
  if (cpi->common.current_frame.reference_mode != SINGLE_REFERENCE) {
4516
0
    for (int i = SINGLE_REF_MODE_END; i < THR_INTER_MODE_END; ++i)
4517
0
      search_state->mode_threshold[i] =
4518
0
          ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4519
0
          RD_THRESH_FAC_FRAC_BITS;
4520
4521
0
    for (int i = COMP_INTER_MODE_START; i < COMP_INTER_MODE_END; ++i) {
4522
0
      for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4523
0
        for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4524
0
          search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4525
0
          search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4526
0
        }
4527
0
      }
4528
0
    }
4529
4530
0
    init_single_inter_mode_search_state(search_state);
4531
0
  }
4532
0
}
4533
4534
static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
4535
                           const MV_REFERENCE_FRAME *ref_frame,
4536
0
                           const PREDICTION_MODE this_mode) {
4537
0
  if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
4538
0
    return true;
4539
0
  }
4540
4541
0
  return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
4542
0
}
4543
4544
static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
4545
                                      BLOCK_SIZE bsize,
4546
                                      PREDICTION_MODE curr_mode,
4547
0
                                      const MV_REFERENCE_FRAME *ref_frames) {
4548
0
  const int comp_pred = ref_frames[1] > INTRA_FRAME;
4549
0
  if (comp_pred) {
4550
0
    if (!is_comp_ref_allowed(bsize)) return 1;
4551
0
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]])) {
4552
0
      return 1;
4553
0
    }
4554
4555
0
    const AV1_COMMON *const cm = &cpi->common;
4556
0
    if (frame_is_intra_only(cm)) return 1;
4557
4558
0
    const CurrentFrame *const current_frame = &cm->current_frame;
4559
0
    if (current_frame->reference_mode == SINGLE_REFERENCE) return 1;
4560
4561
0
    const struct segmentation *const seg = &cm->seg;
4562
0
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
4563
    // Do not allow compound prediction if the segment level reference frame
4564
    // feature is in use as in this case there can only be one reference.
4565
0
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
4566
0
  }
4567
4568
0
  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
4569
    // Mode must be compatible
4570
0
    if (!is_interintra_allowed_bsize(bsize)) return 1;
4571
0
    if (!is_interintra_allowed_mode(curr_mode)) return 1;
4572
0
  }
4573
4574
0
  return 0;
4575
0
}
4576
4577
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
4578
0
                                        BLOCK_SIZE bsize, int mib_size) {
4579
0
  const int sb_size_mask = mib_size - 1;
4580
0
  const MACROBLOCKD *const xd = &x->e_mbd;
4581
0
  const int mi_row = xd->mi_row;
4582
0
  const int mi_col = xd->mi_col;
4583
0
  const int mi_row_in_sb = mi_row & sb_size_mask;
4584
0
  const int mi_col_in_sb = mi_col & sb_size_mask;
4585
0
  const int mi_w = mi_size_wide[bsize];
4586
0
  const int mi_h = mi_size_high[bsize];
4587
0
  int picked_ref_frames_mask = 0;
4588
0
  for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_h; ++i) {
4589
0
    for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_w; ++j) {
4590
0
      picked_ref_frames_mask |= x->picked_ref_frames_mask[i * 32 + j];
4591
0
    }
4592
0
  }
4593
0
  return picked_ref_frames_mask;
4594
0
}
4595
4596
// Check if reference frame pair of the current block matches with the given
4597
// block.
4598
static inline int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4599
0
                                       const MV_REFERENCE_FRAME *ref_frames) {
4600
0
  return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4601
0
          (ref_frames[1] == mbmi->ref_frame[1]));
4602
0
}
4603
4604
// Case 1: return 0, means don't skip this mode
4605
// Case 2: return 1, means skip this mode completely
4606
// Case 3: return 2, means skip compound only, but still try single motion modes
4607
static int inter_mode_search_order_independent_skip(
4608
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
4609
    InterModeSearchState *search_state, int skip_ref_frame_mask,
4610
0
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
4611
0
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
4612
0
    return 1;
4613
0
  }
4614
4615
0
  const int ref_type = av1_ref_frame_type(ref_frame);
4616
0
  if (!cpi->sf.rt_sf.use_real_time_ref_set)
4617
0
    if (prune_ref_frame(cpi, x, ref_type)) return 1;
4618
4619
  // This is only used in motion vector unit test.
4620
0
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
4621
0
      ref_frame[0] == INTRA_FRAME)
4622
0
    return 1;
4623
4624
0
  const AV1_COMMON *const cm = &cpi->common;
4625
0
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
4626
0
    return 1;
4627
0
  }
4628
4629
  // Reuse the prediction mode in cache
4630
0
  if (x->use_mb_mode_cache) {
4631
0
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
4632
0
    const PREDICTION_MODE cached_mode = cached_mi->mode;
4633
0
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
4634
0
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;
4635
4636
    // If the cached mode is intra, then we just need to match the mode.
4637
0
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
4638
0
      return 1;
4639
0
    }
4640
4641
    // If the cached mode is single inter mode, then we match the mode and
4642
    // reference frame.
4643
0
    if (cached_mode_is_single) {
4644
0
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
4645
0
        return 1;
4646
0
      }
4647
0
    } else {
4648
      // If the cached mode is compound, then we need to consider several cases.
4649
0
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
4650
0
      if (mode_is_single) {
4651
        // If the mode is single, we know the modes can't match. But we might
4652
        // still want to search it if compound mode depends on the current mode.
4653
0
        int skip_motion_mode_only = 0;
4654
0
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
4655
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
4656
0
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
4657
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
4658
0
        } else if (cached_mode == NEW_NEWMV) {
4659
0
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
4660
0
                                   ref_frame[0] == cached_frame[1]);
4661
0
        }
4662
4663
0
        return 1 + skip_motion_mode_only;
4664
0
      } else {
4665
        // If both modes are compound, then everything must match.
4666
0
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
4667
0
            ref_frame[1] != cached_frame[1]) {
4668
0
          return 1;
4669
0
        }
4670
0
      }
4671
0
    }
4672
0
  }
4673
4674
0
  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
4675
  // If no valid mode has been found so far in PARTITION_NONE when finding a
4676
  // valid partition is required, do not skip mode.
4677
0
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
4678
0
      x->must_find_valid_partition)
4679
0
    return 0;
4680
4681
0
  const SPEED_FEATURES *const sf = &cpi->sf;
4682
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
4683
  // frames
4684
0
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
4685
0
      (mode == NEAR_NEARMV || mode == NEARMV)) {
4686
0
    const MACROBLOCKD *const xd = &x->e_mbd;
4687
0
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
4688
0
        xd->up_available) {
4689
0
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
4690
0
                                                    { 1, 1, 0 },
4691
0
                                                    { 2, 1, 0 } };
4692
0
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;
4693
4694
0
      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
4695
0
             qindex_sub_range < 3);
4696
0
      const int num_ref_frame_pair_match_thresh =
4697
0
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
4698
0
                    [qindex_sub_range];
4699
4700
0
      assert(num_ref_frame_pair_match_thresh <= 2 &&
4701
0
             num_ref_frame_pair_match_thresh >= 0);
4702
0
      int num_ref_frame_pair_match = 0;
4703
4704
0
      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
4705
0
      num_ref_frame_pair_match +=
4706
0
          match_ref_frame_pair(xd->above_mbmi, ref_frame);
4707
4708
      // Pruning based on ref frame pair match with neighbors.
4709
0
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
4710
0
    }
4711
0
  }
4712
4713
0
  int skip_motion_mode = 0;
4714
0
  if (mbmi->partition != PARTITION_NONE) {
4715
0
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
4716
0
    if (ref_type <= ALTREF_FRAME && skip_ref) {
4717
      // Since the compound ref modes depends on the motion estimation result of
4718
      // two single ref modes (best mv of single ref modes as the start point),
4719
      // if current single ref mode is marked skip, we need to check if it will
4720
      // be used in compound ref modes.
4721
0
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
4722
        // Found a not skipped compound ref mode which contains current
4723
        // single ref. So this single ref can't be skipped completely
4724
        // Just skip its motion mode search, still try its simple
4725
        // transition mode.
4726
0
        skip_motion_mode = 1;
4727
0
        skip_ref = 0;
4728
0
      }
4729
0
    }
4730
    // If we are reusing the prediction from cache, and the current frame is
4731
    // required by the cache, then we cannot prune it.
4732
0
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
4733
0
      skip_ref = 0;
4734
      // If the cache only needs the current reference type for compound
4735
      // prediction, then we can skip motion mode search.
4736
0
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
4737
0
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
4738
0
    }
4739
0
    if (skip_ref) return 1;
4740
0
  }
4741
4742
0
  if (ref_frame[0] == INTRA_FRAME) {
4743
0
    if (mode != DC_PRED) {
4744
      // Disable intra modes other than DC_PRED for blocks with low variance
4745
      // Threshold for intra skipping based on source variance
4746
      // TODO(debargha): Specialize the threshold for super block sizes
4747
0
      const unsigned int skip_intra_var_thresh = 64;
4748
0
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
4749
0
          x->source_variance < skip_intra_var_thresh)
4750
0
        return 1;
4751
0
    }
4752
0
  }
4753
4754
0
  if (skip_motion_mode) return 2;
4755
4756
0
  return 0;
4757
0
}
4758
4759
static inline void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4760
                             const MV_REFERENCE_FRAME *ref_frames,
4761
0
                             const AV1_COMMON *cm) {
4762
0
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4763
0
  mbmi->ref_mv_idx = 0;
4764
0
  mbmi->mode = curr_mode;
4765
0
  mbmi->uv_mode = UV_DC_PRED;
4766
0
  mbmi->ref_frame[0] = ref_frames[0];
4767
0
  mbmi->ref_frame[1] = ref_frames[1];
4768
0
  pmi->palette_size[0] = 0;
4769
0
  pmi->palette_size[1] = 0;
4770
0
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
4771
0
  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4772
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
4773
0
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4774
0
  set_default_interp_filters(mbmi, cm->features.interp_filter);
4775
0
}
4776
4777
static inline void collect_single_states(MACROBLOCK *x,
4778
                                         InterModeSearchState *search_state,
4779
0
                                         const MB_MODE_INFO *const mbmi) {
4780
0
  int i, j;
4781
0
  const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
4782
0
  const PREDICTION_MODE this_mode = mbmi->mode;
4783
0
  const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
4784
0
  const int mode_offset = INTER_OFFSET(this_mode);
4785
0
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
4786
4787
  // Simple rd
4788
0
  int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
4789
0
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4790
0
    const int64_t rd =
4791
0
        search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
4792
0
    if (rd < simple_rd) simple_rd = rd;
4793
0
  }
4794
4795
  // Insertion sort of single_state
4796
0
  const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
4797
0
  SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
4798
0
  i = search_state->single_state_cnt[dir][mode_offset];
4799
0
  for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
4800
0
    state_s[j] = state_s[j - 1];
4801
0
  state_s[j] = this_state_s;
4802
0
  search_state->single_state_cnt[dir][mode_offset]++;
4803
4804
  // Modelled rd
4805
0
  int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
4806
0
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4807
0
    const int64_t rd =
4808
0
        search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
4809
0
    if (rd < modelled_rd) modelled_rd = rd;
4810
0
  }
4811
4812
  // Insertion sort of single_state_modelled
4813
0
  const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
4814
0
  SingleInterModeState *state_m =
4815
0
      search_state->single_state_modelled[dir][mode_offset];
4816
0
  i = search_state->single_state_modelled_cnt[dir][mode_offset];
4817
0
  for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
4818
0
    state_m[j] = state_m[j - 1];
4819
0
  state_m[j] = this_state_m;
4820
0
  search_state->single_state_modelled_cnt[dir][mode_offset]++;
4821
0
}
4822
4823
static inline void analyze_single_states(const AV1_COMP *cpi,
4824
0
                                         InterModeSearchState *search_state) {
4825
0
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
4826
0
  assert(prune_level >= 1);
4827
0
  int i, j, dir, mode;
4828
4829
0
  for (dir = 0; dir < 2; ++dir) {
4830
0
    int64_t best_rd;
4831
0
    SingleInterModeState(*state)[FWD_REFS];
4832
0
    const int prune_factor = prune_level >= 2 ? 6 : 5;
4833
4834
    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
4835
    // reference frames for all the modes (NEARESTMV and NEARMV may not
4836
    // have same motion vectors). Always keep the best of each mode
4837
    // because it might form the best possible combination with other mode.
4838
0
    state = search_state->single_state[dir];
4839
0
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
4840
0
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
4841
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4842
0
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
4843
0
        if (state[mode][i].rd != INT64_MAX &&
4844
0
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
4845
0
          state[mode][i].valid = 0;
4846
0
        }
4847
0
      }
4848
0
    }
4849
4850
0
    state = search_state->single_state_modelled[dir];
4851
0
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
4852
0
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
4853
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4854
0
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
4855
0
        if (state[mode][i].rd != INT64_MAX &&
4856
0
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
4857
0
          state[mode][i].valid = 0;
4858
0
        }
4859
0
      }
4860
0
    }
4861
0
  }
4862
4863
  // Ordering by simple rd first, then by modelled rd
4864
0
  for (dir = 0; dir < 2; ++dir) {
4865
0
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4866
0
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
4867
0
      const int state_cnt_m =
4868
0
          search_state->single_state_modelled_cnt[dir][mode];
4869
0
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
4870
0
      SingleInterModeState *state_m =
4871
0
          search_state->single_state_modelled[dir][mode];
4872
0
      int count = 0;
4873
0
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
4874
0
      for (i = 0; i < state_cnt_s; ++i) {
4875
0
        if (state_s[i].rd == INT64_MAX) break;
4876
0
        if (state_s[i].valid) {
4877
0
          search_state->single_rd_order[dir][mode][count++] =
4878
0
              state_s[i].ref_frame;
4879
0
        }
4880
0
      }
4881
0
      if (count >= max_candidates) continue;
4882
4883
0
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
4884
0
        if (state_m[i].rd == INT64_MAX) break;
4885
0
        if (!state_m[i].valid) continue;
4886
0
        const int ref_frame = state_m[i].ref_frame;
4887
0
        int match = 0;
4888
        // Check if existing already
4889
0
        for (j = 0; j < count; ++j) {
4890
0
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
4891
0
            match = 1;
4892
0
            break;
4893
0
          }
4894
0
        }
4895
0
        if (match) continue;
4896
        // Check if this ref_frame is removed in simple rd
4897
0
        int valid = 1;
4898
0
        for (j = 0; j < state_cnt_s; ++j) {
4899
0
          if (ref_frame == state_s[j].ref_frame) {
4900
0
            valid = state_s[j].valid;
4901
0
            break;
4902
0
          }
4903
0
        }
4904
0
        if (valid) {
4905
0
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
4906
0
        }
4907
0
      }
4908
0
    }
4909
0
  }
4910
0
}
4911
4912
static int compound_skip_get_candidates(
4913
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
4914
0
    const int dir, const PREDICTION_MODE mode) {
4915
0
  const int mode_offset = INTER_OFFSET(mode);
4916
0
  const SingleInterModeState *state =
4917
0
      search_state->single_state[dir][mode_offset];
4918
0
  const SingleInterModeState *state_modelled =
4919
0
      search_state->single_state_modelled[dir][mode_offset];
4920
4921
0
  int max_candidates = 0;
4922
0
  for (int i = 0; i < FWD_REFS; ++i) {
4923
0
    if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4924
0
    max_candidates++;
4925
0
  }
4926
4927
0
  int candidates = max_candidates;
4928
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4929
0
    candidates = AOMMIN(2, max_candidates);
4930
0
  }
4931
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4932
0
    if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4933
0
        state[0].ref_frame == state_modelled[0].ref_frame)
4934
0
      candidates = 1;
4935
0
    if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4936
0
  }
4937
4938
0
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4939
    // Limit the number of candidates to 1 in each direction for compound
4940
    // prediction
4941
0
    candidates = AOMMIN(1, candidates);
4942
0
  }
4943
0
  return candidates;
4944
0
}
4945
4946
// Decide whether the compound mode |this_mode| with references
// {ref_frame, second_ref_frame} can be skipped based on the statistics
// gathered while searching the corresponding single-reference modes.
// Returns 1 when the mode should be pruned, 0 otherwise.
static int compound_skip_by_single_states(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
  // Single-reference modes that make up each half of the compound mode.
  const int mode[2] = { compound_ref0_mode(this_mode),
                        compound_ref1_mode(this_mode) };
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
  // Direction of each reference: 0 for forward (<= GOLDEN), 1 for backward.
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
  int ref_searched[2] = { 0, 0 };
  int ref_mv_match[2] = { 1, 1 };
  int i, j;

  // Determine whether each half was actually evaluated as a
  // single-reference mode (i.e. appears in the recorded state list).
  for (i = 0; i < 2; ++i) {
    const SingleInterModeState *state =
        search_state->single_state[mode_dir[i]][mode_offset[i]];
    const int state_cnt =
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
    for (j = 0; j < state_cnt; ++j) {
      if (state[j].ref_frame == refs[i]) {
        ref_searched[i] = 1;
        break;
      }
    }
  }

  // For NEAREST/NEAR halves, verify that the MVs the single mode used agree
  // with the MVs the compound mode would use; if they differ the single-mode
  // statistics are not comparable and that half is excluded from pruning.
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
      continue;
    }
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
      int_mv single_mv;
      int_mv comp_mv;
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
                  &x->mbmi_ext);
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
      if (single_mv.as_int != comp_mv.as_int) {
        ref_mv_match[i] = 0;
        break;
      }
    }
  }

  // Prune unless each comparable half's reference appears among the top
  // |candidates| entries of the rd-ordered single-mode reference list.
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
    const int candidates =
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
    const MV_REFERENCE_FRAME *ref_order =
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
    int match = 0;
    for (j = 0; j < candidates; ++j) {
      if (refs[i] == ref_order[j]) {
        match = 1;
        break;
      }
    }
    if (!match) return 1;
  }

  return 0;
}
5010
5011
// Check if ref frames of current block matches with given block.
5012
static inline void match_ref_frame(const MB_MODE_INFO *const mbmi,
5013
                                   const MV_REFERENCE_FRAME *ref_frames,
5014
0
                                   int *const is_ref_match) {
5015
0
  if (is_inter_block(mbmi)) {
5016
0
    is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
5017
0
    is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
5018
0
    if (has_second_ref(mbmi)) {
5019
0
      is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
5020
0
      is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
5021
0
    }
5022
0
  }
5023
0
}
5024
5025
// Prune compound mode using ref frames of neighbor blocks.
5026
static inline int compound_skip_using_neighbor_refs(
5027
    MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
5028
0
    const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
5029
  // Exclude non-extended compound modes from pruning
5030
0
  if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
5031
0
      this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
5032
0
    return 0;
5033
5034
0
  if (prune_ext_comp_using_neighbors >= 3) return 1;
5035
5036
0
  int is_ref_match[2] = { 0 };  // 0 - match for forward refs
5037
                                // 1 - match for backward refs
5038
  // Check if ref frames of this block matches with left neighbor.
5039
0
  if (xd->left_available)
5040
0
    match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
5041
5042
  // Check if ref frames of this block matches with above neighbor.
5043
0
  if (xd->up_available)
5044
0
    match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
5045
5046
  // Combine ref frame match with neighbors in forward and backward refs.
5047
0
  const int track_ref_match = is_ref_match[0] + is_ref_match[1];
5048
5049
  // Pruning based on ref frame match with neighbors.
5050
0
  if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
5051
0
  return 1;
5052
0
}
5053
5054
// Update best single mode for the given reference frame based on simple rd.
5055
static inline void update_best_single_mode(InterModeSearchState *search_state,
5056
                                           const PREDICTION_MODE this_mode,
5057
                                           const MV_REFERENCE_FRAME ref_frame,
5058
0
                                           int64_t this_rd) {
5059
0
  if (this_rd < search_state->best_single_rd[ref_frame]) {
5060
0
    search_state->best_single_rd[ref_frame] = this_rd;
5061
0
    search_state->best_single_mode[ref_frame] = this_mode;
5062
0
  }
5063
0
}
5064
5065
// Prune compound mode using best single mode for the same reference.
5066
static inline int skip_compound_using_best_single_mode_ref(
5067
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
5068
    const PREDICTION_MODE *best_single_mode,
5069
0
    int prune_comp_using_best_single_mode_ref) {
5070
  // Exclude non-extended compound modes from pruning
5071
0
  if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
5072
0
      this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
5073
0
    return 0;
5074
5075
0
  assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
5076
0
  const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
5077
  // Get ref frame direction corresponding to NEWMV
5078
  // 0 - NEWMV corresponding to forward direction
5079
  // 1 - NEWMV corresponding to backward direction
5080
0
  const int newmv_dir = comp_mode_ref0 != NEWMV;
5081
5082
  // Avoid pruning the compound mode when ref frame corresponding to NEWMV
5083
  // have NEWMV as single mode winner.
5084
  // Example: For an extended-compound mode,
5085
  // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
5086
  // - Ref frame corresponding to NEWMV is ALTREF_FRAME
5087
  // - Avoid pruning this mode, if best single mode corresponding to ref frame
5088
  //   ALTREF_FRAME is NEWMV
5089
0
  const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
5090
0
  if (single_mode == NEWMV) return 0;
5091
5092
  // Avoid pruning the compound mode when best single mode is not available
5093
0
  if (prune_comp_using_best_single_mode_ref == 1)
5094
0
    if (single_mode == MB_MODE_COUNT) return 0;
5095
0
  return 1;
5096
0
}
5097
5098
0
// qsort comparator ordering int64_t values ascending.
// Fix: the previous version cast away the const qualifier from the
// const void * arguments; the pointees are read-only, so the casts must
// preserve const. Explicit comparisons (rather than subtraction) avoid
// signed overflow for operands of opposite sign.
static int compare_int64(const void *a, const void *b) {
  const int64_t a64 = *(const int64_t *)a;
  const int64_t b64 = *(const int64_t *)b;
  if (a64 < b64) return -1;
  if (a64 > b64) return 1;
  return 0;
}
5109
5110
// Copy the statistics of a newly found best mode into |search_state| and the
// pick-mode context / output RD stats after it has beaten the previous best.
static inline void update_search_state(
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  // Intra modes never take the skip-txfm shortcut.
  const int skip_txfm =
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
  const TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  search_state->best_rd = new_best_rd_stats->rdcost;
  search_state->best_mode_index = new_best_mode;
  *best_rd_stats_dst = *new_best_rd_stats;
  search_state->best_mbmode = *mbmi;
  search_state->best_skip2 = skip_txfm;
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
  // These two values will be updated when av1_txfm_search is called.
  if (txfm_search_done) {
    // Fold the skip-txfm signaling cost into the stored luma rate.
    search_state->best_rate_y =
        new_best_rd_stats_y->rate +
        x->mode_costs.skip_txfm_cost[skip_ctx]
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
  }
  search_state->best_y_rdcost = *new_best_rd_stats_y;
  // Preserve the winner's per-4x4 transform-skip decisions and tx types so
  // they can be replayed when the block is finally encoded.
  memcpy(ctx->blk_skip, txfm_info->blk_skip,
         sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
5144
5145
// Find the best RD for a reference frame (among single reference modes)
5146
// and store +10% of it in the 0-th element in ref_frame_rd.
5147
0
static inline void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
5148
0
  assert(ref_frame_rd[0] == INT64_MAX);
5149
0
  int64_t ref_copy[REF_FRAMES - 1];
5150
0
  memcpy(ref_copy, ref_frame_rd + 1,
5151
0
         sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
5152
0
  qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
5153
5154
0
  int64_t cutoff = ref_copy[0];
5155
  // The cut-off is within 10% of the best.
5156
0
  if (cutoff != INT64_MAX) {
5157
0
    assert(cutoff < INT64_MAX / 200);
5158
0
    cutoff = (110 * cutoff) / 100;
5159
0
  }
5160
0
  ref_frame_rd[0] = cutoff;
5161
0
}
5162
5163
// Check if either frame is within the cutoff.
5164
static inline bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
5165
                                        MV_REFERENCE_FRAME frame1,
5166
0
                                        MV_REFERENCE_FRAME frame2) {
5167
0
  assert(frame2 > 0);
5168
0
  return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
5169
0
         ref_frame_rd[frame2] <= ref_frame_rd[0];
5170
0
}
5171
5172
// Re-evaluate the stored winner candidates (which were searched with
// SIMPLE_TRANSLATION only) with the full motion-mode search, updating
// |search_state| / |rd_cost| / |yrd| whenever a candidate beats the current
// best mode.
static inline void evaluate_motion_mode_for_winner_candidates(
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
    PICK_MODE_CONTEXT *const ctx,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
    InterModeSearchState *const search_state, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;

  for (int cand = 0; cand < num_best_cand; cand++) {
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    av1_init_rd_stats(&rd_stats);
    av1_init_rd_stats(&rd_stats_y);
    av1_init_rd_stats(&rd_stats_uv);
    int rate_mv;

    // Restore the candidate's mode info and previously computed rates.
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
    args->skip_motion_mode =
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
    rd_stats.rate =
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;

    // Continue if the best candidate is compound.
    if (!is_inter_singleref_mode(mbmi->mode)) continue;

    x->txfm_search_info.skip_txfm = 0;
    struct macroblockd_plane *pd = xd->plane;
    const BUFFER_SET orig_dst = {
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
    };

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    // Initialize motion mode to simple translation
    // Calculation of switchable rate depends on it.
    mbmi->motion_mode = 0;
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    // Point the prediction buffers at the candidate's reference frame(s).
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
                           search_state->best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
    // Run the full motion-mode search (OBMC/warp etc.) for this candidate.
    int64_t ret_value = motion_mode_rd(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
        do_tx_search, inter_modes_info, 1, &this_yrd);

    if (ret_value != INT64_MAX) {
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
          mode_enum, NULL, bsize, rd_stats.rdcost,
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);

      // Optionally bias the comparison against warp modes before deciding
      // whether this candidate becomes the new best.
      int64_t best_scaled_rd = search_state->best_rd;
      int64_t this_scaled_rd = rd_stats.rdcost;
      if (search_state->best_mode_index != THR_INVALID)
        increase_warp_mode_rd(&search_state->best_mbmode, mbmi, &best_scaled_rd,
                              &this_scaled_rd,
                              cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct);

      if (this_scaled_rd < best_scaled_rd) {
        *yrd = this_yrd;
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                            &rd_stats_uv, mode_enum, x, do_tx_search);
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
      }
    }
  }
}
5257
5258
/*!\cond */
// Arguments for speed feature pruning of inter mode search
typedef struct {
  // Output flag: set when only the motion-mode part of the search should be
  // skipped for the current mode (skip_inter_mode returned 0 with ret == 2).
  int *skip_motion_mode;
  // Masks of modes/refs excluded from the search.
  mode_skip_mask_t *mode_skip_mask;
  // Shared state of the ongoing inter-mode search.
  InterModeSearchState *search_state;
  // Bitmask of reference frames to skip.
  int skip_ref_frame_mask;
  // Set once the first compound mode has been reached in the mode order and
  // single-mode statistics have been analyzed.
  int reach_first_comp_mode;
  // Multiplier applied to the per-mode threshold when the best mode so far
  // is skippable (more aggressive pruning).
  int mode_thresh_mul_fact;
  // Number of single-reference modes processed so far (compared against
  // NUM_SINGLE_REF_MODES before compound pruning stats are built).
  int num_single_modes_processed;
  // Set once find_top_ref() has populated ref_frame_rd[0] for
  // prune_compound_using_single_ref.
  int prune_cpd_using_sr_stats_ready;
} InterModeSFArgs;
/*!\endcond */
5271
5272
// Decide whether the inter mode indexed by |midx| can be pruned before any
// rate-distortion evaluation, applying the enabled speed-feature checks in
// sequence. Returns 1 to skip the mode entirely, 0 to evaluate it; as a side
// effect *args->skip_motion_mode is set when only the motion-mode search
// should be skipped.
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
                           int64_t *ref_frame_rd, int midx,
                           InterModeSFArgs *args, int is_low_temp_var) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Get the actual prediction mode we are trying in this iteration
  const THR_MODES mode_enum = av1_default_mode_order[midx];
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
  const PREDICTION_MODE this_mode = mode_def->mode;
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
  const int comp_pred = second_ref_frame > INTRA_FRAME;

  // Intra modes are handled elsewhere.
  if (ref_frame == INTRA_FRAME) return 1;

  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
      comp_pred) {
    return 1;
  }

  // This is for real time encoding.
  if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
      this_mode != NEARESTMV)
    return 1;

  // Check if this mode should be skipped because it is incompatible with the
  // current frame
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
    return 1;
  const int ret = inter_mode_search_order_independent_skip(
      cpi, x, args->mode_skip_mask, args->search_state,
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
  if (ret == 1) return 1;
  // ret == 2 means: evaluate the mode but skip its motion-mode search.
  *(args->skip_motion_mode) = (ret == 2);

  // We've reached the first compound prediction mode, get stats from the
  // single reference predictors to help with pruning.
  // Disable this pruning logic if interpolation filter search was skipped for
  // single prediction modes as it can result in aggressive pruning of compound
  // prediction modes due to the absence of modelled_rd populated by
  // av1_interpolation_filter_search().
  // TODO(Remya): Check the impact of the sf
  // 'prune_comp_search_by_single_result' if compound prediction modes are
  // enabled in future for REALTIME encode.
  if (!sf->interp_sf.skip_interp_filter_search &&
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
      args->reach_first_comp_mode == 0) {
    analyze_single_states(cpi, args->search_state);
    args->reach_first_comp_mode = 1;
  }

  // Prune aggressively when best mode is skippable.
  int mul_fact = args->search_state->best_mode_skippable
                     ? args->mode_thresh_mul_fact
                     : (1 << MODE_THRESH_QBITS);
  int64_t mode_threshold =
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
      MODE_THRESH_QBITS;

  if (args->search_state->best_rd < mode_threshold) return 1;

  // Skip this compound mode based on the RD results from the single prediction
  // modes
  if (!sf->interp_sf.skip_interp_filter_search &&
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
                                       ref_frame, second_ref_frame, x))
      return 1;
  }

  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
    // After we done with single reference modes, find the 2nd best RD
    // for a reference frame. Only search compound modes that have a reference
    // frame at least as good as the 2nd best.
    if (!args->prune_cpd_using_sr_stats_ready &&
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
      find_top_ref(ref_frame_rd);
      args->prune_cpd_using_sr_stats_ready = 1;
    }
    if (args->prune_cpd_using_sr_stats_ready &&
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
      return 1;
  }

  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
    return 1;
  }

  // Prune extended compound modes using neighbor blocks' reference frames.
  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
    if (compound_skip_using_neighbor_refs(
            xd, this_mode, ref_frames,
            sf->inter_sf.prune_ext_comp_using_neighbors))
      return 1;
  }

  // Prune extended compound modes using the per-ref best single mode.
  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
    if (skip_compound_using_best_single_mode_ref(
            this_mode, ref_frames, args->search_state->best_single_mode,
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
      return 1;
  }

  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
    if (skip_nearest_near_mv_using_refmv_weight(
            x, this_mode, ref_frame_type,
            args->search_state->best_mbmode.mode)) {
      // Ensure the mode is pruned only when the current block has obtained a
      // valid inter mode.
      assert(is_inter_mode(args->search_state->best_mbmode.mode));
      return 1;
    }
  }

  // Real-time pruning of stale GOLDEN_FRAME modes.
  if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
      ref_frame == GOLDEN_FRAME && !comp_pred) {
    const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
    if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
        args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
      if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
        return 1;
    }
  }

  return 0;
}
5403
5404
static void record_best_compound(REFERENCE_MODE reference_mode,
5405
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
5406
                                 InterModeSearchState *search_state,
5407
0
                                 int compmode_cost) {
5408
0
  int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
5409
5410
0
  if (reference_mode == REFERENCE_MODE_SELECT) {
5411
0
    single_rate = rd_stats->rate - compmode_cost;
5412
0
    hybrid_rate = rd_stats->rate;
5413
0
  } else {
5414
0
    single_rate = rd_stats->rate;
5415
0
    hybrid_rate = rd_stats->rate + compmode_cost;
5416
0
  }
5417
5418
0
  single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
5419
0
  hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);
5420
5421
0
  if (!comp_pred) {
5422
0
    if (single_rd < search_state->best_pred_rd[SINGLE_REFERENCE])
5423
0
      search_state->best_pred_rd[SINGLE_REFERENCE] = single_rd;
5424
0
  } else {
5425
0
    if (single_rd < search_state->best_pred_rd[COMPOUND_REFERENCE])
5426
0
      search_state->best_pred_rd[COMPOUND_REFERENCE] = single_rd;
5427
0
  }
5428
0
  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
5429
0
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
5430
0
}
5431
5432
// Does a transform search over a list of the best inter mode candidates.
// This is called if the original mode search computed an RD estimate
// for the transform search rather than doing a full search.
static void tx_search_best_inter_candidates(
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
    InterModeSearchState *search_state, RD_STATS *rd_cost,
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int num_planes = av1_num_planes(cm);
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  // Candidates are visited in ascending order of their estimated RD.
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
  search_state->best_rd = best_rd_so_far;
  search_state->best_mode_index = THR_INVALID;
  // Initialize best mode stats for winner mode processing
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
  // Cap the candidate count by the real-time speed feature.
  inter_modes_info->num =
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
          ? inter_modes_info->num
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
  // Estimated RD of the best candidate; used for early termination below.
  const int64_t top_est_rd =
      inter_modes_info->num > 0
          ? inter_modes_info
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
          : INT64_MAX;
  *yrd = INT64_MAX;
  int64_t best_rd_in_this_partition = INT64_MAX;
  int num_inter_mode_cands = inter_modes_info->num;
  int newmv_mode_evaled = 0;
  int max_allowed_cands = INT_MAX;
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
    // The bound on the no. of inter mode candidates, beyond which the
    // candidates are limited if a newmv mode got evaluated, is set as
    // max_allowed_cands + 1.
    const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
    max_allowed_cands =
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
  }

  int num_mode_thresh = INT_MAX;
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
    // Bound the no. of transform searches per prediction mode beyond a
    // threshold.
    const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
    num_mode_thresh =
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
  }

  int num_tx_cands = 0;
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
  // Iterate over best inter mode candidates and perform tx search
  for (int j = 0; j < num_inter_mode_cands; ++j) {
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
    const PREDICTION_MODE prediction_mode = mbmi->mode;
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
    // Stop once the estimated RD falls too far behind the best candidate.
    if (curr_est_rd * 0.80 > top_est_rd) break;

    // Beyond the per-mode threshold, allow at most one tx search per mode
    // (two for NEARESTMV).
    if (num_tx_cands > num_mode_thresh) {
      if ((prediction_mode != NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
          (prediction_mode == NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
        continue;
    }

    txfm_info->skip_txfm = 0;
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select prediction reference frames.
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    bool is_predictor_built = false;

    // Initialize RD stats
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
    int64_t skip_rd = INT64_MAX;
    const int txfm_rd_gate_level = get_txfm_rd_gate_level(
        cm->seq_params->enable_masked_compound,
        cpi->sf.inter_sf.txfm_rd_gate_level, bsize, TX_SEARCH_DEFAULT,
        /*eval_motion_mode=*/0);
    if (txfm_rd_gate_level) {
      // Check if the mode is good enough based on skip RD
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
      int eval_txfm = check_txfm_eval(x, bsize, search_state->best_skip_rd[0],
                                      skip_rd, txfm_rd_gate_level, 0);
      if (!eval_txfm) continue;
    }

    // Build the prediction for this mode
    if (!is_predictor_built) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      av1_build_obmc_inter_predictors_sb(cm, xd);
    }

    num_tx_cands++;
    if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
    num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
    int64_t this_yrd = INT64_MAX;
    // Do the transform search
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
                         mode_rate, search_state->best_rd)) {
      continue;
    } else {
      // Luma RD including the skip-txfm signaling cost.
      const int y_rate =
          rd_stats.skip_txfm
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);

      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        // Feed the observed rate/distortion back into the RD model.
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
            rd_stats_y.rate + rd_stats_uv.rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }

    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);

    const THR_MODES mode_enum = get_prediction_mode_idx(
        prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Collect mode stats for multiwinner mode processing
    const int txfm_search_done = 1;
    store_winner_mode_stats(
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
        NULL, bsize, rd_stats.rdcost,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);

    // Optionally bias the comparison against warp modes.
    int64_t best_scaled_rd = search_state->best_rd;
    int64_t this_scaled_rd = rd_stats.rdcost;
    increase_warp_mode_rd(&search_state->best_mbmode, mbmi, &best_scaled_rd,
                          &this_scaled_rd,
                          cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct);
    if (this_scaled_rd < best_rd_in_this_partition) {
      best_rd_in_this_partition = rd_stats.rdcost;
      *yrd = this_yrd;
    }

    if (this_scaled_rd < best_scaled_rd) {
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
      search_state->best_skip_rd[0] = skip_rd;
      // Limit the total number of modes to be evaluated if the first is valid
      // and transform skip or compound
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
          // Evaluate more candidates at high quantizers where occurrence of
          // transform skip is high.
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands =
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
          // Evaluate more candidates at low quantizers where occurrence of
          // single reference mode is high.
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
                                                { 10, 7, 5, 3 } };
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands = AOMMIN(
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
        }
      }
    }
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
    // a newmv mode was evaluated already.
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
  }
}
5625
5626
// Number of winner simple-translation modes retained for later exhaustive
// motion-mode evaluation, indexed by the motion_mode_for_winner_cand speed
// feature level (0 = feature disabled, so no candidates are kept).
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
5628
5629
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
5630
// speed feature. This list consists of modes that have only searched
5631
// SIMPLE_TRANSLATION. The final list will be used to search other motion
5632
// modes after the initial RD search.
5633
static void handle_winner_cand(
5634
    MB_MODE_INFO *const mbmi,
5635
    motion_mode_best_st_candidate *best_motion_mode_cands,
5636
    int max_winner_motion_mode_cand, int64_t this_rd,
5637
0
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
5638
  // Number of current motion mode candidates in list
5639
0
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
5640
0
  int valid_motion_mode_cand_loc = num_motion_mode_cand;
5641
5642
  // find the best location to insert new motion mode candidate
5643
0
  for (int j = 0; j < num_motion_mode_cand; j++) {
5644
0
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
5645
0
      valid_motion_mode_cand_loc = j;
5646
0
      break;
5647
0
    }
5648
0
  }
5649
5650
  // Insert motion mode if location is found
5651
0
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
5652
0
    if (num_motion_mode_cand > 0 &&
5653
0
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
5654
0
      memmove(
5655
0
          &best_motion_mode_cands
5656
0
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
5657
0
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
5658
0
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
5659
0
           valid_motion_mode_cand_loc) *
5660
0
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
5661
0
    motion_mode_cand->mbmi = *mbmi;
5662
0
    motion_mode_cand->rd_cost = this_rd;
5663
0
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
5664
0
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
5665
0
        *motion_mode_cand;
5666
0
    best_motion_mode_cands->num_motion_mode_cand =
5667
0
        AOMMIN(max_winner_motion_mode_cand,
5668
0
               best_motion_mode_cands->num_motion_mode_cand + 1);
5669
0
  }
5670
0
}
5671
5672
/*!\brief Search intra modes in interframes
 *
 * \ingroup intra_mode_search
 *
 * This function searches for the best intra mode when the current frame is an
 * interframe. This function however does *not* handle luma palette mode.
 * Palette mode is currently handled by \ref av1_search_palette_mode.
 *
 * This function will first iterate through the luma mode candidates to find
 * the best luma intra mode. Once the best luma mode is found, it will then
 * search for the best chroma mode. Because palette mode is currently not
 * handled here, a cache of uv mode is stored in
 * InterModeSearchState::intra_search_state so it can be reused later by \ref
 * av1_search_palette_mode.
 *
 * \param[in,out] search_state      Struct keep track of the prediction mode
 *                                  search state in interframe.
 *
 * \param[in]     cpi               Top-level encoder structure.
 * \param[in,out] x                 Pointer to struct holding all the data for
 *                                  the current prediction block.
 * \param[out]    rd_cost           Stores the best rd_cost among all the
 *                                  prediction modes searched.
 * \param[in]     bsize             Current block size.
 * \param[in,out] ctx               Structure to hold the number of 4x4 blks to
 *                                  copy the tx_type and txfm_skip arrays.
 *                                  for only the Y plane.
 * \param[in]     sf_args           Stores the list of intra mode candidates
 *                                  to be searched.
 * \param[in]     intra_ref_frame_cost  The entropy cost for signaling that the
 *                                      current ref frame is an intra frame.
 * \param[in]     yrd_threshold     The rdcost threshold for luma intra mode to
 *                                  terminate chroma intra mode search.
 *
 * \remark If a new best mode is found, search_state and rd_costs are updated
 * correspondingly. While x is also modified, it is only used as a temporary
 * buffer, and the final decisions are stored in search_state.
 */
static inline void search_intra_modes_in_interframe(
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
    const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
    int64_t yrd_threshold) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;

  // Tracking of the best luma intra mode found in the loop below.
  int is_best_y_mode_intra = 0;
  RD_STATS best_intra_rd_stats_y;
  int64_t best_rd_y = INT64_MAX;
  int best_mode_cost_y = -1;
  MB_MODE_INFO best_mbmi = *xd->mi[0];
  THR_MODES best_mode_enum = THR_INVALID;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int num_4x4 = bsize_to_num_blk(bsize);

  // Performs luma search
  int64_t best_model_rd = INT64_MAX;
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
    top_intra_model_rd[i] = INT64_MAX;
  }

  // With the sharpness setting, intra search in interframes is restricted to
  // small blocks (at most 4 mi units in each dimension).
  if (cpi->oxcf.algo_cfg.sharpness) {
    int bh = mi_size_high[bsize];
    int bw = mi_size_wide[bsize];
    if (bh > 4 || bw > 4) return;
  }

  // Iterate over all luma mode / delta-angle combinations, pruning with the
  // various intra speed features before each full evaluation.
  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
    if (sf->intra_sf.skip_intra_in_interframe &&
        search_state->intra_search_state.skip_intra_modes)
      break;
    set_y_mode_and_delta_angle(
        mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
    assert(mbmi->mode < INTRA_MODE_END);

    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
      continue;

    const THR_MODES mode_enum =
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
    // Honor the configuration/speed-feature switches that disable smooth,
    // Paeth, and angle-delta variants.
    if ((!intra_mode_cfg->enable_smooth_intra ||
         cpi->sf.intra_sf.disable_smooth_intra) &&
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
         mbmi->mode == SMOOTH_V_PRED))
      continue;
    if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
      continue;
    if (av1_is_directional_mode(mbmi->mode) &&
        !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
        mbmi->angle_delta[PLANE_TYPE_Y] != 0)
      continue;
    const PREDICTION_MODE this_mode = mbmi->mode;

    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
    x->txfm_search_info.skip_txfm = 0;

    if (this_mode != DC_PRED) {
      // Only search the oblique modes if the best so far is
      // one of the neighboring directional modes
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
        if (search_state->best_mode_index != THR_INVALID &&
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
          continue;
      }
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(
                this_mode, search_state->intra_search_state.best_intra_mode))
          continue;
      }
    }

    // Full luma evaluation of this candidate mode.
    RD_STATS intra_rd_stats_y;
    int mode_cost_y;
    int64_t intra_rd_y = INT64_MAX;
    const int is_luma_result_valid = av1_handle_intra_y_mode(
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
        &best_model_rd, top_intra_model_rd);

    if (intra_rd_y < INT64_MAX) {
      adjust_cost(cpi, x, &intra_rd_y);
    }

    // Only a luma rd below yrd_threshold qualifies for the chroma search.
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
      is_best_y_mode_intra = 1;
      if (intra_rd_y < best_rd_y) {
        // Snapshot everything needed to restore this mode after the loop.
        best_intra_rd_stats_y = intra_rd_stats_y;
        best_mode_cost_y = mode_cost_y;
        best_rd_y = intra_rd_y;
        best_mbmi = *mbmi;
        best_mode_enum = mode_enum;
        memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
               sizeof(best_blk_skip[0]) * num_4x4);
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
      }
    }
  }

  if (!is_best_y_mode_intra) {
    return;
  }

  assert(best_rd_y < INT64_MAX);

  // Restores the best luma mode
  *mbmi = best_mbmi;
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * num_4x4);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);

  // Performs chroma search
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
  av1_init_rd_stats(&intra_rd_stats);
  av1_init_rd_stats(&intra_rd_stats_uv);
  const int num_planes = av1_num_planes(cm);
  if (num_planes > 1) {
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);

    if (!intra_uv_mode_valid) {
      return;
    }
  }

  // Merge the luma and chroma rd stats
  assert(best_mode_cost_y >= 0);
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate.
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }

  const ModeCosts *mode_costs = &x->mode_costs;
  const PREDICTION_MODE mode = mbmi->mode;
  // Add the chroma mode signaling cost when a chroma plane is coded here.
  if (num_planes > 1 && xd->is_chroma_ref) {
    const int uv_mode_cost =
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
    intra_rd_stats.rate +=
        intra_rd_stats_uv.rate +
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
  }

  // Intra block is always coded as non-skip
  intra_rd_stats.skip_txfm = 0;
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
  // Add in the cost of the no skip flag.
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
  // Calculate the final RD estimate for this mode.
  const int64_t this_rd =
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
  // Keep record of best intra rd
  if (this_rd < search_state->best_intra_rd) {
    search_state->best_intra_rd = this_rd;
    intra_search_state->best_intra_mode = mode;
  }

  // An intra result bounds every reference-mode category's best rd.
  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] =
        AOMMIN(search_state->best_pred_rd[i], this_rd);
  }

  intra_rd_stats.rdcost = this_rd;

  adjust_rdcost(cpi, x, &intra_rd_stats);

  // Collect mode stats for multiwinner mode processing
  const int txfm_search_done = 1;
  store_winner_mode_stats(
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
  if (intra_rd_stats.rdcost < search_state->best_rd) {
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
                        best_mode_enum, x, txfm_search_done);
  }
}
5904
5905
#if !CONFIG_REALTIME_ONLY
5906
// Prepare inter_cost and intra_cost from TPL stats, which are used as ML
5907
// features in intra mode pruning.
5908
static inline void calculate_cost_from_tpl_data(const AV1_COMP *cpi,
5909
                                                MACROBLOCK *x, BLOCK_SIZE bsize,
5910
                                                int mi_row, int mi_col,
5911
                                                int64_t *inter_cost,
5912
0
                                                int64_t *intra_cost) {
5913
0
  const AV1_COMMON *const cm = &cpi->common;
5914
  // Only consider full SB.
5915
0
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
5916
0
  const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
5917
0
  const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5918
0
                  (block_size_high[sb_size] / tpl_bsize_1d);
5919
0
  SuperBlockEnc *sb_enc = &x->sb_enc;
5920
0
  if (sb_enc->tpl_data_count == len) {
5921
0
    const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5922
0
    const int tpl_stride = sb_enc->tpl_stride;
5923
0
    const int tplw = mi_size_wide[tpl_bsize];
5924
0
    const int tplh = mi_size_high[tpl_bsize];
5925
0
    const int nw = mi_size_wide[bsize] / tplw;
5926
0
    const int nh = mi_size_high[bsize] / tplh;
5927
0
    if (nw >= 1 && nh >= 1) {
5928
0
      const int of_h = mi_row % mi_size_high[sb_size];
5929
0
      const int of_w = mi_col % mi_size_wide[sb_size];
5930
0
      const int start = of_h / tplh * tpl_stride + of_w / tplw;
5931
5932
0
      for (int k = 0; k < nh; k++) {
5933
0
        for (int l = 0; l < nw; l++) {
5934
0
          *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5935
0
          *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5936
0
        }
5937
0
      }
5938
0
      *inter_cost /= nw * nh;
5939
0
      *intra_cost /= nw * nh;
5940
0
    }
5941
0
  }
5942
0
}
5943
#endif  // !CONFIG_REALTIME_ONLY
5944
5945
// When the speed feature skip_intra_in_interframe > 0, enable ML model to
// prune intra mode search. Sets
// search_state->intra_search_state.skip_intra_modes when intra evaluation can
// be bypassed for this block; otherwise leaves the search state unchanged.
static inline void skip_intra_modes_in_interframe(
    AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
    InterModeSearchState *search_state, const SPEED_FEATURES *const sf,
    int64_t inter_cost, int64_t intra_cost) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME;
  // Skip intra for large single-reference blocks whose best inter mode has a
  // small motion vector and the source has non-trivial variance.
  if (sf->rt_sf.prune_intra_mode_based_on_mv_range &&
      bsize > sf->part_sf.max_intra_bsize && !comp_pred) {
    const MV best_mv = search_state->best_mbmode.mv[0].as_mv;
    const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range;
    if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh &&
        x->source_variance > 128) {
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    }
  }

  // The remaining pruning applies only when the feature is on and the source
  // variance exceeds a minimal threshold.
  const unsigned int src_var_thresh_intra_skip = 1;
  const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
  if (!(skip_intra_in_interframe &&
        (x->source_variance > src_var_thresh_intra_skip)))
    return;

  // Prune intra search based on best inter mode being transform skip.
  if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
    // Higher feature levels use a looser qindex bound.
    const int qindex_thresh[2] = { 200, MAXQ };
    const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
    if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
        (x->qindex <= qindex_thresh[ind])) {
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    } else if ((skip_intra_in_interframe >= 4) &&
               (inter_cost < 0 || intra_cost < 0)) {
      // At the most aggressive levels, missing TPL costs (still at their -1
      // initial values) are themselves grounds to skip.
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    }
  }
  // Use ML model to prune intra search.
  if (inter_cost >= 0 && intra_cost >= 0) {
    // Separate models for sub-480p and larger resolutions.
    const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
                                     ? &av1_intrap_nn_config
                                     : &av1_intrap_hd_nn_config;
    float nn_features[6];
    float scores[2] = { 0.0f };

    nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
    nn_features[1] = (float)mi_size_wide_log2[bsize];
    nn_features[2] = (float)mi_size_high_log2[bsize];
    nn_features[3] = (float)intra_cost;
    nn_features[4] = (float)inter_cost;
    const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
    const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
    // Integer division: the feature is the truncated quantizer ratio.
    nn_features[5] = (float)(ac_q_max / ac_q);

    av1_nn_predict(nn_features, nn_config, 1, scores);

    // For two parameters, the max prob returned from av1_nn_softmax equals
    // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
    // calling of av1_nn_softmax.
    const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
    assert(skip_intra_in_interframe <= 5);
    if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
      search_state->intra_search_state.skip_intra_modes = 1;
    }
  }
}
6013
6014
static inline bool skip_interp_filter_search(const AV1_COMP *cpi,
6015
0
                                             int is_single_pred) {
6016
0
  const MODE encoding_mode = cpi->oxcf.mode;
6017
0
  if (encoding_mode == REALTIME) {
6018
0
    return (cpi->common.current_frame.reference_mode == SINGLE_REFERENCE &&
6019
0
            (cpi->sf.interp_sf.skip_interp_filter_search ||
6020
0
             cpi->sf.winner_mode_sf.winner_mode_ifs));
6021
0
  } else if (encoding_mode == GOOD) {
6022
    // Skip interpolation filter search for single prediction modes.
6023
0
    return (cpi->sf.interp_sf.skip_interp_filter_search && is_single_pred);
6024
0
  }
6025
0
  return false;
6026
0
}
6027
6028
static inline int get_block_temp_var(const AV1_COMP *cpi, const MACROBLOCK *x,
6029
0
                                     BLOCK_SIZE bsize) {
6030
0
  const AV1_COMMON *const cm = &cpi->common;
6031
0
  const SPEED_FEATURES *const sf = &cpi->sf;
6032
6033
0
  if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION ||
6034
0
      !sf->rt_sf.short_circuit_low_temp_var ||
6035
0
      !sf->rt_sf.prune_inter_modes_using_temp_var) {
6036
0
    return 0;
6037
0
  }
6038
6039
0
  const int mi_row = x->e_mbd.mi_row;
6040
0
  const int mi_col = x->e_mbd.mi_col;
6041
0
  int is_low_temp_var = 0;
6042
6043
0
  if (cm->seq_params->sb_size == BLOCK_64X64)
6044
0
    is_low_temp_var = av1_get_force_skip_low_temp_var_small_sb(
6045
0
        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
6046
0
  else
6047
0
    is_low_temp_var = av1_get_force_skip_low_temp_var(
6048
0
        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
6049
6050
0
  return is_low_temp_var;
6051
0
}
6052
6053
// TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
6054
void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
6055
                            struct macroblock *x, struct RD_STATS *rd_cost,
6056
                            BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
6057
0
                            int64_t best_rd_so_far) {
6058
0
  AV1_COMMON *const cm = &cpi->common;
6059
0
  const FeatureFlags *const features = &cm->features;
6060
0
  const int num_planes = av1_num_planes(cm);
6061
0
  const SPEED_FEATURES *const sf = &cpi->sf;
6062
0
  MACROBLOCKD *const xd = &x->e_mbd;
6063
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
6064
0
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
6065
0
  int i;
6066
0
  const ModeCosts *mode_costs = &x->mode_costs;
6067
0
  const int *comp_inter_cost =
6068
0
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
6069
6070
0
  InterModeSearchState search_state;
6071
0
  init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
6072
0
  INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
6073
0
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
6074
0
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
6075
0
  };
6076
0
  HandleInterModeArgs args = { { NULL },
6077
0
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
6078
0
                               { NULL },
6079
0
                               { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
6080
0
                                 MAX_SB_SIZE >> 1 },
6081
0
                               NULL,
6082
0
                               NULL,
6083
0
                               NULL,
6084
0
                               search_state.modelled_rd,
6085
0
                               INT_MAX,
6086
0
                               INT_MAX,
6087
0
                               search_state.simple_rd,
6088
0
                               0,
6089
0
                               false,
6090
0
                               interintra_modes,
6091
0
                               { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
6092
0
                               { { 0, 0 } },
6093
0
                               { 0 },
6094
0
                               0,
6095
0
                               0,
6096
0
                               -1,
6097
0
                               -1,
6098
0
                               -1,
6099
0
                               { 0 },
6100
0
                               { 0 },
6101
0
                               UINT_MAX };
6102
  // Currently, is_low_temp_var is used in real time encoding.
6103
0
  const int is_low_temp_var = get_block_temp_var(cpi, x, bsize);
6104
6105
0
  for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
6106
  // Indicates the appropriate number of simple translation winner modes for
6107
  // exhaustive motion mode evaluation
6108
0
  const int max_winner_motion_mode_cand =
6109
0
      num_winner_motion_modes[sf->winner_mode_sf.motion_mode_for_winner_cand];
6110
0
  assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
6111
0
  motion_mode_candidate motion_mode_cand;
6112
0
  motion_mode_best_st_candidate best_motion_mode_cands;
6113
  // Initializing the number of motion mode candidates to zero.
6114
0
  best_motion_mode_cands.num_motion_mode_cand = 0;
6115
0
  for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
6116
0
    best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
6117
6118
0
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
6119
6120
0
  av1_invalid_rd_stats(rd_cost);
6121
6122
0
  for (i = 0; i < REF_FRAMES; ++i) {
6123
0
    x->warp_sample_info[i].num = -1;
6124
0
  }
6125
6126
  // Ref frames that are selected by square partition blocks.
6127
0
  int picked_ref_frames_mask = 0;
6128
0
  if (sf->inter_sf.prune_ref_frame_for_rect_partitions &&
6129
0
      mbmi->partition != PARTITION_NONE) {
6130
    // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
6131
    // partition blocks. prune_ref_frame_for_rect_partitions >=2
6132
    // implies prune for vert, horiz and extended partition blocks.
6133
0
    if ((mbmi->partition != PARTITION_VERT &&
6134
0
         mbmi->partition != PARTITION_HORZ) ||
6135
0
        sf->inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
6136
0
      picked_ref_frames_mask =
6137
0
          fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
6138
0
    }
6139
0
  }
6140
6141
#if CONFIG_COLLECT_COMPONENT_TIMING
6142
  start_timing(cpi, set_params_rd_pick_inter_mode_time);
6143
#endif
6144
  // Skip ref frames that never selected by square blocks.
6145
0
  const int skip_ref_frame_mask =
6146
0
      picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
6147
0
  mode_skip_mask_t mode_skip_mask;
6148
0
  unsigned int ref_costs_single[REF_FRAMES];
6149
0
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
6150
0
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
6151
  // init params, set frame modes, speed features
6152
0
  set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
6153
0
                                skip_ref_frame_mask, ref_costs_single,
6154
0
                                ref_costs_comp, yv12_mb);
6155
#if CONFIG_COLLECT_COMPONENT_TIMING
6156
  end_timing(cpi, set_params_rd_pick_inter_mode_time);
6157
#endif
6158
6159
0
  int64_t best_est_rd = INT64_MAX;
6160
0
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
6161
  // If do_tx_search is 0, only estimated RD should be computed.
6162
  // If do_tx_search is 1, all modes have TX search performed.
6163
0
  const int do_tx_search =
6164
0
      !((sf->inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
6165
0
        (sf->inter_sf.inter_mode_rd_model_estimation == 2 &&
6166
0
         num_pels_log2_lookup[bsize] > 8));
6167
0
  InterModesInfo *inter_modes_info = x->inter_modes_info;
6168
0
  inter_modes_info->num = 0;
6169
6170
  // Temporary buffers used by handle_inter_mode().
6171
0
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
6172
6173
  // The best RD found for the reference frame, among single reference modes.
6174
  // Note that the 0-th element will contain a cut-off that is later used
6175
  // to determine if we should skip a compound mode.
6176
0
  int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
6177
0
                                       INT64_MAX, INT64_MAX, INT64_MAX,
6178
0
                                       INT64_MAX, INT64_MAX };
6179
6180
  // Prepared stats used later to check if we could skip intra mode eval.
6181
0
  int64_t inter_cost = -1;
6182
0
  int64_t intra_cost = -1;
6183
  // Need to tweak the threshold for hdres speed 0 & 1.
6184
0
  const int mi_row = xd->mi_row;
6185
0
  const int mi_col = xd->mi_col;
6186
6187
  // Obtain the relevant tpl stats for pruning inter modes
6188
0
  PruneInfoFromTpl inter_cost_info_from_tpl;
6189
0
#if !CONFIG_REALTIME_ONLY
6190
0
  if (sf->inter_sf.prune_inter_modes_based_on_tpl) {
6191
    // x->tpl_keep_ref_frame[id] = 1 => no pruning in
6192
    // prune_ref_by_selective_ref_frame()
6193
    // x->tpl_keep_ref_frame[id] = 0  => ref frame can be pruned in
6194
    // prune_ref_by_selective_ref_frame()
6195
    // Populating valid_refs[idx] = 1 ensures that
6196
    // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
6197
    // pruned ref frame.
6198
0
    int valid_refs[INTER_REFS_PER_FRAME];
6199
0
    for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
6200
0
      const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
6201
0
      valid_refs[frame - 1] =
6202
0
          x->tpl_keep_ref_frame[frame] ||
6203
0
          !prune_ref_by_selective_ref_frame(
6204
0
              cpi, x, refs, cm->cur_frame->ref_display_order_hint);
6205
0
    }
6206
0
    av1_zero(inter_cost_info_from_tpl);
6207
0
    get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
6208
0
                              &inter_cost_info_from_tpl);
6209
0
  }
6210
6211
0
  const int do_pruning =
6212
0
      (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
6213
0
  if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
6214
0
      cpi->oxcf.algo_cfg.enable_tpl_model)
6215
0
    calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
6216
0
                                 &intra_cost);
6217
0
#endif  // !CONFIG_REALTIME_ONLY
6218
6219
  // Initialize best mode stats for winner mode processing.
6220
0
  const int max_winner_mode_count =
6221
0
      winner_mode_count_allowed[sf->winner_mode_sf.multi_winner_mode_type];
6222
0
  zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
6223
0
  x->winner_mode_count = 0;
6224
0
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
6225
0
                          NULL, bsize, best_rd_so_far,
6226
0
                          sf->winner_mode_sf.multi_winner_mode_type, 0);
6227
6228
0
  int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
6229
0
  if (sf->inter_sf.prune_inter_modes_if_skippable) {
6230
    // Higher multiplication factor values for lower quantizers.
6231
0
    mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
6232
0
  }
6233
6234
  // Initialize arguments for mode loop speed features
6235
0
  InterModeSFArgs sf_args = { &args.skip_motion_mode,
6236
0
                              &mode_skip_mask,
6237
0
                              &search_state,
6238
0
                              skip_ref_frame_mask,
6239
0
                              0,
6240
0
                              mode_thresh_mul_fact,
6241
0
                              0,
6242
0
                              0 };
6243
0
  int64_t best_inter_yrd = INT64_MAX;
6244
6245
  // This is the main loop of this function. It loops over all possible inter
6246
  // modes and calls handle_inter_mode() to compute the RD for each.
6247
  // Here midx is just an iterator index that should not be used by itself
6248
  // except to keep track of the number of modes searched. It should be used
6249
  // with av1_default_mode_order to get the enum that defines the mode, which
6250
  // can be used with av1_mode_defs to get the prediction mode and the ref
6251
  // frames.
6252
  // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
6253
  // good speedup for real time case. If we decide to use compound mode in real
6254
  // time, maybe we can modify av1_default_mode_order table.
6255
0
  THR_MODES mode_start = THR_INTER_MODE_START;
6256
0
  THR_MODES mode_end = THR_INTER_MODE_END;
6257
0
  const CurrentFrame *const current_frame = &cm->current_frame;
6258
0
  if (current_frame->reference_mode == SINGLE_REFERENCE) {
6259
0
    mode_start = SINGLE_REF_MODE_START;
6260
0
    mode_end = SINGLE_REF_MODE_END;
6261
0
  }
6262
6263
0
  for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
6264
    // Get the actual prediction mode we are trying in this iteration
6265
0
    const THR_MODES mode_enum = av1_default_mode_order[midx];
6266
0
    const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
6267
0
    const PREDICTION_MODE this_mode = mode_def->mode;
6268
0
    const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
6269
6270
0
    const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
6271
0
    const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
6272
0
    const int is_single_pred =
6273
0
        ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
6274
0
    const int comp_pred = second_ref_frame > INTRA_FRAME;
6275
6276
0
    init_mbmi(mbmi, this_mode, ref_frames, cm);
6277
6278
0
    txfm_info->skip_txfm = 0;
6279
0
    sf_args.num_single_modes_processed += is_single_pred;
6280
0
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
6281
#if CONFIG_COLLECT_COMPONENT_TIMING
6282
    start_timing(cpi, skip_inter_mode_time);
6283
#endif
6284
    // Apply speed features to decide if this inter mode can be skipped
6285
0
    const int is_skip_inter_mode = skip_inter_mode(
6286
0
        cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var);
6287
#if CONFIG_COLLECT_COMPONENT_TIMING
6288
    end_timing(cpi, skip_inter_mode_time);
6289
#endif
6290
0
    if (is_skip_inter_mode) continue;
6291
6292
    // Select prediction reference frames.
6293
0
    for (i = 0; i < num_planes; i++) {
6294
0
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
6295
0
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
6296
0
    }
6297
6298
0
    mbmi->angle_delta[PLANE_TYPE_Y] = 0;
6299
0
    mbmi->angle_delta[PLANE_TYPE_UV] = 0;
6300
0
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
6301
0
    mbmi->ref_mv_idx = 0;
6302
6303
0
    const int64_t ref_best_rd = search_state.best_rd;
6304
0
    RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
6305
0
    av1_init_rd_stats(&rd_stats);
6306
6307
0
    const int ref_frame_cost = comp_pred
6308
0
                                   ? ref_costs_comp[ref_frame][second_ref_frame]
6309
0
                                   : ref_costs_single[ref_frame];
6310
0
    const int compmode_cost =
6311
0
        is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
6312
0
    const int real_compmode_cost =
6313
0
        cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
6314
0
            ? compmode_cost
6315
0
            : 0;
6316
    // Point to variables that are maintained between loop iterations
6317
0
    args.single_newmv = search_state.single_newmv;
6318
0
    args.single_newmv_rate = search_state.single_newmv_rate;
6319
0
    args.single_newmv_valid = search_state.single_newmv_valid;
6320
0
    args.single_comp_cost = real_compmode_cost;
6321
0
    args.ref_frame_cost = ref_frame_cost;
6322
0
    args.best_pred_sse = search_state.best_pred_sse;
6323
0
    args.skip_ifs = skip_interp_filter_search(cpi, is_single_pred);
6324
0
    int64_t skip_rd[2] = { search_state.best_skip_rd[0],
6325
0
                           search_state.best_skip_rd[1] };
6326
0
    int64_t this_yrd = INT64_MAX;
6327
#if CONFIG_COLLECT_COMPONENT_TIMING
6328
    start_timing(cpi, handle_inter_mode_time);
6329
#endif
6330
0
    int64_t this_rd = handle_inter_mode(
6331
0
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
6332
0
        ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
6333
0
        inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
6334
0
        &this_yrd);
6335
#if CONFIG_COLLECT_COMPONENT_TIMING
6336
    end_timing(cpi, handle_inter_mode_time);
6337
#endif
6338
0
    if (current_frame->reference_mode != SINGLE_REFERENCE) {
6339
0
      if (!args.skip_ifs &&
6340
0
          sf->inter_sf.prune_comp_search_by_single_result > 0 &&
6341
0
          is_inter_singleref_mode(this_mode)) {
6342
0
        collect_single_states(x, &search_state, mbmi);
6343
0
      }
6344
6345
0
      if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
6346
0
          is_inter_singleref_mode(this_mode))
6347
0
        update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
6348
0
    }
6349
6350
0
    if (this_rd == INT64_MAX) continue;
6351
6352
0
    if (mbmi->skip_txfm) {
6353
0
      rd_stats_y.rate = 0;
6354
0
      rd_stats_uv.rate = 0;
6355
0
    }
6356
6357
0
    if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
6358
0
        this_rd < ref_frame_rd[ref_frame]) {
6359
0
      ref_frame_rd[ref_frame] = this_rd;
6360
0
    }
6361
6362
0
    adjust_cost(cpi, x, &this_rd);
6363
0
    adjust_rdcost(cpi, x, &rd_stats);
6364
6365
    // Did this mode help, i.e., is it the new best mode
6366
0
    if (this_rd < search_state.best_rd) {
6367
0
      assert(IMPLIES(comp_pred,
6368
0
                     cm->current_frame.reference_mode != SINGLE_REFERENCE));
6369
0
      search_state.best_pred_sse = x->pred_sse[ref_frame];
6370
0
      best_inter_yrd = this_yrd;
6371
0
      update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
6372
0
                          &rd_stats_uv, mode_enum, x, do_tx_search);
6373
0
      if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
6374
      // skip_rd[0] is the best total rd for a skip mode so far.
6375
      // skip_rd[1] is the best total rd for a skip mode so far in luma.
6376
      // When do_tx_search = 1, both skip_rd[0] and skip_rd[1] are updated.
6377
      // When do_tx_search = 0, skip_rd[1] is updated.
6378
0
      search_state.best_skip_rd[1] = skip_rd[1];
6379
0
    }
6380
0
    if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6381
      // Add this mode to motion mode candidate list for motion mode search
6382
      // if using motion_mode_for_winner_cand speed feature
6383
0
      handle_winner_cand(mbmi, &best_motion_mode_cands,
6384
0
                         max_winner_motion_mode_cand, this_rd,
6385
0
                         &motion_mode_cand, args.skip_motion_mode);
6386
0
    }
6387
6388
    /* keep record of best compound/single-only prediction */
6389
0
    record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
6390
0
                         x->rdmult, &search_state, compmode_cost);
6391
0
  }
6392
6393
#if CONFIG_COLLECT_COMPONENT_TIMING
6394
  start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6395
#endif
6396
0
  if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6397
    // For the single ref winner candidates, evaluate other motion modes (non
6398
    // simple translation).
6399
0
    evaluate_motion_mode_for_winner_candidates(
6400
0
        cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
6401
0
        &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
6402
0
        &search_state, &best_inter_yrd);
6403
0
  }
6404
#if CONFIG_COLLECT_COMPONENT_TIMING
6405
  end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6406
#endif
6407
6408
#if CONFIG_COLLECT_COMPONENT_TIMING
6409
  start_timing(cpi, do_tx_search_time);
6410
#endif
6411
0
  if (do_tx_search != 1) {
6412
    // A full tx search has not yet been done, do tx search for
6413
    // top mode candidates
6414
0
    tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
6415
0
                                    yv12_mb, mi_row, mi_col, &search_state,
6416
0
                                    rd_cost, ctx, &best_inter_yrd);
6417
0
  }
6418
#if CONFIG_COLLECT_COMPONENT_TIMING
6419
  end_timing(cpi, do_tx_search_time);
6420
#endif
6421
6422
#if CONFIG_COLLECT_COMPONENT_TIMING
6423
  start_timing(cpi, handle_intra_mode_time);
6424
#endif
6425
  // Gate intra mode evaluation if best of inter is skip except when source
6426
  // variance is extremely low and also based on max intra bsize.
6427
0
  skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost,
6428
0
                                 intra_cost);
6429
6430
0
  const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
6431
0
  search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
6432
0
                                   &sf_args, intra_ref_frame_cost,
6433
0
                                   best_inter_yrd);
6434
#if CONFIG_COLLECT_COMPONENT_TIMING
6435
  end_timing(cpi, handle_intra_mode_time);
6436
#endif
6437
6438
#if CONFIG_COLLECT_COMPONENT_TIMING
6439
  start_timing(cpi, refine_winner_mode_tx_time);
6440
#endif
6441
0
  int winner_mode_count =
6442
0
      sf->winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
6443
  // In effect only when fast tx search speed features are enabled.
6444
0
  refine_winner_mode_tx(
6445
0
      cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
6446
0
      &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
6447
0
      search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
6448
#if CONFIG_COLLECT_COMPONENT_TIMING
6449
  end_timing(cpi, refine_winner_mode_tx_time);
6450
#endif
6451
6452
  // Initialize default mode evaluation params
6453
0
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);
6454
6455
  // Only try palette mode when the best mode so far is an intra mode.
6456
0
  const int try_palette =
6457
0
      cpi->oxcf.tool_cfg.enable_palette &&
6458
0
      av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
6459
0
      !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
6460
0
  RD_STATS this_rd_cost;
6461
0
  int this_skippable = 0;
6462
0
  if (try_palette) {
6463
#if CONFIG_COLLECT_COMPONENT_TIMING
6464
    start_timing(cpi, av1_search_palette_mode_time);
6465
#endif
6466
0
    this_skippable = av1_search_palette_mode(
6467
0
        &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
6468
0
        ctx, &this_rd_cost, search_state.best_rd);
6469
#if CONFIG_COLLECT_COMPONENT_TIMING
6470
    end_timing(cpi, av1_search_palette_mode_time);
6471
#endif
6472
0
    if (this_rd_cost.rdcost < search_state.best_rd) {
6473
0
      search_state.best_mode_index = THR_DC;
6474
0
      mbmi->mv[0].as_int = 0;
6475
0
      rd_cost->rate = this_rd_cost.rate;
6476
0
      rd_cost->dist = this_rd_cost.dist;
6477
0
      rd_cost->rdcost = this_rd_cost.rdcost;
6478
0
      search_state.best_rd = rd_cost->rdcost;
6479
0
      search_state.best_mbmode = *mbmi;
6480
0
      search_state.best_skip2 = 0;
6481
0
      search_state.best_mode_skippable = this_skippable;
6482
0
      memcpy(ctx->blk_skip, txfm_info->blk_skip,
6483
0
             sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
6484
0
      av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
6485
0
    }
6486
0
  }
6487
6488
0
  search_state.best_mbmode.skip_mode = 0;
6489
0
  if (cm->current_frame.skip_mode_info.skip_mode_flag &&
6490
0
      cpi->oxcf.algo_cfg.sharpness != 3 && is_comp_ref_allowed(bsize)) {
6491
0
    const struct segmentation *const seg = &cm->seg;
6492
0
    unsigned char segment_id = mbmi->segment_id;
6493
0
    if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
6494
0
      rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
6495
0
    }
6496
0
  }
6497
6498
  // Make sure that the ref_mv_idx is only nonzero when we're
6499
  // using a mode which can support ref_mv_idx
6500
0
  if (search_state.best_mbmode.ref_mv_idx != 0 &&
6501
0
      !(search_state.best_mbmode.mode == NEWMV ||
6502
0
        search_state.best_mbmode.mode == NEW_NEWMV ||
6503
0
        have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
6504
0
    search_state.best_mbmode.ref_mv_idx = 0;
6505
0
  }
6506
6507
0
  if (search_state.best_mode_index == THR_INVALID ||
6508
0
      search_state.best_rd >= best_rd_so_far) {
6509
0
    rd_cost->rate = INT_MAX;
6510
0
    rd_cost->rdcost = INT64_MAX;
6511
0
    return;
6512
0
  }
6513
6514
0
  const InterpFilter interp_filter = features->interp_filter;
6515
0
  assert((interp_filter == SWITCHABLE) ||
6516
0
         (interp_filter ==
6517
0
          search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
6518
0
         !is_inter_block(&search_state.best_mbmode));
6519
0
  assert((interp_filter == SWITCHABLE) ||
6520
0
         (interp_filter ==
6521
0
          search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
6522
0
         !is_inter_block(&search_state.best_mbmode));
6523
6524
0
  if (!cpi->rc.is_src_frame_alt_ref && sf->inter_sf.adaptive_rd_thresh) {
6525
0
    av1_update_rd_thresh_fact(
6526
0
        cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
6527
0
        search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
6528
0
  }
6529
6530
  // macroblock modes
6531
0
  *mbmi = search_state.best_mbmode;
6532
0
  txfm_info->skip_txfm |= search_state.best_skip2;
6533
6534
  // Note: this section is needed since the mode may have been forced to
6535
  // GLOBALMV by the all-zero mode handling of ref-mv.
6536
0
  if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
6537
    // Correct the interp filters for GLOBALMV
6538
0
    if (is_nontrans_global_motion(xd, xd->mi[0])) {
6539
0
      int_interpfilters filters =
6540
0
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
6541
0
      assert(mbmi->interp_filters.as_int == filters.as_int);
6542
0
      (void)filters;
6543
0
    }
6544
0
  }
6545
6546
0
  txfm_info->skip_txfm |= search_state.best_mode_skippable;
6547
6548
0
  assert(search_state.best_mode_index != THR_INVALID);
6549
6550
#if CONFIG_INTERNAL_STATS
6551
  store_coding_context(x, ctx, search_state.best_mode_index,
6552
                       search_state.best_mode_skippable);
6553
#else
6554
0
  store_coding_context(x, ctx, search_state.best_mode_skippable);
6555
0
#endif  // CONFIG_INTERNAL_STATS
6556
6557
0
  if (mbmi->palette_mode_info.palette_size[1] > 0) {
6558
0
    assert(try_palette);
6559
0
    av1_restore_uv_color_map(cpi, x);
6560
0
  }
6561
0
}
6562
6563
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
6564
                                        TileDataEnc *tile_data, MACROBLOCK *x,
6565
                                        int mi_row, int mi_col,
6566
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
6567
                                        PICK_MODE_CONTEXT *ctx,
6568
0
                                        int64_t best_rd_so_far) {
6569
0
  const AV1_COMMON *const cm = &cpi->common;
6570
0
  const FeatureFlags *const features = &cm->features;
6571
0
  MACROBLOCKD *const xd = &x->e_mbd;
6572
0
  MB_MODE_INFO *const mbmi = xd->mi[0];
6573
0
  unsigned char segment_id = mbmi->segment_id;
6574
0
  const int comp_pred = 0;
6575
0
  int i;
6576
0
  unsigned int ref_costs_single[REF_FRAMES];
6577
0
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
6578
0
  const ModeCosts *mode_costs = &x->mode_costs;
6579
0
  const int *comp_inter_cost =
6580
0
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
6581
0
  InterpFilter best_filter = SWITCHABLE;
6582
0
  int64_t this_rd = INT64_MAX;
6583
0
  int rate2 = 0;
6584
0
  const int64_t distortion2 = 0;
6585
0
  (void)mi_row;
6586
0
  (void)mi_col;
6587
0
  (void)tile_data;
6588
6589
0
  av1_collect_neighbors_ref_counts(xd);
6590
6591
0
  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
6592
0
                           ref_costs_comp);
6593
6594
0
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
6595
0
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;
6596
6597
0
  rd_cost->rate = INT_MAX;
6598
6599
0
  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
6600
6601
0
  mbmi->palette_mode_info.palette_size[0] = 0;
6602
0
  mbmi->palette_mode_info.palette_size[1] = 0;
6603
0
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
6604
0
  mbmi->mode = GLOBALMV;
6605
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
6606
0
  mbmi->uv_mode = UV_DC_PRED;
6607
0
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
6608
0
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
6609
0
  else
6610
0
    mbmi->ref_frame[0] = LAST_FRAME;
6611
0
  mbmi->ref_frame[1] = NONE_FRAME;
6612
0
  mbmi->mv[0].as_int =
6613
0
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
6614
0
                           features->allow_high_precision_mv, bsize, mi_col,
6615
0
                           mi_row, features->cur_frame_force_integer_mv)
6616
0
          .as_int;
6617
0
  mbmi->tx_size = max_txsize_lookup[bsize];
6618
0
  x->txfm_search_info.skip_txfm = 1;
6619
6620
0
  mbmi->ref_mv_idx = 0;
6621
6622
0
  mbmi->motion_mode = SIMPLE_TRANSLATION;
6623
0
  av1_count_overlappable_neighbors(cm, xd);
6624
0
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
6625
0
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
6626
0
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
6627
    // Select the samples according to motion vector difference
6628
0
    if (mbmi->num_proj_ref > 1) {
6629
0
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
6630
0
                                             mbmi->num_proj_ref, bsize);
6631
0
    }
6632
0
  }
6633
6634
0
  const InterpFilter interp_filter = features->interp_filter;
6635
0
  set_default_interp_filters(mbmi, interp_filter);
6636
6637
0
  if (interp_filter != SWITCHABLE) {
6638
0
    best_filter = interp_filter;
6639
0
  } else {
6640
0
    best_filter = EIGHTTAP_REGULAR;
6641
0
    if (av1_is_interp_needed(xd)) {
6642
0
      int rs;
6643
0
      int best_rs = INT_MAX;
6644
0
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
6645
0
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
6646
0
        rs = av1_get_switchable_rate(x, xd, interp_filter,
6647
0
                                     cm->seq_params->enable_dual_filter);
6648
0
        if (rs < best_rs) {
6649
0
          best_rs = rs;
6650
0
          best_filter = mbmi->interp_filters.as_filters.y_filter;
6651
0
        }
6652
0
      }
6653
0
    }
6654
0
  }
6655
  // Set the appropriate filter
6656
0
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
6657
0
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
6658
0
                                   cm->seq_params->enable_dual_filter);
6659
6660
0
  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
6661
0
    rate2 += comp_inter_cost[comp_pred];
6662
6663
  // Estimate the reference frame signaling cost and add it
6664
  // to the rolling cost variable.
6665
0
  rate2 += ref_costs_single[LAST_FRAME];
6666
0
  this_rd = RDCOST(x->rdmult, rate2, distortion2);
6667
6668
0
  rd_cost->rate = rate2;
6669
0
  rd_cost->dist = distortion2;
6670
0
  rd_cost->rdcost = this_rd;
6671
6672
0
  if (this_rd >= best_rd_so_far) {
6673
0
    rd_cost->rate = INT_MAX;
6674
0
    rd_cost->rdcost = INT64_MAX;
6675
0
    return;
6676
0
  }
6677
6678
0
  assert((interp_filter == SWITCHABLE) ||
6679
0
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));
6680
6681
0
  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
6682
0
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
6683
0
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
6684
0
                              THR_GLOBALMV, THR_INTER_MODE_START,
6685
0
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
6686
0
  }
6687
6688
#if CONFIG_INTERNAL_STATS
6689
  store_coding_context(x, ctx, THR_GLOBALMV, 0);
6690
#else
6691
0
  store_coding_context(x, ctx, 0);
6692
0
#endif  // CONFIG_INTERNAL_STATS
6693
0
}
6694
6695
/*!\cond */
6696
struct calc_target_weighted_pred_ctxt {
6697
  const OBMCBuffer *obmc_buffer;
6698
  const uint8_t *tmp;
6699
  int tmp_stride;
6700
  int overlap;
6701
};
6702
/*!\endcond */
6703
6704
static inline void calc_target_weighted_pred_above(
6705
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6706
0
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6707
0
  (void)nb_mi;
6708
0
  (void)num_planes;
6709
0
  (void)rel_mi_row;
6710
0
  (void)dir;
6711
6712
0
  struct calc_target_weighted_pred_ctxt *ctxt =
6713
0
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6714
6715
0
  const int bw = xd->width << MI_SIZE_LOG2;
6716
0
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6717
6718
0
  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
6719
0
  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
6720
0
  const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
6721
0
  const int is_hbd = is_cur_buf_hbd(xd);
6722
6723
0
  if (!is_hbd) {
6724
0
    for (int row = 0; row < ctxt->overlap; ++row) {
6725
0
      const uint8_t m0 = mask1d[row];
6726
0
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6727
0
      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6728
0
        wsrc[col] = m1 * tmp[col];
6729
0
        mask[col] = m0;
6730
0
      }
6731
0
      wsrc += bw;
6732
0
      mask += bw;
6733
0
      tmp += ctxt->tmp_stride;
6734
0
    }
6735
0
  } else {
6736
0
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6737
6738
0
    for (int row = 0; row < ctxt->overlap; ++row) {
6739
0
      const uint8_t m0 = mask1d[row];
6740
0
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6741
0
      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6742
0
        wsrc[col] = m1 * tmp16[col];
6743
0
        mask[col] = m0;
6744
0
      }
6745
0
      wsrc += bw;
6746
0
      mask += bw;
6747
0
      tmp16 += ctxt->tmp_stride;
6748
0
    }
6749
0
  }
6750
0
}
6751
6752
static inline void calc_target_weighted_pred_left(
6753
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6754
0
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6755
0
  (void)nb_mi;
6756
0
  (void)num_planes;
6757
0
  (void)rel_mi_col;
6758
0
  (void)dir;
6759
6760
0
  struct calc_target_weighted_pred_ctxt *ctxt =
6761
0
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6762
6763
0
  const int bw = xd->width << MI_SIZE_LOG2;
6764
0
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6765
6766
0
  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
6767
0
  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
6768
0
  const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
6769
0
  const int is_hbd = is_cur_buf_hbd(xd);
6770
6771
0
  if (!is_hbd) {
6772
0
    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6773
0
      for (int col = 0; col < ctxt->overlap; ++col) {
6774
0
        const uint8_t m0 = mask1d[col];
6775
0
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6776
0
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6777
0
                    (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6778
0
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6779
0
      }
6780
0
      wsrc += bw;
6781
0
      mask += bw;
6782
0
      tmp += ctxt->tmp_stride;
6783
0
    }
6784
0
  } else {
6785
0
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6786
6787
0
    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6788
0
      for (int col = 0; col < ctxt->overlap; ++col) {
6789
0
        const uint8_t m0 = mask1d[col];
6790
0
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6791
0
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6792
0
                    (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6793
0
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6794
0
      }
6795
0
      wsrc += bw;
6796
0
      mask += bw;
6797
0
      tmp16 += ctxt->tmp_stride;
6798
0
    }
6799
0
  }
6800
0
}
6801
6802
// This function has a structure similar to av1_build_obmc_inter_prediction
//
// The OBMC predictor is computed as:
//
//  PObmc(x,y) =
//    AOM_BLEND_A64(Mh(x),
//                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
//                  PLeft(x, y))
//
// Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
// rounding, this can be written as:
//
//  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
//    Mh(x) * Mv(y) * P(x,y) +
//      Mh(x) * Cv(y) * Pabove(x,y) +
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
//
// Where :
//
//  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
//  Ch(y) = AOM_BLEND_A64_MAX_ALPHA - Mh(y)
//
// This function computes 'wsrc' and 'mask' as:
//
//  wsrc(x, y) =
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
//      Mh(x) * Cv(y) * Pabove(x,y) +
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
//
//  mask(x, y) = Mh(x) * Mv(y)
//
// These can then be used to efficiently approximate the error for any
// predictor P in the context of the provided neighbouring predictors by
// computing:
//
//  error(x, y) =
//    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
//
static inline void calc_target_weighted_pred(
6841
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
6842
    const uint8_t *above, int above_stride, const uint8_t *left,
6843
0
    int left_stride) {
6844
0
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
6845
0
  const int bw = xd->width << MI_SIZE_LOG2;
6846
0
  const int bh = xd->height << MI_SIZE_LOG2;
6847
0
  const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
6848
0
  int32_t *mask_buf = obmc_buffer->mask;
6849
0
  int32_t *wsrc_buf = obmc_buffer->wsrc;
6850
6851
0
  const int is_hbd = is_cur_buf_hbd(xd);
6852
0
  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
6853
6854
  // plane 0 should not be sub-sampled
6855
0
  assert(xd->plane[0].subsampling_x == 0);
6856
0
  assert(xd->plane[0].subsampling_y == 0);
6857
6858
0
  av1_zero_array(wsrc_buf, bw * bh);
6859
0
  for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
6860
6861
  // handle above row
6862
0
  if (xd->up_available) {
6863
0
    const int overlap =
6864
0
        AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
6865
0
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
6866
0
                                                   above_stride, overlap };
6867
0
    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
6868
0
                                  max_neighbor_obmc[mi_size_wide_log2[bsize]],
6869
0
                                  calc_target_weighted_pred_above, &ctxt);
6870
0
  }
6871
6872
0
  for (int i = 0; i < bw * bh; ++i) {
6873
0
    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6874
0
    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6875
0
  }
6876
6877
  // handle left column
6878
0
  if (xd->left_available) {
6879
0
    const int overlap =
6880
0
        AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
6881
0
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
6882
0
                                                   left_stride, overlap };
6883
0
    foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
6884
0
                                 max_neighbor_obmc[mi_size_high_log2[bsize]],
6885
0
                                 calc_target_weighted_pred_left, &ctxt);
6886
0
  }
6887
6888
0
  if (!is_hbd) {
6889
0
    const uint8_t *src = x->plane[0].src.buf;
6890
6891
0
    for (int row = 0; row < bh; ++row) {
6892
0
      for (int col = 0; col < bw; ++col) {
6893
0
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6894
0
      }
6895
0
      wsrc_buf += bw;
6896
0
      src += x->plane[0].src.stride;
6897
0
    }
6898
0
  } else {
6899
0
    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
6900
6901
0
    for (int row = 0; row < bh; ++row) {
6902
0
      for (int col = 0; col < bw; ++col) {
6903
0
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6904
0
      }
6905
0
      wsrc_buf += bw;
6906
0
      src += x->plane[0].src.stride;
6907
0
    }
6908
0
  }
6909
0
}