Coverage Report

Created: 2025-10-28 07:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/aom/av1/encoder/rd.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <limits.h>
14
#include <math.h>
15
#include <stdio.h>
16
17
#include "aom_dsp/aom_dsp_common.h"
18
#include "aom_mem/aom_mem.h"
19
#include "aom_ports/bitops.h"
20
#include "aom_ports/mem.h"
21
#include "aom_ports/aom_once.h"
22
23
#include "av1/common/common.h"
24
#include "av1/common/entropy.h"
25
#include "av1/common/entropymode.h"
26
#include "av1/common/pred_common.h"
27
#include "av1/common/quant_common.h"
28
#include "av1/common/reconinter.h"
29
#include "av1/common/reconintra.h"
30
#include "av1/common/seg_common.h"
31
32
#include "av1/encoder/cost.h"
33
#include "av1/encoder/encodemv.h"
34
#include "av1/encoder/encoder.h"
35
#include "av1/encoder/nonrd_opt.h"
36
#include "av1/encoder/ratectrl.h"
37
#include "av1/encoder/rd.h"
38
#include "config/aom_config.h"
39
40
#define RD_THRESH_POW 1.25
41
42
// The baseline rd thresholds for breaking out of the rd loop for
43
// certain modes are assumed to be based on 8x8 blocks.
44
// This table is used to correct for block size.
45
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
46
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
47
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
48
};
49
50
static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA]
51
                                            [EXT_TX_SIZES] = {
52
                                              { 1, 1, 1, 1 },  // unused
53
                                              { 1, 1, 0, 0 },
54
                                              { 0, 0, 1, 0 },
55
                                            };
56
57
static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER]
58
                                            [EXT_TX_SIZES] = {
59
                                              { 1, 1, 1, 1 },  // unused
60
                                              { 1, 1, 0, 0 },
61
                                              { 0, 0, 1, 0 },
62
                                              { 0, 1, 1, 1 },
63
                                            };
64
65
static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
66
                                                      EXT_TX_SETS_INTER)] = {
67
  {
68
      // Intra
69
      EXT_TX_SET_DCTONLY,
70
      EXT_TX_SET_DTT4_IDTX_1DDCT,
71
      EXT_TX_SET_DTT4_IDTX,
72
  },
73
  {
74
      // Inter
75
      EXT_TX_SET_DCTONLY,
76
      EXT_TX_SET_ALL16,
77
      EXT_TX_SET_DTT9_IDTX_1DDCT,
78
      EXT_TX_SET_DCT_IDTX,
79
  },
80
};
81
82
void av1_fill_mode_rates(AV1_COMMON *const cm, ModeCosts *mode_costs,
83
422k
                         FRAME_CONTEXT *fc) {
84
422k
  int i, j;
85
86
8.35M
  for (i = 0; i < PARTITION_CONTEXTS; ++i)
87
7.92M
    av1_cost_tokens_from_cdf(mode_costs->partition_cost[i],
88
7.92M
                             fc->partition_cdf[i], NULL);
89
90
422k
  if (cm->current_frame.skip_mode_info.skip_mode_flag) {
91
58.9k
    for (i = 0; i < SKIP_MODE_CONTEXTS; ++i) {
92
44.1k
      av1_cost_tokens_from_cdf(mode_costs->skip_mode_cost[i],
93
44.1k
                               fc->skip_mode_cdfs[i], NULL);
94
44.1k
    }
95
14.7k
  }
96
97
1.67M
  for (i = 0; i < SKIP_CONTEXTS; ++i) {
98
1.25M
    av1_cost_tokens_from_cdf(mode_costs->skip_txfm_cost[i],
99
1.25M
                             fc->skip_txfm_cdfs[i], NULL);
100
1.25M
  }
101
102
2.47M
  for (i = 0; i < KF_MODE_CONTEXTS; ++i)
103
11.7M
    for (j = 0; j < KF_MODE_CONTEXTS; ++j)
104
9.69M
      av1_cost_tokens_from_cdf(mode_costs->y_mode_costs[i][j],
105
9.69M
                               fc->kf_y_cdf[i][j], NULL);
106
107
2.06M
  for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
108
1.64M
    av1_cost_tokens_from_cdf(mode_costs->mbmode_cost[i], fc->y_mode_cdf[i],
109
1.64M
                             NULL);
110
1.25M
  for (i = 0; i < CFL_ALLOWED_TYPES; ++i)
111
10.9M
    for (j = 0; j < INTRA_MODES; ++j)
112
10.0M
      av1_cost_tokens_from_cdf(mode_costs->intra_uv_mode_cost[i][j],
113
10.0M
                               fc->uv_mode_cdf[i][j], NULL);
114
115
422k
  av1_cost_tokens_from_cdf(mode_costs->filter_intra_mode_cost,
116
422k
                           fc->filter_intra_mode_cdf, NULL);
117
9.45M
  for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
118
9.03M
    if (av1_filter_intra_allowed_bsize(cm, i))
119
5.77M
      av1_cost_tokens_from_cdf(mode_costs->filter_intra_cost[i],
120
5.77M
                               fc->filter_intra_cdfs[i], NULL);
121
9.03M
  }
122
123
6.97M
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
124
6.55M
    av1_cost_tokens_from_cdf(mode_costs->switchable_interp_costs[i],
125
6.55M
                             fc->switchable_interp_cdf[i], NULL);
126
127
3.27M
  for (i = 0; i < PALATTE_BSIZE_CTXS; ++i) {
128
2.85M
    av1_cost_tokens_from_cdf(mode_costs->palette_y_size_cost[i],
129
2.85M
                             fc->palette_y_size_cdf[i], NULL);
130
2.85M
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_size_cost[i],
131
2.85M
                             fc->palette_uv_size_cdf[i], NULL);
132
11.2M
    for (j = 0; j < PALETTE_Y_MODE_CONTEXTS; ++j) {
133
8.41M
      av1_cost_tokens_from_cdf(mode_costs->palette_y_mode_cost[i][j],
134
8.41M
                               fc->palette_y_mode_cdf[i][j], NULL);
135
8.41M
    }
136
2.85M
  }
137
138
1.25M
  for (i = 0; i < PALETTE_UV_MODE_CONTEXTS; ++i) {
139
837k
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_mode_cost[i],
140
837k
                             fc->palette_uv_mode_cdf[i], NULL);
141
837k
  }
142
143
3.28M
  for (i = 0; i < PALETTE_SIZES; ++i) {
144
16.5M
    for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) {
145
13.6M
      av1_cost_tokens_from_cdf(mode_costs->palette_y_color_cost[i][j],
146
13.6M
                               fc->palette_y_color_index_cdf[i][j], NULL);
147
13.6M
      av1_cost_tokens_from_cdf(mode_costs->palette_uv_color_cost[i][j],
148
13.6M
                               fc->palette_uv_color_index_cdf[i][j], NULL);
149
13.6M
    }
150
2.86M
  }
151
152
422k
  int sign_cost[CFL_JOINT_SIGNS];
153
422k
  av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL);
154
3.72M
  for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
155
3.30M
    int *cost_u = mode_costs->cfl_cost[joint_sign][CFL_PRED_U];
156
3.30M
    int *cost_v = mode_costs->cfl_cost[joint_sign][CFL_PRED_V];
157
3.30M
    if (CFL_SIGN_U(joint_sign) == CFL_SIGN_ZERO) {
158
836k
      memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u));
159
2.47M
    } else {
160
2.47M
      const aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
161
2.47M
      av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL);
162
2.47M
    }
163
3.30M
    if (CFL_SIGN_V(joint_sign) == CFL_SIGN_ZERO) {
164
832k
      memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v));
165
2.47M
    } else {
166
2.47M
      const aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
167
2.47M
      av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL);
168
2.47M
    }
169
55.1M
    for (int u = 0; u < CFL_ALPHABET_SIZE; u++)
170
51.8M
      cost_u[u] += sign_cost[joint_sign];
171
3.30M
  }
172
173
2.09M
  for (i = 0; i < MAX_TX_CATS; ++i)
174
6.63M
    for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
175
4.96M
      av1_cost_tokens_from_cdf(mode_costs->tx_size_cost[i][j],
176
4.96M
                               fc->tx_size_cdf[i][j], NULL);
177
178
9.04M
  for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) {
179
8.62M
    av1_cost_tokens_from_cdf(mode_costs->txfm_partition_cost[i],
180
8.62M
                             fc->txfm_partition_cdf[i], NULL);
181
8.62M
  }
182
183
2.08M
  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
184
1.66M
    int s;
185
6.58M
    for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
186
4.92M
      if (use_inter_ext_tx_for_txsize[s][i]) {
187
2.47M
        av1_cost_tokens_from_cdf(
188
2.47M
            mode_costs->inter_tx_type_costs[s][i], fc->inter_ext_tx_cdf[s][i],
189
2.47M
            av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]);
190
2.47M
      }
191
4.92M
    }
192
4.95M
    for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
193
3.29M
      if (use_intra_ext_tx_for_txsize[s][i]) {
194
16.5M
        for (j = 0; j < INTRA_MODES; ++j) {
195
15.2M
          av1_cost_tokens_from_cdf(
196
15.2M
              mode_costs->intra_tx_type_costs[s][i][j],
197
15.2M
              fc->intra_ext_tx_cdf[s][i][j],
198
15.2M
              av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][s]]);
199
15.2M
        }
200
1.24M
      }
201
3.29M
    }
202
1.66M
  }
203
3.71M
  for (i = 0; i < DIRECTIONAL_MODES; ++i) {
204
3.29M
    av1_cost_tokens_from_cdf(mode_costs->angle_delta_cost[i],
205
3.29M
                             fc->angle_delta_cdf[i], NULL);
206
3.29M
  }
207
422k
  av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL);
208
209
1.67M
  for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i) {
210
1.24M
    av1_cost_tokens_from_cdf(mode_costs->spatial_pred_cost[i],
211
1.24M
                             fc->seg.spatial_pred_seg_cdf[i], NULL);
212
1.24M
  }
213
214
1.67M
  for (i = 0; i < SEG_TEMPORAL_PRED_CTXS; ++i) {
215
1.25M
    av1_cost_tokens_from_cdf(mode_costs->tmp_pred_cost[i], fc->seg.pred_cdf[i],
216
1.25M
                             NULL);
217
1.25M
  }
218
219
422k
  if (!frame_is_intra_only(cm)) {
220
585k
    for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
221
487k
      av1_cost_tokens_from_cdf(mode_costs->comp_inter_cost[i],
222
487k
                               fc->comp_inter_cdf[i], NULL);
223
487k
    }
224
225
390k
    for (i = 0; i < REF_CONTEXTS; ++i) {
226
2.03M
      for (j = 0; j < SINGLE_REFS - 1; ++j) {
227
1.73M
        av1_cost_tokens_from_cdf(mode_costs->single_ref_cost[i][j],
228
1.73M
                                 fc->single_ref_cdf[i][j], NULL);
229
1.73M
      }
230
292k
    }
231
232
585k
    for (i = 0; i < COMP_REF_TYPE_CONTEXTS; ++i) {
233
487k
      av1_cost_tokens_from_cdf(mode_costs->comp_ref_type_cost[i],
234
487k
                               fc->comp_ref_type_cdf[i], NULL);
235
487k
    }
236
237
390k
    for (i = 0; i < UNI_COMP_REF_CONTEXTS; ++i) {
238
1.16M
      for (j = 0; j < UNIDIR_COMP_REFS - 1; ++j) {
239
874k
        av1_cost_tokens_from_cdf(mode_costs->uni_comp_ref_cost[i][j],
240
874k
                                 fc->uni_comp_ref_cdf[i][j], NULL);
241
874k
      }
242
292k
    }
243
244
390k
    for (i = 0; i < REF_CONTEXTS; ++i) {
245
1.16M
      for (j = 0; j < FWD_REFS - 1; ++j) {
246
874k
        av1_cost_tokens_from_cdf(mode_costs->comp_ref_cost[i][j],
247
874k
                                 fc->comp_ref_cdf[i][j], NULL);
248
874k
      }
249
292k
    }
250
251
390k
    for (i = 0; i < REF_CONTEXTS; ++i) {
252
877k
      for (j = 0; j < BWD_REFS - 1; ++j) {
253
584k
        av1_cost_tokens_from_cdf(mode_costs->comp_bwdref_cost[i][j],
254
584k
                                 fc->comp_bwdref_cdf[i][j], NULL);
255
584k
      }
256
292k
    }
257
258
487k
    for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
259
390k
      av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[i],
260
390k
                               fc->intra_inter_cdf[i], NULL);
261
390k
    }
262
263
682k
    for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
264
584k
      av1_cost_tokens_from_cdf(mode_costs->newmv_mode_cost[i], fc->newmv_cdf[i],
265
584k
                               NULL);
266
584k
    }
267
268
293k
    for (i = 0; i < GLOBALMV_MODE_CONTEXTS; ++i) {
269
195k
      av1_cost_tokens_from_cdf(mode_costs->zeromv_mode_cost[i],
270
195k
                               fc->zeromv_cdf[i], NULL);
271
195k
    }
272
273
682k
    for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) {
274
584k
      av1_cost_tokens_from_cdf(mode_costs->refmv_mode_cost[i], fc->refmv_cdf[i],
275
584k
                               NULL);
276
584k
    }
277
278
390k
    for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
279
292k
      av1_cost_tokens_from_cdf(mode_costs->drl_mode_cost0[i], fc->drl_cdf[i],
280
292k
                               NULL);
281
292k
    }
282
867k
    for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
283
769k
      av1_cost_tokens_from_cdf(mode_costs->inter_compound_mode_cost[i],
284
769k
                               fc->inter_compound_mode_cdf[i], NULL);
285
2.21M
    for (i = 0; i < BLOCK_SIZES_ALL; ++i)
286
2.11M
      av1_cost_tokens_from_cdf(mode_costs->compound_type_cost[i],
287
2.11M
                               fc->compound_type_cdf[i], NULL);
288
2.18M
    for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
289
2.08M
      if (av1_is_wedge_used(i)) {
290
857k
        av1_cost_tokens_from_cdf(mode_costs->wedge_idx_cost[i],
291
857k
                                 fc->wedge_idx_cdf[i], NULL);
292
857k
      }
293
2.08M
    }
294
486k
    for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
295
388k
      av1_cost_tokens_from_cdf(mode_costs->interintra_cost[i],
296
388k
                               fc->interintra_cdf[i], NULL);
297
388k
      av1_cost_tokens_from_cdf(mode_costs->interintra_mode_cost[i],
298
388k
                               fc->interintra_mode_cdf[i], NULL);
299
388k
    }
300
2.21M
    for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
301
2.12M
      av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[i],
302
2.12M
                               fc->wedge_interintra_cdf[i], NULL);
303
2.12M
    }
304
1.92M
    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
305
1.82M
      av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost[i],
306
1.82M
                               fc->motion_mode_cdf[i], NULL);
307
1.82M
    }
308
1.93M
    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
309
1.83M
      av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost1[i],
310
1.83M
                               fc->obmc_cdf[i], NULL);
311
1.83M
    }
312
682k
    for (i = 0; i < COMP_INDEX_CONTEXTS; ++i) {
313
584k
      av1_cost_tokens_from_cdf(mode_costs->comp_idx_cost[i],
314
584k
                               fc->compound_index_cdf[i], NULL);
315
584k
    }
316
682k
    for (i = 0; i < COMP_GROUP_IDX_CONTEXTS; ++i) {
317
584k
      av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[i],
318
584k
                               fc->comp_group_idx_cdf[i], NULL);
319
584k
    }
320
97.8k
  }
321
422k
}
322
323
#if !CONFIG_REALTIME_ONLY
324
13.4k
void av1_fill_lr_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
325
13.4k
  av1_cost_tokens_from_cdf(mode_costs->switchable_restore_cost,
326
13.4k
                           fc->switchable_restore_cdf, NULL);
327
13.4k
  av1_cost_tokens_from_cdf(mode_costs->wiener_restore_cost,
328
13.4k
                           fc->wiener_restore_cdf, NULL);
329
13.4k
  av1_cost_tokens_from_cdf(mode_costs->sgrproj_restore_cost,
330
13.4k
                           fc->sgrproj_restore_cdf, NULL);
331
13.4k
}
332
#endif  // !CONFIG_REALTIME_ONLY
333
334
// Values are now correlated to quantizer.
335
static int sad_per_bit_lut_8[QINDEX_RANGE];
336
static int sad_per_bit_lut_10[QINDEX_RANGE];
337
static int sad_per_bit_lut_12[QINDEX_RANGE];
338
339
static void init_me_luts_bd(int *bit16lut, int range,
340
12
                            aom_bit_depth_t bit_depth) {
341
12
  int i;
342
  // Initialize the sad lut tables using a formulaic calculation for now.
343
  // This is to make it easier to resolve the impact of experimental changes
344
  // to the quantizer tables.
345
3.08k
  for (i = 0; i < range; i++) {
346
3.07k
    const double q = av1_convert_qindex_to_q(i, bit_depth);
347
3.07k
    bit16lut[i] = (int)(0.0418 * q + 2.4107);
348
3.07k
  }
349
12
}
350
351
4
static void init_me_luts(void) {
352
4
  init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, AOM_BITS_8);
353
4
  init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, AOM_BITS_10);
354
4
  init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
355
4
}
356
357
67.8k
void av1_init_me_luts(void) { aom_once(init_me_luts); }
358
359
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
360
                                         8,  8,  4,  4,  2,  2,  1,  0 };
361
362
static const int rd_layer_depth_factor[7] = {
363
  160, 160, 160, 160, 192, 208, 224
364
};
365
366
// Returns the default rd multiplier for inter frames for a given qindex.
367
// The function here is a first pass estimate based on data from
368
// a previous Vizer run
369
35.6k
static double def_inter_rd_multiplier(int qindex) {
370
35.6k
  return 3.2 + (0.0015 * (double)qindex);
371
35.6k
}
372
373
// Returns the default rd multiplier for ARF/Golden Frames for a given qindex.
374
// The function here is a first pass estimate based on data from
375
// a previous Vizer run
376
19.4k
static double def_arf_rd_multiplier(int qindex) {
377
19.4k
  return 3.25 + (0.0015 * (double)qindex);
378
19.4k
}
379
380
// Returns the default rd multiplier for key frames for a given qindex.
381
// The function here is a first pass estimate based on data from
382
// a previous Vizer run
383
414k
static double def_kf_rd_multiplier(int qindex) {
384
414k
  return 3.3 + (0.0015 * (double)qindex);
385
414k
}
386
387
int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
388
                                        FRAME_UPDATE_TYPE update_type,
389
469k
                                        int qindex, aom_tune_metric tuning) {
390
469k
  const int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
391
469k
  int64_t rdmult = q * q;
392
469k
  if (update_type == KF_UPDATE) {
393
414k
    double def_rd_q_mult = def_kf_rd_multiplier(q);
394
414k
    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
395
414k
  } else if ((update_type == GF_UPDATE) || (update_type == ARF_UPDATE)) {
396
19.4k
    double def_rd_q_mult = def_arf_rd_multiplier(q);
397
19.4k
    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
398
35.6k
  } else {
399
35.6k
    double def_rd_q_mult = def_inter_rd_multiplier(q);
400
35.6k
    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
401
35.6k
  }
402
403
469k
  if (tuning == AOM_TUNE_IQ || tuning == AOM_TUNE_SSIMULACRA2) {
404
    // Further multiply rdmult (by up to 200/128 = 1.5625) to improve image
405
    // quality. The most noticeable effect is a mild bias towards choosing
406
    // larger transform sizes (e.g. one 16x16 transform instead of 4 8x8
407
    // transforms).
408
    // For very high qindexes, start progressively reducing the weight towards
409
    // unity (128/128), as transforms are large enough and making them even
410
    // larger actually harms subjective quality and SSIMULACRA 2 scores.
411
    // This weight part of the equation was determined by iteratively increasing
412
    // weight on CID22 and Daala's subset1, and observing its effects on visual
413
    // quality and SSIMULACRA 2 scores along the usable (0-100) range.
414
    // The ramp-down part of the equation was determined by choosing a fixed
415
    // initial qindex point [qindex 159 = (255 - 159) * 3 / 4] where SSIMULACRA
416
    // 2 scores for encodes with qindexes greater than 159 scored at or above
417
    // their equivalents with no rdmult adjustment.
418
0
    const int weight = clamp(((255 - qindex) * 3) / 4, 0, 72) + 128;
419
0
    rdmult = (int64_t)((double)rdmult * weight / 128.0);
420
0
  }
421
422
469k
  switch (bit_depth) {
423
358k
    case AOM_BITS_8: break;
424
56.8k
    case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
425
54.1k
    case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
426
0
    default:
427
0
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
428
0
      return -1;
429
469k
  }
430
469k
  return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
431
469k
}
432
433
int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
434
                        const FRAME_UPDATE_TYPE update_type,
435
                        const int layer_depth, const int boost_index,
436
                        const FRAME_TYPE frame_type,
437
                        const int use_fixed_qp_offsets,
438
                        const int is_stat_consumption_stage,
439
450k
                        const aom_tune_metric tuning) {
440
450k
  int64_t rdmult = av1_compute_rd_mult_based_on_qindex(bit_depth, update_type,
441
450k
                                                       qindex, tuning);
442
450k
  if (is_stat_consumption_stage && !use_fixed_qp_offsets &&
443
230k
      (frame_type != KEY_FRAME)) {
444
    // Layer depth adjustment
445
38.5k
    rdmult = (rdmult * rd_layer_depth_factor[layer_depth]) >> 7;
446
    // ARF boost adjustment
447
38.5k
    rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
448
38.5k
  }
449
450k
  return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
450
450k
}
451
452
28.5k
int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
453
28.5k
  assert(beta > 0.0);
454
28.5k
  int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
455
28.5k
  int newq = (int)rint(q / sqrt(beta));
456
28.5k
  int orig_qindex = qindex;
457
28.5k
  if (newq == q) {
458
24.2k
    return 0;
459
24.2k
  }
460
4.35k
  if (newq < q) {
461
6.21k
    while (qindex > 0) {
462
6.21k
      qindex--;
463
6.21k
      q = av1_dc_quant_QTX(qindex, 0, bit_depth);
464
6.21k
      if (newq >= q) {
465
2.11k
        break;
466
2.11k
      }
467
6.21k
    }
468
2.23k
  } else {
469
4.10k
    while (qindex < MAXQ) {
470
3.73k
      qindex++;
471
3.73k
      q = av1_dc_quant_QTX(qindex, 0, bit_depth);
472
3.73k
      if (newq <= q) {
473
1.87k
        break;
474
1.87k
      }
475
3.73k
    }
476
2.23k
  }
477
4.35k
  return qindex - orig_qindex;
478
28.5k
}
479
480
int av1_adjust_q_from_delta_q_res(int delta_q_res, int prev_qindex,
481
2.27k
                                  int curr_qindex) {
482
2.27k
  curr_qindex = clamp(curr_qindex, delta_q_res, 256 - delta_q_res);
483
2.27k
  const int sign_deltaq_index = curr_qindex - prev_qindex >= 0 ? 1 : -1;
484
2.27k
  const int deltaq_deadzone = delta_q_res / 4;
485
2.27k
  const int qmask = ~(delta_q_res - 1);
486
2.27k
  int abs_deltaq_index = abs(curr_qindex - prev_qindex);
487
2.27k
  abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
488
2.27k
  int adjust_qindex = prev_qindex + sign_deltaq_index * abs_deltaq_index;
489
2.27k
  adjust_qindex = AOMMAX(adjust_qindex, MINQ + 1);
490
2.27k
  return adjust_qindex;
491
2.27k
}
492
493
#if !CONFIG_REALTIME_ONLY
494
0
int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
495
0
  assert(beta > 0.0);
496
0
  const AV1_COMMON *cm = &cpi->common;
497
498
0
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
499
0
  const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
500
0
  const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
501
0
  const FRAME_TYPE frame_type = cm->current_frame.frame_type;
502
503
0
  const int qindex_rdmult = cm->quant_params.base_qindex;
504
0
  return (int)(av1_compute_rd_mult(
505
0
                   qindex_rdmult, cm->seq_params->bit_depth,
506
0
                   cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
507
0
                   layer_depth, boost_index, frame_type,
508
0
                   cpi->oxcf.q_cfg.use_fixed_qp_offsets,
509
0
                   is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning) /
510
0
               beta);
511
0
}
512
#endif  // !CONFIG_REALTIME_ONLY
513
514
1.03M
static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
515
1.03M
  double q;
516
1.03M
  switch (bit_depth) {
517
738k
    case AOM_BITS_8: q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_8) / 4.0; break;
518
136k
    case AOM_BITS_10:
519
136k
      q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_10) / 16.0;
520
136k
      break;
521
156k
    case AOM_BITS_12:
522
156k
      q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_12) / 64.0;
523
156k
      break;
524
0
    default:
525
0
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
526
0
      return -1;
527
1.03M
  }
528
  // TODO(debargha): Adjust the function below.
529
1.03M
  return AOMMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
530
1.03M
}
531
532
279k
void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
533
279k
  switch (cpi->common.seq_params->bit_depth) {
534
199k
    case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
535
37.5k
    case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
536
41.9k
    case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
537
0
    default:
538
0
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
539
279k
  }
540
279k
}
541
542
static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd,
543
129k
                                 int use_nonrd_pick_mode) {
544
129k
  int i, bsize, segment_id;
545
129k
  THR_MODES mode_indices[RTC_REFS * RTC_MODES] = { 0 };
546
129k
  int num_modes_count = use_nonrd_pick_mode ? 0 : MAX_MODES;
547
548
129k
  if (use_nonrd_pick_mode) {
549
178k
    for (int r_idx = 0; r_idx < RTC_REFS; r_idx++) {
550
142k
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
551
142k
      if (ref != INTRA_FRAME) {
552
534k
        for (i = 0; i < RTC_INTER_MODES; i++)
553
427k
          mode_indices[num_modes_count++] =
554
427k
              mode_idx[ref][mode_offset(inter_mode_list[i])];
555
106k
      } else {
556
178k
        for (i = 0; i < RTC_INTRA_MODES; i++)
557
142k
          mode_indices[num_modes_count++] =
558
142k
              mode_idx[ref][mode_offset(intra_mode_list[i])];
559
35.6k
      }
560
142k
    }
561
35.6k
  }
562
563
1.16M
  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
564
1.03M
    const int qindex = clamp(
565
1.03M
        av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex) +
566
1.03M
            cm->quant_params.y_dc_delta_q,
567
1.03M
        0, MAXQ);
568
1.03M
    const int q = compute_rd_thresh_factor(qindex, cm->seq_params->bit_depth);
569
570
23.7M
    for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
571
      // Threshold here seems unnecessarily harsh but fine given actual
572
      // range of values used for cpi->sf.thresh_mult[].
573
22.7M
      const int t = q * rd_thresh_block_size_factor[bsize];
574
22.7M
      const int thresh_max = INT_MAX / t;
575
576
2.90G
      for (i = 0; i < num_modes_count; ++i) {
577
2.87G
        const int mode_index = use_nonrd_pick_mode ? mode_indices[i] : i;
578
2.87G
        rd->threshes[segment_id][bsize][mode_index] =
579
2.87G
            rd->thresh_mult[mode_index] < thresh_max
580
2.87G
                ? rd->thresh_mult[mode_index] * t / 4
581
2.87G
                : INT_MAX;
582
2.87G
      }
583
22.7M
    }
584
1.03M
  }
585
129k
}
586
587
void av1_fill_coeff_costs(CoeffCosts *coeff_costs, FRAME_CONTEXT *fc,
588
419k
                          const int num_planes) {
589
419k
  const int nplanes = AOMMIN(num_planes, PLANE_TYPES);
590
3.21M
  for (int eob_multi_size = 0; eob_multi_size < 7; ++eob_multi_size) {
591
6.59M
    for (int plane = 0; plane < nplanes; ++plane) {
592
3.80M
      LV_MAP_EOB_COST *pcost = &coeff_costs->eob_costs[eob_multi_size][plane];
593
594
11.2M
      for (int ctx = 0; ctx < 2; ++ctx) {
595
7.45M
        aom_cdf_prob *pcdf;
596
7.45M
        switch (eob_multi_size) {
597
1.13M
          case 0: pcdf = fc->eob_flag_cdf16[plane][ctx]; break;
598
1.12M
          case 1: pcdf = fc->eob_flag_cdf32[plane][ctx]; break;
599
1.12M
          case 2: pcdf = fc->eob_flag_cdf64[plane][ctx]; break;
600
1.12M
          case 3: pcdf = fc->eob_flag_cdf128[plane][ctx]; break;
601
1.12M
          case 4: pcdf = fc->eob_flag_cdf256[plane][ctx]; break;
602
1.12M
          case 5: pcdf = fc->eob_flag_cdf512[plane][ctx]; break;
603
1.12M
          case 6:
604
1.12M
          default: pcdf = fc->eob_flag_cdf1024[plane][ctx]; break;
605
7.45M
        }
606
7.47M
        av1_cost_tokens_from_cdf(pcost->eob_cost[ctx], pcdf, NULL);
607
7.47M
      }
608
3.80M
    }
609
2.77M
  }
610
2.52M
  for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
611
5.00M
    for (int plane = 0; plane < nplanes; ++plane) {
612
2.91M
      LV_MAP_COEFF_COST *pcost = &coeff_costs->coeff_costs[tx_size][plane];
613
614
39.4M
      for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx)
615
36.5M
        av1_cost_tokens_from_cdf(pcost->txb_skip_cost[ctx],
616
36.5M
                                 fc->txb_skip_cdf[tx_size][ctx], NULL);
617
618
14.2M
      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx)
619
11.3M
        av1_cost_tokens_from_cdf(pcost->base_eob_cost[ctx],
620
11.3M
                                 fc->coeff_base_eob_cdf[tx_size][plane][ctx],
621
11.3M
                                 NULL);
622
110M
      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx)
623
107M
        av1_cost_tokens_from_cdf(pcost->base_cost[ctx],
624
107M
                                 fc->coeff_base_cdf[tx_size][plane][ctx], NULL);
625
626
123M
      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
627
120M
        pcost->base_cost[ctx][4] = 0;
628
120M
        pcost->base_cost[ctx][5] = pcost->base_cost[ctx][1] +
629
120M
                                   av1_cost_literal(1) -
630
120M
                                   pcost->base_cost[ctx][0];
631
120M
        pcost->base_cost[ctx][6] =
632
120M
            pcost->base_cost[ctx][2] - pcost->base_cost[ctx][1];
633
120M
        pcost->base_cost[ctx][7] =
634
120M
            pcost->base_cost[ctx][3] - pcost->base_cost[ctx][2];
635
120M
      }
636
637
28.5M
      for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx)
638
25.5M
        av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx],
639
25.5M
                                 fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
640
641
11.4M
      for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx)
642
8.57M
        av1_cost_tokens_from_cdf(pcost->dc_sign_cost[ctx],
643
8.57M
                                 fc->dc_sign_cdf[plane][ctx], NULL);
644
645
63.1M
      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
646
60.2M
        int br_rate[BR_CDF_SIZE];
647
60.2M
        int prev_cost = 0;
648
60.2M
        int i, j;
649
60.2M
        av1_cost_tokens_from_cdf(
650
60.2M
            br_rate, fc->coeff_br_cdf[AOMMIN(tx_size, TX_32X32)][plane][ctx],
651
60.2M
            NULL);
652
284M
        for (i = 0; i < COEFF_BASE_RANGE; i += BR_CDF_SIZE - 1) {
653
894M
          for (j = 0; j < BR_CDF_SIZE - 1; j++) {
654
669M
            pcost->lps_cost[ctx][i + j] = prev_cost + br_rate[j];
655
669M
          }
656
224M
          prev_cost += br_rate[j];
657
224M
        }
658
60.2M
        pcost->lps_cost[ctx][i] = prev_cost;
659
60.2M
      }
660
63.9M
      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
661
60.9M
        pcost->lps_cost[ctx][0 + COEFF_BASE_RANGE + 1] =
662
60.9M
            pcost->lps_cost[ctx][0];
663
791M
        for (int i = 1; i <= COEFF_BASE_RANGE; ++i) {
664
730M
          pcost->lps_cost[ctx][i + COEFF_BASE_RANGE + 1] =
665
730M
              pcost->lps_cost[ctx][i] - pcost->lps_cost[ctx][i - 1];
666
730M
        }
667
60.9M
      }
668
2.91M
    }
669
2.09M
  }
670
435k
}
671
672
void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
673
194k
                       MvCosts *mv_costs) {
674
  // Avoid accessing 'mv_costs' when it is not allocated.
675
194k
  if (mv_costs == NULL) return;
676
677
149k
  mv_costs->nmv_cost[0] = &mv_costs->nmv_cost_alloc[0][MV_MAX];
678
149k
  mv_costs->nmv_cost[1] = &mv_costs->nmv_cost_alloc[1][MV_MAX];
679
149k
  mv_costs->nmv_cost_hp[0] = &mv_costs->nmv_cost_hp_alloc[0][MV_MAX];
680
149k
  mv_costs->nmv_cost_hp[1] = &mv_costs->nmv_cost_hp_alloc[1][MV_MAX];
681
149k
  if (integer_mv) {
682
0
    mv_costs->mv_cost_stack = (int **)&mv_costs->nmv_cost;
683
0
    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
684
0
                             nmvc, MV_SUBPEL_NONE);
685
149k
  } else {
686
149k
    mv_costs->mv_cost_stack =
687
149k
        usehp ? mv_costs->nmv_cost_hp : mv_costs->nmv_cost;
688
149k
    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
689
149k
                             nmvc, usehp);
690
149k
  }
691
149k
}
692
693
0
// Fills the intra block copy displacement vector cost tables in 'dv_costs'
// from the entropy context 'ndvc'. Costs are always built with
// MV_SUBPEL_NONE precision. 'dv_costs' must be non-NULL (it is dereferenced
// unconditionally).
void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs) {
  // Point each component table at offset MV_MAX inside its backing array.
  for (int comp = 0; comp < 2; ++comp) {
    dv_costs->dv_costs[comp] = &dv_costs->dv_costs_alloc[comp][MV_MAX];
  }
  av1_build_nmv_cost_table(dv_costs->joint_mv, dv_costs->dv_costs, ndvc,
                           MV_SUBPEL_NONE);
}
699
700
// Populates speed features based on codec control settings (of type
701
// COST_UPDATE_TYPE) and expected speed feature settings (of type
702
// INTERNAL_COST_UPDATE_TYPE) by considering the least frequent cost update.
703
// The populated/updated speed features are used for cost updates in the
704
// encoder.
705
// WARNING: Population of unified cost update frequency needs to be taken care
706
// accordingly, in case of any modifications/additions to the enum
707
// COST_UPDATE_TYPE/INTERNAL_COST_UPDATE_TYPE.
708
static inline void populate_unified_cost_update_freq(
709
129k
    const CostUpdateFreq cost_upd_freq, SPEED_FEATURES *const sf) {
710
129k
  INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
711
  // Mapping of entropy cost update frequency from the encoder's codec control
712
  // settings of type COST_UPDATE_TYPE to speed features of type
713
  // INTERNAL_COST_UPDATE_TYPE.
714
129k
  static const INTERNAL_COST_UPDATE_TYPE
715
129k
      map_cost_upd_to_internal_cost_upd[NUM_COST_UPDATE_TYPES] = {
716
129k
        INTERNAL_COST_UPD_SB, INTERNAL_COST_UPD_SBROW, INTERNAL_COST_UPD_TILE,
717
129k
        INTERNAL_COST_UPD_OFF
718
129k
      };
719
720
129k
  inter_sf->mv_cost_upd_level =
721
129k
      AOMMIN(inter_sf->mv_cost_upd_level,
722
129k
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.mv]);
723
129k
  inter_sf->coeff_cost_upd_level =
724
129k
      AOMMIN(inter_sf->coeff_cost_upd_level,
725
129k
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.coeff]);
726
129k
  inter_sf->mode_cost_upd_level =
727
129k
      AOMMIN(inter_sf->mode_cost_upd_level,
728
129k
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.mode]);
729
129k
  sf->intra_sf.dv_cost_upd_level =
730
129k
      AOMMIN(sf->intra_sf.dv_cost_upd_level,
731
129k
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.dv]);
732
129k
}
733
734
// Checks if entropy costs should be initialized/updated at frame level or not.
735
static inline int is_frame_level_cost_upd_freq_set(
736
    const AV1_COMMON *const cm, const INTERNAL_COST_UPDATE_TYPE cost_upd_level,
737
377k
    const int use_nonrd_pick_mode, const int frames_since_key) {
738
377k
  const int fill_costs =
739
377k
      frame_is_intra_only(cm) ||
740
106k
      (use_nonrd_pick_mode ? frames_since_key < 2
741
106k
                           : (cm->current_frame.frame_number & 0x07) == 1);
742
377k
  return ((!use_nonrd_pick_mode && cost_upd_level != INTERNAL_COST_UPD_OFF) ||
743
129k
          cost_upd_level == INTERNAL_COST_UPD_TILE || fill_costs);
744
377k
}
745
746
// Decide whether we want to update the mode entropy cost for the current frame.
747
// The logit is currently inherited from selective_disable_cdf_rtc.
748
129k
static inline int should_force_mode_cost_update(const AV1_COMP *cpi) {
749
129k
  const REAL_TIME_SPEED_FEATURES *const rt_sf = &cpi->sf.rt_sf;
750
129k
  if (!rt_sf->frame_level_mode_cost_update) {
751
119k
    return false;
752
119k
  }
753
754
9.06k
  if (cpi->oxcf.algo_cfg.cdf_update_mode == 2) {
755
0
    return cpi->frames_since_last_update == 1;
756
9.06k
  } else if (cpi->oxcf.algo_cfg.cdf_update_mode == 1) {
757
9.06k
    if (cpi->svc.number_spatial_layers == 1 &&
758
9.06k
        cpi->svc.number_temporal_layers == 1) {
759
9.06k
      const AV1_COMMON *const cm = &cpi->common;
760
9.06k
      const RATE_CONTROL *const rc = &cpi->rc;
761
762
9.06k
      return frame_is_intra_only(cm) || is_frame_resize_pending(cpi) ||
763
4.20k
             rc->high_source_sad || rc->frames_since_key < 10 ||
764
0
             cpi->cyclic_refresh->counter_encode_maxq_scene_change < 10 ||
765
0
             cm->current_frame.frame_number % 8 == 0;
766
9.06k
    } else if (cpi->svc.number_temporal_layers > 1) {
767
0
      return cpi->svc.temporal_layer_id != cpi->svc.number_temporal_layers - 1;
768
0
    }
769
9.06k
  }
770
771
0
  return false;
772
9.06k
}
773
774
129k
// Sets up the frame-level rate-distortion constants of the encoder:
//  1. computes rd->RDMULT from the frame's quantizer index and GF group
//     position (optionally overridden under CONFIG_RD_COMMAND),
//  2. derives the error-per-bit value and the block thresholds,
//  3. folds the codec-control cost update frequencies into the speed
//     features, and
//  4. refreshes the mv, coefficient, mode and dv cost tables when a
//     frame-level update is due.
void av1_initialize_rd_consts(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  SPEED_FEATURES *const sf = &cpi->sf;
  RD_OPT *const rd = &cpi->rd;
  const int use_nonrd_pick_mode = sf->rt_sf.use_nonrd_pick_mode;
  const int frames_since_key = cpi->rc.frames_since_key;

  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  // Boost index and layer depth are capped at 15 and 6 respectively.
  const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
  const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
  const FRAME_TYPE frame_type = cm->current_frame.frame_type;

  const int qindex_rdmult =
      cm->quant_params.base_qindex + cm->quant_params.y_dc_delta_q;
  rd->RDMULT = av1_compute_rd_mult(
      qindex_rdmult, cm->seq_params->bit_depth,
      gf_group->update_type[cpi->gf_frame_index], layer_depth, boost_index,
      frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
      is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
#if CONFIG_RD_COMMAND
  if (cpi->oxcf.pass == 2) {
    const RD_COMMAND *rd_command = &cpi->rd_command;
    if (rd_command->option_ls[rd_command->frame_index] ==
        RD_OPTION_SET_Q_RDMULT) {
      rd->RDMULT = rd_command->rdmult_ls[rd_command->frame_index];
    }
  }
#endif  // CONFIG_RD_COMMAND

  av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);

  set_block_thresholds(cm, rd, use_nonrd_pick_mode);

  // Must run before the cost update levels are read below: it may lower them.
  populate_unified_cost_update_freq(cpi->oxcf.cost_upd_freq, sf);
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;

  // Frame level mv cost update
  const int update_mv_costs = is_frame_level_cost_upd_freq_set(
      cm, inter_sf->mv_cost_upd_level, use_nonrd_pick_mode, frames_since_key);
  if (update_mv_costs) {
    av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
                      cm->features.allow_high_precision_mv, x->mv_costs);
  }

  // Frame level coefficient cost update
  const int update_coeff_costs = is_frame_level_cost_upd_freq_set(
      cm, inter_sf->coeff_cost_upd_level, use_nonrd_pick_mode,
      frames_since_key);
  if (update_coeff_costs) {
    av1_fill_coeff_costs(&x->coeff_costs, cm->fc, av1_num_planes(cm));
  }

  // Frame level mode cost update
  if (should_force_mode_cost_update(cpi) ||
      is_frame_level_cost_upd_freq_set(cm, inter_sf->mode_cost_upd_level,
                                       use_nonrd_pick_mode, frames_since_key)) {
    av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);
  }

  // Frame level dv cost update
  if (!av1_need_dv_costs(cpi)) return;

  // The dv cost buffer is allocated on first use and then shared with the
  // macroblock structure ('x' is &cpi->td.mb).
  if (cpi->td.dv_costs_alloc == NULL) {
    CHECK_MEM_ERROR(
        cm, cpi->td.dv_costs_alloc,
        (IntraBCMVCosts *)aom_malloc(sizeof(*cpi->td.dv_costs_alloc)));
    x->dv_costs = cpi->td.dv_costs_alloc;
  }
  av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
}
838
839
3.55M
// Looks up the normalized rate (*r_q10) and normalized distortion (*d_q10),
// both in Q10, for a squared normalized step size 'xsq_q10' (also Q10). The
// result is a linear interpolation between two adjacent table entries, so
// the selected index plus one must stay inside the tables; the caller
// visible in this file clamps 'xsq_q10' before calling.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
    4044,  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
    3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
    2290,  2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
    1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
    911,   864,  821,  781,  745,  680,  623,  574,  530,  490,  455,  424,
    395,   345,  304,  269,  239,  213,  190,  171,  154,  126,  104,  87,
    73,    61,   52,   44,   38,   28,   21,   16,   12,   10,   8,    6,
    5,     3,    2,    1,    1,    1,    0,    0,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
    0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,
    5,    6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,
    18,   21,   24,   26,   29,   31,   34,   36,   39,   44,   49,   54,
    59,   64,   69,   73,   78,   88,   97,   106,  115,  124,  133,  142,
    151,  167,  184,  200,  215,  231,  245,  260,  274,  301,  327,  351,
    375,  397,  418,  439,  458,  495,  528,  559,  587,  613,  637,  659,
    680,  717,  749,  777,  801,  823,  842,  859,  874,  899,  919,  936,
    949,  960,  969,  977,  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
  };
  // Value of xsq_q10 at which each table entry above was sampled.
  static const int xsq_iq_q10[] = {
    0,      4,      8,      12,     16,     20,     24,     28,     32,
    40,     48,     56,     64,     72,     80,     88,     96,     112,
    128,    144,    160,    176,    192,    208,    224,    256,    288,
    320,    352,    384,    416,    448,    480,    544,    608,    672,
    736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
    1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
    3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
    7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
    16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
    36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
    81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
    180192, 196576, 212960, 229344, 245728,
  };

  // Locate the table segment: 'seg_shift' selects the group of 8 entries and
  // the three bits of 'biased' below its leading bit select the entry in it.
  const int biased = (xsq_q10 >> 2) + 8;
  const int seg_shift = get_msb(biased) - 3;
  const int idx = (seg_shift << 3) + ((biased >> seg_shift) & 0x7);

  // Q10 interpolation weights: 'w_next' for entry idx + 1, 'w_cur' for idx.
  const int w_next = ((xsq_q10 - xsq_iq_q10[idx]) << 10) >> (2 + seg_shift);
  const int w_cur = (1 << 10) - w_next;

  *r_q10 = (rate_tab_q10[idx] * w_cur + rate_tab_q10[idx + 1] * w_next) >> 10;
  *d_q10 = (dist_tab_q10[idx] * w_cur + dist_tab_q10[idx + 1] * w_next) >> 10;
}
904
905
void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
906
                                  unsigned int qstep, int *rate,
907
3.56M
                                  int64_t *dist) {
908
  // This function models the rate and distortion for a Laplacian
909
  // source with given variance when quantized with a uniform quantizer
910
  // with given stepsize. The closed form expressions are in:
911
  // Hang and Chen, "Source Model for transform video coder and its
912
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
913
  // Sys. for Video Tech., April 1997.
914
3.56M
  if (var == 0) {
915
18.4k
    *rate = 0;
916
18.4k
    *dist = 0;
917
3.54M
  } else {
918
3.54M
    int d_q10, r_q10;
919
3.54M
    static const uint32_t MAX_XSQ_Q10 = 245727;
920
3.54M
    const uint64_t xsq_q10_64 =
921
3.54M
        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
922
3.54M
    const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
923
3.54M
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
924
3.54M
    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
925
3.54M
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
926
3.54M
  }
927
3.56M
}
928
929
11.6M
// Cubic interpolation through four consecutive samples p[0..3], evaluated
// at offset 'x' measured from p[1]. Returns p[1] at x = 0 and p[2] at
// x = 1.
static double interp_cubic(const double *p, double x) {
  // Nested (Horner) evaluation, innermost term first. The operations are
  // kept in the same order as the single-expression form so the rounding is
  // identical.
  const double cubic_term = 3.0 * (p[1] - p[2]) + p[3] - p[0];
  const double quad_term =
      2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * cubic_term;
  const double lin_term = p[2] - p[0] + x * quad_term;
  return p[1] + 0.5 * x * lin_term;
}
935
936
/*
937
static double interp_bicubic(const double *p, int p_stride, double x,
938
                             double y) {
939
  double q[4];
940
  q[0] = interp_cubic(p, x);
941
  q[1] = interp_cubic(p + p_stride, x);
942
  q[2] = interp_cubic(p + 2 * p_stride, x);
943
  q[3] = interp_cubic(p + 3 * p_stride, x);
944
  return interp_cubic(q, y);
945
}
946
*/
947
948
// Maps a block size (index in [0, BLOCK_SIZES_ALL)) to a curve-fit model
// category in 0..3. av1_model_rd_curvfit() uses the category as the row
// index into interp_rgrid_curv.
static const uint8_t bsize_curvfit_model_cat_lookup[BLOCK_SIZES_ALL] = {
949
  0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 1, 1, 2, 2, 3, 3
950
};
951
952
5.82M
// Selects the distortion curve-fit category from the normalized SSE:
// 1 when 'sse_norm' is strictly greater than 16.0, otherwise 0.
static int sse_norm_curvfit_model_cat_lookup(double sse_norm) {
  return (sse_norm > 16.0) ? 1 : 0;
}
955
956
// Rate grid used by av1_model_rd_curvfit(). Each of the 4 rows holds 65
// samples; the row is selected by the category from
// bsize_curvfit_model_cat_lookup (0..3). av1_model_rd_curvfit() maps its
// 'xqr' argument from the range [-15.5, 16.5] in steps of 0.5 onto these
// 65 sample positions and interpolates between neighbouring samples with
// interp_cubic().
static const double interp_rgrid_curv[4][65] = {
957
  {
958
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
959
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
960
      0.000000,    118.257702,  120.210658,  121.434853,  122.100487,
961
      122.377758,  122.436865,  72.290102,   96.974289,   101.652727,
962
      126.830141,  140.417377,  157.644879,  184.315291,  215.823873,
963
      262.300169,  335.919859,  420.624173,  519.185032,  619.854243,
964
      726.053595,  827.663369,  933.127475,  1037.988755, 1138.839609,
965
      1233.342933, 1333.508064, 1428.760126, 1533.396364, 1616.952052,
966
      1744.539319, 1803.413586, 1951.466618, 1994.227838, 2086.031680,
967
      2148.635443, 2239.068450, 2222.590637, 2338.859809, 2402.929011,
968
      2418.727875, 2435.342670, 2471.159469, 2523.187446, 2591.183827,
969
      2674.905840, 2774.110714, 2888.555675, 3017.997952, 3162.194773,
970
      3320.903365, 3493.880956, 3680.884773, 3881.672045, 4096.000000,
971
  },
972
  {
973
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
974
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
975
      0.000000,    13.087244,   15.919735,   25.930313,   24.412411,
976
      28.567417,   29.924194,   30.857010,   32.742979,   36.382570,
977
      39.210386,   42.265690,   47.378572,   57.014850,   82.740067,
978
      137.346562,  219.968084,  316.781856,  415.643773,  516.706538,
979
      614.914364,  714.303763,  815.512135,  911.210485,  1008.501528,
980
      1109.787854, 1213.772279, 1322.922561, 1414.752579, 1510.505641,
981
      1615.741888, 1697.989032, 1780.123933, 1847.453790, 1913.742309,
982
      1960.828122, 2047.500168, 2085.454095, 2129.230668, 2158.171824,
983
      2182.231724, 2217.684864, 2269.589211, 2337.264824, 2420.618694,
984
      2519.557814, 2633.989178, 2763.819779, 2908.956609, 3069.306660,
985
      3244.776927, 3435.274401, 3640.706076, 3860.978945, 4096.000000,
986
  },
987
  {
988
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
989
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
990
      0.000000,    4.656893,    5.123633,    5.594132,    6.162376,
991
      6.918433,    7.768444,    8.739415,    10.105862,   11.477328,
992
      13.236604,   15.421030,   19.093623,   25.801871,   46.724612,
993
      98.841054,   181.113466,  272.586364,  359.499769,  445.546343,
994
      525.944439,  605.188743,  681.793483,  756.668359,  838.486885,
995
      926.950356,  1015.482542, 1113.353926, 1204.897193, 1288.871992,
996
      1373.464145, 1455.746628, 1527.796460, 1588.475066, 1658.144771,
997
      1710.302500, 1807.563351, 1863.197608, 1927.281616, 1964.450872,
998
      2022.719898, 2100.041145, 2185.205712, 2280.993936, 2387.616216,
999
      2505.282950, 2634.204540, 2774.591385, 2926.653884, 3090.602436,
1000
      3266.647443, 3454.999303, 3655.868416, 3869.465182, 4096.000000,
1001
  },
1002
  {
1003
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
1004
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
1005
      0.000000,    0.337370,    0.391916,    0.468839,    0.566334,
1006
      0.762564,    1.069225,    1.384361,    1.787581,    2.293948,
1007
      3.251909,    4.412991,    8.050068,    11.606073,   27.668092,
1008
      65.227758,   128.463938,  202.097653,  262.715851,  312.464873,
1009
      355.601398,  400.609054,  447.201352,  495.761568,  552.871938,
1010
      619.067625,  691.984883,  773.753288,  860.628503,  946.262808,
1011
      1019.805896, 1106.061360, 1178.422145, 1244.852258, 1302.173987,
1012
      1399.650266, 1548.092912, 1545.928652, 1670.817500, 1694.523823,
1013
      1779.195362, 1882.155494, 1990.662097, 2108.325181, 2235.456119,
1014
      2372.366287, 2519.367059, 2676.769812, 2844.885918, 3024.026754,
1015
      3214.503695, 3416.628115, 3630.711389, 3857.064892, 4096.000000,
1016
  },
1017
};
1018
1019
// Distortion-by-SSE grid used by av1_model_rd_curvfit(). The row is
// selected by sse_norm_curvfit_model_cat_lookup(), which returns 0 or 1;
// each row holds 65 samples laid out on the same x grid as
// interp_rgrid_curv.
// NOTE(review): the array is declared with 3 rows but only 2 are
// initialized here, so the third row is implicitly all zeros. It is not
// selected by the lookup visible in this file -- confirm whether the extra
// row is intentional.
static const double interp_dgrid_curv[3][65] = {
1020
  {
1021
      16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
1022
      15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
1023
      15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
1024
      13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
1025
      7.487633,  5.688649,  4.267515,  3.196300,  2.434201,  1.834064,
1026
      1.369920,  1.035921,  0.775279,  0.574895,  0.427232,  0.314123,
1027
      0.233236,  0.171440,  0.128188,  0.092762,  0.067569,  0.049324,
1028
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
1029
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
1030
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
1031
      0.000348,  0.000193,  0.000085,  0.000021,  0.000000,
1032
  },
1033
  {
1034
      16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
1035
      15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
1036
      15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
1037
      13.073692, 12.222005, 11.237799, 9.985848,  8.898823,  7.423519,
1038
      5.995325,  4.773152,  3.744032,  2.938217,  2.294526,  1.762412,
1039
      1.327145,  1.020728,  0.765535,  0.570548,  0.425833,  0.313825,
1040
      0.232959,  0.171324,  0.128174,  0.092750,  0.067558,  0.049319,
1041
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
1042
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
1043
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
1044
      0.000348,  0.000193,  0.000085,  0.000021,  -0.000000,
1045
  },
1046
};
1047
1048
// Curve-fit rate/distortion model. Looks up the rate (*rate_f) and the
// distortion-to-SSE ratio (*distbysse_f) at position 'xqr' by cubic
// interpolation over the precomputed grids interp_rgrid_curv (row chosen
// from 'bsize') and interp_dgrid_curv (row chosen from 'sse_norm').
// 'xqr' is clamped so that the four samples needed by the interpolation
// always lie inside the grid.
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
                          double *rate_f, double *distbysse_f) {
  // Grid geometry: samples at x_start, x_start + x_step, ..., x_end.
  const double x_start = -15.5;
  const double x_end = 16.5;
  const double x_step = 0.5;
  const double epsilon = 1e-6;

  // Keep at least one full step (plus a small margin) away from both grid
  // ends.
  const double xqr_min = x_start + x_step + epsilon;
  const double xqr_max = x_end - x_step - epsilon;
  xqr = AOMMAX(xqr, xqr_min);
  xqr = AOMMIN(xqr, xqr_max);

  // Split the grid position into the sample index and the offset within
  // the interval.
  const double grid_pos = (xqr - x_start) / x_step;
  const int xi = (int)floor(grid_pos);
  const double xo = grid_pos - xi;

  assert(xi > 0);

  // interp_cubic() reads four samples starting one before 'xi'.
  const int first_sample = xi - 1;
  const int rate_row = bsize_curvfit_model_cat_lookup[bsize];
  const int dist_row = sse_norm_curvfit_model_cat_lookup(sse_norm);

  *rate_f = interp_cubic(&interp_rgrid_curv[rate_row][first_sample], xo);
  *distbysse_f = interp_cubic(&interp_dgrid_curv[dist_row][first_sample], xo);
}
1071
1072
static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
1073
                                       const struct macroblockd_plane *pd,
1074
                                       ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
1075
123M
                                       ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
1076
123M
  const int num_4x4_w = mi_size_wide[plane_bsize];
1077
123M
  const int num_4x4_h = mi_size_high[plane_bsize];
1078
123M
  const ENTROPY_CONTEXT *const above = pd->above_entropy_context;
1079
123M
  const ENTROPY_CONTEXT *const left = pd->left_entropy_context;
1080
1081
123M
  memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
1082
123M
  memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
1083
123M
}
1084
1085
// Public entry point for fetching a plane's entropy contexts: checks (in
// debug builds, via assert) that 'plane_bsize' is a valid block size and
// then copies the above/left contexts of 'pd' into 't_above' / 't_left'
// through get_entropy_contexts_plane().
void av1_get_entropy_contexts(BLOCK_SIZE plane_bsize,
1086
                              const struct macroblockd_plane *pd,
1087
                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
1088
123M
                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
1089
123M
  assert(plane_bsize < BLOCK_SIZES_ALL);
1090
123M
  get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left);
1091
123M
}
1092
1093
// Special clamping used in the encoder when calculating a prediction
1094
//
1095
// Logically, all pixel fetches used for prediction are clamped against the
1096
// edges of the frame. But doing this directly is slow, so instead we allocate
1097
// a finite border around the frame and fill it with copies of the outermost
1098
// pixels.
1099
//
1100
// Since this border is finite, we need to clamp the motion vector before
1101
// prediction in order to avoid out-of-bounds reads. At the same time, this
1102
// clamp must not change the prediction result.
1103
//
1104
// We can balance both of these concerns by calculating how far we would have
1105
// to go in each direction before the extended prediction region (the current
1106
// block + AOM_INTERP_EXTEND many pixels around the block) would be mapped
1107
// so that it touches the frame only at one row or column. This is a special
1108
// point because any more extreme MV will always lead to the same prediction.
1109
// So it is safe to clamp at that point.
1110
//
1111
// In the worst case, this requires a border of
1112
//   max_block_width + 2*AOM_INTERP_EXTEND = 128 + 2*4 = 136 pixels
1113
// around the frame edges.
1114
static inline void enc_clamp_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd,
1115
3.50M
                                MV *mv) {
1116
3.50M
  int bw = xd->width << MI_SIZE_LOG2;
1117
3.50M
  int bh = xd->height << MI_SIZE_LOG2;
1118
1119
3.50M
  int px_to_left_edge = xd->mi_col << MI_SIZE_LOG2;
1120
3.50M
  int px_to_right_edge = (cm->mi_params.mi_cols - xd->mi_col) << MI_SIZE_LOG2;
1121
3.50M
  int px_to_top_edge = xd->mi_row << MI_SIZE_LOG2;
1122
3.50M
  int px_to_bottom_edge = (cm->mi_params.mi_rows - xd->mi_row) << MI_SIZE_LOG2;
1123
1124
3.50M
  const SubpelMvLimits mv_limits = {
1125
3.50M
    .col_min = -GET_MV_SUBPEL(px_to_left_edge + bw + AOM_INTERP_EXTEND),
1126
3.50M
    .col_max = GET_MV_SUBPEL(px_to_right_edge + AOM_INTERP_EXTEND),
1127
3.50M
    .row_min = -GET_MV_SUBPEL(px_to_top_edge + bh + AOM_INTERP_EXTEND),
1128
3.50M
    .row_max = GET_MV_SUBPEL(px_to_bottom_edge + AOM_INTERP_EXTEND)
1129
3.50M
  };
1130
3.50M
  clamp_mv(mv, &mv_limits);
1131
3.50M
}
1132
1133
// Evaluates the first two reference MV candidates of 'ref_frame' against
// the source block and records the results in 'x':
//   x->pred_mv0_sad[ref_frame] / x->pred_mv1_sad[ref_frame]: SAD of the
//     first / second candidate (only written when that candidate is
//     evaluated),
//   x->pred_mv_sad[ref_frame]: lowest SAD found,
//   x->max_mv_context[ref_frame]: largest absolute MV component over the
//     candidates, shifted right by 3.
// Each candidate is clamped with enc_clamp_mv() and rounded to full-pel
// before the SAD is computed on 'ref_y_buffer'.
void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
  const int_mv cand0 =
      av1_get_ref_mv_from_stack(0, ref_frames, 0, &x->mbmi_ext);
  const int_mv cand1 =
      av1_get_ref_mv_from_stack(0, ref_frames, 1, &x->mbmi_ext);

  // The second candidate is only kept when it differs from the first.
  MV pred_mv[MAX_MV_REF_CANDIDATES + 1];
  int num_mv_refs = 1;
  pred_mv[0] = cand0.as_mv;
  if (cand1.as_int != cand0.as_int) {
    pred_mv[1] = cand1.as_mv;
    num_mv_refs = 2;
  }

  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  const uint8_t *const src_y_ptr = x->plane[0].src.buf;
  int zero_seen = 0;
  int best_sad = INT_MAX;
  int max_mv = 0;

  // Get the sad for each candidate reference mv.
  for (int idx = 0; idx < num_mv_refs; ++idx) {
    MV *const cand = &pred_mv[idx];
    enc_clamp_mv(&cpi->common, &x->e_mbd, cand);

    // Round the 1/8-pel components to full-pel positions.
    const int fp_row = (cand->row + 3 + (cand->row >= 0)) >> 3;
    const int fp_col = (cand->col + 3 + (cand->col >= 0)) >> 3;
    max_mv = AOMMAX(max_mv, AOMMAX(abs(cand->row), abs(cand->col)) >> 3);

    // The zero full-pel position is evaluated at most once.
    const int is_zero = (fp_row == 0 && fp_col == 0);
    if (is_zero && zero_seen) continue;
    zero_seen |= is_zero;

    const uint8_t *const ref_y_ptr =
        ref_y_buffer + (ref_y_stride * fp_row + fp_col);
    // Find sad for current vector.
    const int this_sad = cpi->ppi->fn_ptr[block_size].sdf(
        src_y_ptr, x->plane[0].src.stride, ref_y_ptr, ref_y_stride);

    // Track the lowest SAD seen so far.
    best_sad = (this_sad < best_sad) ? this_sad : best_sad;

    switch (idx) {
      case 0: x->pred_mv0_sad[ref_frame] = this_sad; break;
      case 1: x->pred_mv1_sad[ref_frame] = this_sad; break;
      default: break;
    }
  }

  // Record the largest MV magnitude and the best SAD for this reference.
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
1184
1185
void av1_setup_pred_block(const MACROBLOCKD *xd,
1186
                          struct buf_2d dst[MAX_MB_PLANE],
1187
                          const YV12_BUFFER_CONFIG *src,
1188
                          const struct scale_factors *scale,
1189
                          const struct scale_factors *scale_uv,
1190
2.21M
                          const int num_planes) {
1191
2.21M
  dst[0].buf = src->y_buffer;
1192
2.21M
  dst[0].stride = src->y_stride;
1193
2.21M
  dst[1].buf = src->u_buffer;
1194
2.21M
  dst[2].buf = src->v_buffer;
1195
2.21M
  dst[1].stride = dst[2].stride = src->uv_stride;
1196
1197
2.21M
  const int mi_row = xd->mi_row;
1198
2.21M
  const int mi_col = xd->mi_col;
1199
6.15M
  for (int i = 0; i < num_planes; ++i) {
1200
3.93M
    setup_pred_plane(dst + i, xd->mi[0]->bsize, dst[i].buf,
1201
3.93M
                     i ? src->uv_crop_width : src->y_crop_width,
1202
3.93M
                     i ? src->uv_crop_height : src->y_crop_height,
1203
3.93M
                     dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
1204
3.93M
                     xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
1205
3.93M
  }
1206
2.21M
}
1207
1208
YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
1209
6.03M
                                             int ref_frame) {
1210
6.03M
  assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
1211
6.03M
  RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame - 1];
1212
6.03M
  const RefCntBuffer *const ref_buf =
1213
6.03M
      get_ref_frame_buf(&cpi->common, ref_frame);
1214
6.03M
  return (scaled_buf != ref_buf && scaled_buf != NULL) ? &scaled_buf->buf
1215
6.03M
                                                       : NULL;
1216
6.03M
}
1217
1218
int av1_get_switchable_rate(const MACROBLOCK *x, const MACROBLOCKD *xd,
1219
6.45M
                            InterpFilter interp_filter, int dual_filter) {
1220
6.45M
  if (interp_filter == SWITCHABLE) {
1221
6.45M
    const MB_MODE_INFO *const mbmi = xd->mi[0];
1222
6.45M
    int inter_filter_cost = 0;
1223
12.8M
    for (int dir = 0; dir < 2; ++dir) {
1224
12.8M
      if (dir && !dual_filter) break;
1225
6.35M
      const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
1226
6.35M
      const InterpFilter filter =
1227
6.35M
          av1_extract_interp_filter(mbmi->interp_filters, dir);
1228
6.35M
      inter_filter_cost += x->mode_costs.switchable_interp_costs[ctx][filter];
1229
6.35M
    }
1230
6.45M
    return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
1231
18.4E
  } else {
1232
18.4E
    return 0;
1233
18.4E
  }
1234
6.45M
}
1235
1236
110k
// Resets cpi->rd.thresh_mult to its baseline values.
//
// The whole array is zeroed first, then every mode listed in the table below
// receives its baseline multiplier. Modes that do not appear in the table
// keep the value 0.
void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
  // Baseline multiplier for each mode, grouped by prediction type and
  // reference combination.
  static const struct {
    int mode;
    int mult;
  } kBaselineThreshMult[] = {
    // Single reference: NEAREST.
    { THR_NEARESTMV, 300 },
    { THR_NEARESTL2, 300 },
    { THR_NEARESTL3, 300 },
    { THR_NEARESTB, 300 },
    { THR_NEARESTA2, 300 },
    { THR_NEARESTA, 300 },
    { THR_NEARESTG, 300 },

    // Single reference: NEW.
    { THR_NEWMV, 1000 },
    { THR_NEWL2, 1000 },
    { THR_NEWL3, 1000 },
    { THR_NEWB, 1000 },
    { THR_NEWA2, 1100 },
    { THR_NEWA, 1000 },
    { THR_NEWG, 1000 },

    // Single reference: NEAR.
    { THR_NEARMV, 1000 },
    { THR_NEARL2, 1000 },
    { THR_NEARL3, 1000 },
    { THR_NEARB, 1000 },
    { THR_NEARA2, 1000 },
    { THR_NEARA, 1000 },
    { THR_NEARG, 1000 },

    // Single reference: GLOBAL.
    { THR_GLOBALMV, 2200 },
    { THR_GLOBALL2, 2000 },
    { THR_GLOBALL3, 2000 },
    { THR_GLOBALB, 2400 },
    { THR_GLOBALA2, 2000 },
    { THR_GLOBALG, 2000 },
    { THR_GLOBALA, 2400 },

    // Compound NEAREST_NEAREST.
    { THR_COMP_NEAREST_NEARESTLA, 1100 },
    { THR_COMP_NEAREST_NEARESTL2A, 1000 },
    { THR_COMP_NEAREST_NEARESTL3A, 800 },
    { THR_COMP_NEAREST_NEARESTGA, 900 },
    { THR_COMP_NEAREST_NEARESTLB, 1000 },
    { THR_COMP_NEAREST_NEARESTL2B, 1000 },
    { THR_COMP_NEAREST_NEARESTL3B, 1000 },
    { THR_COMP_NEAREST_NEARESTGB, 1000 },
    { THR_COMP_NEAREST_NEARESTLA2, 1000 },
    { THR_COMP_NEAREST_NEARESTL2A2, 1000 },
    { THR_COMP_NEAREST_NEARESTL3A2, 1000 },
    { THR_COMP_NEAREST_NEARESTGA2, 1000 },

    { THR_COMP_NEAREST_NEARESTLL2, 2000 },
    { THR_COMP_NEAREST_NEARESTLL3, 2000 },
    { THR_COMP_NEAREST_NEARESTLG, 2000 },
    { THR_COMP_NEAREST_NEARESTBA, 2000 },

    // Remaining compound modes: LA.
    { THR_COMP_NEAR_NEARLA, 1200 },
    { THR_COMP_NEAREST_NEWLA, 1500 },
    { THR_COMP_NEW_NEARESTLA, 1500 },
    { THR_COMP_NEAR_NEWLA, 1530 },
    { THR_COMP_NEW_NEARLA, 1870 },
    { THR_COMP_NEW_NEWLA, 2400 },
    { THR_COMP_GLOBAL_GLOBALLA, 2750 },

    // L2A.
    { THR_COMP_NEAR_NEARL2A, 1200 },
    { THR_COMP_NEAREST_NEWL2A, 1500 },
    { THR_COMP_NEW_NEARESTL2A, 1500 },
    { THR_COMP_NEAR_NEWL2A, 1870 },
    { THR_COMP_NEW_NEARL2A, 1700 },
    { THR_COMP_NEW_NEWL2A, 1800 },
    { THR_COMP_GLOBAL_GLOBALL2A, 2500 },

    // L3A.
    { THR_COMP_NEAR_NEARL3A, 1200 },
    { THR_COMP_NEAREST_NEWL3A, 1500 },
    { THR_COMP_NEW_NEARESTL3A, 1500 },
    { THR_COMP_NEAR_NEWL3A, 1700 },
    { THR_COMP_NEW_NEARL3A, 1700 },
    { THR_COMP_NEW_NEWL3A, 2000 },
    { THR_COMP_GLOBAL_GLOBALL3A, 3000 },

    // GA.
    { THR_COMP_NEAR_NEARGA, 1320 },
    { THR_COMP_NEAREST_NEWGA, 1500 },
    { THR_COMP_NEW_NEARESTGA, 1500 },
    { THR_COMP_NEAR_NEWGA, 2040 },
    { THR_COMP_NEW_NEARGA, 1700 },
    { THR_COMP_NEW_NEWGA, 2000 },
    { THR_COMP_GLOBAL_GLOBALGA, 2250 },

    // LB.
    { THR_COMP_NEAR_NEARLB, 1200 },
    { THR_COMP_NEAREST_NEWLB, 1500 },
    { THR_COMP_NEW_NEARESTLB, 1500 },
    { THR_COMP_NEAR_NEWLB, 1360 },
    { THR_COMP_NEW_NEARLB, 1700 },
    { THR_COMP_NEW_NEWLB, 2400 },
    { THR_COMP_GLOBAL_GLOBALLB, 2250 },

    // L2B.
    { THR_COMP_NEAR_NEARL2B, 1200 },
    { THR_COMP_NEAREST_NEWL2B, 1500 },
    { THR_COMP_NEW_NEARESTL2B, 1500 },
    { THR_COMP_NEAR_NEWL2B, 1700 },
    { THR_COMP_NEW_NEARL2B, 1700 },
    { THR_COMP_NEW_NEWL2B, 2000 },
    { THR_COMP_GLOBAL_GLOBALL2B, 2500 },

    // L3B.
    { THR_COMP_NEAR_NEARL3B, 1200 },
    { THR_COMP_NEAREST_NEWL3B, 1500 },
    { THR_COMP_NEW_NEARESTL3B, 1500 },
    { THR_COMP_NEAR_NEWL3B, 1870 },
    { THR_COMP_NEW_NEARL3B, 1700 },
    { THR_COMP_NEW_NEWL3B, 2000 },
    { THR_COMP_GLOBAL_GLOBALL3B, 2500 },

    // GB.
    { THR_COMP_NEAR_NEARGB, 1200 },
    { THR_COMP_NEAREST_NEWGB, 1500 },
    { THR_COMP_NEW_NEARESTGB, 1500 },
    { THR_COMP_NEAR_NEWGB, 1700 },
    { THR_COMP_NEW_NEARGB, 1700 },
    { THR_COMP_NEW_NEWGB, 2000 },
    { THR_COMP_GLOBAL_GLOBALGB, 2500 },

    // LA2.
    { THR_COMP_NEAR_NEARLA2, 1200 },
    { THR_COMP_NEAREST_NEWLA2, 1800 },
    { THR_COMP_NEW_NEARESTLA2, 1500 },
    { THR_COMP_NEAR_NEWLA2, 1700 },
    { THR_COMP_NEW_NEARLA2, 1700 },
    { THR_COMP_NEW_NEWLA2, 2000 },
    { THR_COMP_GLOBAL_GLOBALLA2, 2500 },

    // L2A2.
    { THR_COMP_NEAR_NEARL2A2, 1200 },
    { THR_COMP_NEAREST_NEWL2A2, 1500 },
    { THR_COMP_NEW_NEARESTL2A2, 1500 },
    { THR_COMP_NEAR_NEWL2A2, 1700 },
    { THR_COMP_NEW_NEARL2A2, 1700 },
    { THR_COMP_NEW_NEWL2A2, 2000 },
    { THR_COMP_GLOBAL_GLOBALL2A2, 2500 },

    // L3A2.
    { THR_COMP_NEAR_NEARL3A2, 1440 },
    { THR_COMP_NEAREST_NEWL3A2, 1500 },
    { THR_COMP_NEW_NEARESTL3A2, 1500 },
    { THR_COMP_NEAR_NEWL3A2, 1700 },
    { THR_COMP_NEW_NEARL3A2, 1700 },
    { THR_COMP_NEW_NEWL3A2, 2000 },
    { THR_COMP_GLOBAL_GLOBALL3A2, 2500 },

    // GA2.
    { THR_COMP_NEAR_NEARGA2, 1200 },
    { THR_COMP_NEAREST_NEWGA2, 1500 },
    { THR_COMP_NEW_NEARESTGA2, 1500 },
    { THR_COMP_NEAR_NEWGA2, 1700 },
    { THR_COMP_NEW_NEARGA2, 1700 },
    { THR_COMP_NEW_NEWGA2, 2000 },
    { THR_COMP_GLOBAL_GLOBALGA2, 2750 },

    // LL2.
    { THR_COMP_NEAR_NEARLL2, 1600 },
    { THR_COMP_NEAREST_NEWLL2, 2000 },
    { THR_COMP_NEW_NEARESTLL2, 2000 },
    { THR_COMP_NEAR_NEWLL2, 2640 },
    { THR_COMP_NEW_NEARLL2, 2200 },
    { THR_COMP_NEW_NEWLL2, 2400 },
    { THR_COMP_GLOBAL_GLOBALLL2, 3200 },

    // LL3.
    { THR_COMP_NEAR_NEARLL3, 1600 },
    { THR_COMP_NEAREST_NEWLL3, 2000 },
    { THR_COMP_NEW_NEARESTLL3, 1800 },
    { THR_COMP_NEAR_NEWLL3, 2200 },
    { THR_COMP_NEW_NEARLL3, 2200 },
    { THR_COMP_NEW_NEWLL3, 2400 },
    { THR_COMP_GLOBAL_GLOBALLL3, 3200 },

    // LG.
    { THR_COMP_NEAR_NEARLG, 1760 },
    { THR_COMP_NEAREST_NEWLG, 2400 },
    { THR_COMP_NEW_NEARESTLG, 2000 },
    { THR_COMP_NEAR_NEWLG, 1760 },
    { THR_COMP_NEW_NEARLG, 2640 },
    { THR_COMP_NEW_NEWLG, 2400 },
    { THR_COMP_GLOBAL_GLOBALLG, 3200 },

    // BA.
    { THR_COMP_NEAR_NEARBA, 1600 },
    { THR_COMP_NEAREST_NEWBA, 2000 },
    { THR_COMP_NEW_NEARESTBA, 2000 },
    { THR_COMP_NEAR_NEWBA, 2200 },
    { THR_COMP_NEW_NEARBA, 1980 },
    { THR_COMP_NEW_NEWBA, 2640 },
    { THR_COMP_GLOBAL_GLOBALBA, 3200 },

    // Intra modes.
    { THR_DC, 1000 },
    { THR_PAETH, 1000 },
    { THR_SMOOTH, 2200 },
    { THR_SMOOTH_V, 2000 },
    { THR_SMOOTH_H, 2000 },
    { THR_H_PRED, 2000 },
    { THR_V_PRED, 1800 },
    { THR_D135_PRED, 2500 },
    { THR_D203_PRED, 2000 },
    { THR_D157_PRED, 2500 },
    { THR_D67_PRED, 2000 },
    { THR_D113_PRED, 2500 },
    { THR_D45_PRED, 2500 },
  };
  const int num_entries =
      (int)(sizeof(kBaselineThreshMult) / sizeof(kBaselineThreshMult[0]));

  RD_OPT *const rd_opt = &cpi->rd;

  // Start from an all-zero table, then install the baseline values.
  av1_zero(rd_opt->thresh_mult);
  for (int i = 0; i < num_entries; ++i) {
    rd_opt->thresh_mult[kBaselineThreshMult[i].mode] =
        kBaselineThreshMult[i].mult;
  }
}
1434
1435
// Adapts the threshold factors in |factor_buf| for every block size in
// [min_size, max_size] and every mode in [mode_start, mode_end).
//
// The entry of the winning mode (|best_mode_index|) is reduced by
// (value >> RD_THRESH_LOG_DEC_FACTOR); every other entry is raised by
// RD_THRESH_INC, capped at |max_rd_thresh_factor|. Each cell is updated
// independently of all others, so the visiting order does not matter.
static inline void update_thr_fact(int (*factor_buf)[MAX_MODES],
                                   THR_MODES best_mode_index,
                                   THR_MODES mode_start, THR_MODES mode_end,
                                   BLOCK_SIZE min_size, BLOCK_SIZE max_size,
                                   int max_rd_thresh_factor) {
  for (BLOCK_SIZE bs = min_size; bs <= max_size; ++bs) {
    int *const row = factor_buf[bs];
    for (THR_MODES m = mode_start; m < mode_end; ++m) {
      if (m != best_mode_index) {
        // Mode lost: raise its factor, up to the cap.
        row[m] = AOMMIN(row[m] + RD_THRESH_INC, max_rd_thresh_factor);
        continue;
      }
      // Mode won: decay its factor.
      row[m] -= (row[m] >> RD_THRESH_LOG_DEC_FACTOR);
    }
  }
}
1451
1452
void av1_update_rd_thresh_fact(
1453
    const AV1_COMMON *const cm, int (*factor_buf)[MAX_MODES],
1454
    int use_adaptive_rd_thresh, BLOCK_SIZE bsize, THR_MODES best_mode_index,
1455
    THR_MODES inter_mode_start, THR_MODES inter_mode_end,
1456
928k
    THR_MODES intra_mode_start, THR_MODES intra_mode_end) {
1457
928k
  assert(use_adaptive_rd_thresh > 0);
1458
928k
  const int max_rd_thresh_factor = use_adaptive_rd_thresh * RD_THRESH_MAX_FACT;
1459
1460
928k
  const int bsize_is_1_to_4 = bsize > cm->seq_params->sb_size;
1461
928k
  BLOCK_SIZE min_size, max_size;
1462
928k
  if (bsize_is_1_to_4) {
1463
    // This part handles block sizes with 1:4 and 4:1 aspect ratios
1464
    // TODO(any): Experiment with threshold update for parent/child blocks
1465
0
    min_size = bsize;
1466
0
    max_size = bsize;
1467
928k
  } else {
1468
928k
    min_size = AOMMAX(bsize - 2, BLOCK_4X4);
1469
928k
    max_size = AOMMIN(bsize + 2, (int)cm->seq_params->sb_size);
1470
928k
  }
1471
1472
928k
  update_thr_fact(factor_buf, best_mode_index, inter_mode_start, inter_mode_end,
1473
928k
                  min_size, max_size, max_rd_thresh_factor);
1474
928k
  update_thr_fact(factor_buf, best_mode_index, intra_mode_start, intra_mode_end,
1475
928k
                  min_size, max_size, max_rd_thresh_factor);
1476
928k
}
1477
1478
int av1_get_intra_cost_penalty(int qindex, int qdelta,
1479
46.7M
                               aom_bit_depth_t bit_depth) {
1480
46.7M
  const int q = av1_dc_quant_QTX(qindex, qdelta, bit_depth);
1481
46.7M
  switch (bit_depth) {
1482
41.3M
    case AOM_BITS_8: return 20 * q;
1483
2.84M
    case AOM_BITS_10: return 5 * q;
1484
2.51M
    case AOM_BITS_12: return ROUND_POWER_OF_TWO(5 * q, 2);
1485
0
    default:
1486
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
1487
0
      return -1;
1488
46.7M
  }
1489
46.7M
}