Coverage Report

Created: 2026-01-16 07:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/aom/av1/encoder/rd.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <limits.h>
14
#include <math.h>
15
#include <stdio.h>
16
17
#include "aom_dsp/aom_dsp_common.h"
18
#include "aom_mem/aom_mem.h"
19
#include "aom_ports/bitops.h"
20
#include "aom_ports/mem.h"
21
#include "aom_ports/aom_once.h"
22
23
#include "av1/common/common.h"
24
#include "av1/common/entropy.h"
25
#include "av1/common/entropymode.h"
26
#include "av1/common/pred_common.h"
27
#include "av1/common/quant_common.h"
28
#include "av1/common/reconinter.h"
29
#include "av1/common/reconintra.h"
30
#include "av1/common/seg_common.h"
31
32
#include "av1/encoder/cost.h"
33
#include "av1/encoder/encodemv.h"
34
#include "av1/encoder/encoder.h"
35
#include "av1/encoder/nonrd_opt.h"
36
#include "av1/encoder/ratectrl.h"
37
#include "av1/encoder/rd.h"
38
#include "config/aom_config.h"
39
40
#define RD_THRESH_POW 1.25
41
42
// The baseline rd thresholds for breaking out of the rd loop for
43
// certain modes are assumed to be based on 8x8 blocks.
44
// This table is used to correct for block size.
45
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
46
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
47
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
48
};
49
50
static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA]
51
                                            [EXT_TX_SIZES] = {
52
                                              { 1, 1, 1, 1 },  // unused
53
                                              { 1, 1, 0, 0 },
54
                                              { 0, 0, 1, 0 },
55
                                            };
56
57
static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER]
58
                                            [EXT_TX_SIZES] = {
59
                                              { 1, 1, 1, 1 },  // unused
60
                                              { 1, 1, 0, 0 },
61
                                              { 0, 0, 1, 0 },
62
                                              { 0, 1, 1, 1 },
63
                                            };
64
65
static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
66
                                                      EXT_TX_SETS_INTER)] = {
67
  {
68
      // Intra
69
      EXT_TX_SET_DCTONLY,
70
      EXT_TX_SET_DTT4_IDTX_1DDCT,
71
      EXT_TX_SET_DTT4_IDTX,
72
  },
73
  {
74
      // Inter
75
      EXT_TX_SET_DCTONLY,
76
      EXT_TX_SET_ALL16,
77
      EXT_TX_SET_DTT9_IDTX_1DDCT,
78
      EXT_TX_SET_DCT_IDTX,
79
  },
80
};
81
82
void av1_fill_mode_rates(AV1_COMMON *const cm, ModeCosts *mode_costs,
83
496k
                         FRAME_CONTEXT *fc) {
84
496k
  int i, j;
85
86
10.4M
  for (i = 0; i < PARTITION_CONTEXTS; ++i)
87
9.92M
    av1_cost_tokens_from_cdf(mode_costs->partition_cost[i],
88
9.92M
                             fc->partition_cdf[i], NULL);
89
90
496k
  if (cm->current_frame.skip_mode_info.skip_mode_flag) {
91
57.8k
    for (i = 0; i < SKIP_MODE_CONTEXTS; ++i) {
92
43.3k
      av1_cost_tokens_from_cdf(mode_costs->skip_mode_cost[i],
93
43.3k
                               fc->skip_mode_cdfs[i], NULL);
94
43.3k
    }
95
14.4k
  }
96
97
1.98M
  for (i = 0; i < SKIP_CONTEXTS; ++i) {
98
1.49M
    av1_cost_tokens_from_cdf(mode_costs->skip_txfm_cost[i],
99
1.49M
                             fc->skip_txfm_cdfs[i], NULL);
100
1.49M
  }
101
102
2.98M
  for (i = 0; i < KF_MODE_CONTEXTS; ++i)
103
14.8M
    for (j = 0; j < KF_MODE_CONTEXTS; ++j)
104
12.4M
      av1_cost_tokens_from_cdf(mode_costs->y_mode_costs[i][j],
105
12.4M
                               fc->kf_y_cdf[i][j], NULL);
106
107
2.48M
  for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
108
1.98M
    av1_cost_tokens_from_cdf(mode_costs->mbmode_cost[i], fc->y_mode_cdf[i],
109
1.98M
                             NULL);
110
1.49M
  for (i = 0; i < CFL_ALLOWED_TYPES; ++i)
111
13.8M
    for (j = 0; j < INTRA_MODES; ++j)
112
12.9M
      av1_cost_tokens_from_cdf(mode_costs->intra_uv_mode_cost[i][j],
113
12.9M
                               fc->uv_mode_cdf[i][j], NULL);
114
115
496k
  av1_cost_tokens_from_cdf(mode_costs->filter_intra_mode_cost,
116
496k
                           fc->filter_intra_mode_cdf, NULL);
117
11.4M
  for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
118
10.9M
    if (av1_filter_intra_allowed_bsize(cm, i))
119
6.94M
      av1_cost_tokens_from_cdf(mode_costs->filter_intra_cost[i],
120
6.94M
                               fc->filter_intra_cdfs[i], NULL);
121
10.9M
  }
122
123
8.43M
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
124
7.94M
    av1_cost_tokens_from_cdf(mode_costs->switchable_interp_costs[i],
125
7.94M
                             fc->switchable_interp_cdf[i], NULL);
126
127
3.97M
  for (i = 0; i < PALATTE_BSIZE_CTXS; ++i) {
128
3.48M
    av1_cost_tokens_from_cdf(mode_costs->palette_y_size_cost[i],
129
3.48M
                             fc->palette_y_size_cdf[i], NULL);
130
3.48M
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_size_cost[i],
131
3.48M
                             fc->palette_uv_size_cdf[i], NULL);
132
13.9M
    for (j = 0; j < PALETTE_Y_MODE_CONTEXTS; ++j) {
133
10.4M
      av1_cost_tokens_from_cdf(mode_costs->palette_y_mode_cost[i][j],
134
10.4M
                               fc->palette_y_mode_cdf[i][j], NULL);
135
10.4M
    }
136
3.48M
  }
137
138
1.49M
  for (i = 0; i < PALETTE_UV_MODE_CONTEXTS; ++i) {
139
994k
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_mode_cost[i],
140
994k
                             fc->palette_uv_mode_cdf[i], NULL);
141
994k
  }
142
143
3.97M
  for (i = 0; i < PALETTE_SIZES; ++i) {
144
20.8M
    for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) {
145
17.3M
      av1_cost_tokens_from_cdf(mode_costs->palette_y_color_cost[i][j],
146
17.3M
                               fc->palette_y_color_index_cdf[i][j], NULL);
147
17.3M
      av1_cost_tokens_from_cdf(mode_costs->palette_uv_color_cost[i][j],
148
17.3M
                               fc->palette_uv_color_index_cdf[i][j], NULL);
149
17.3M
    }
150
3.47M
  }
151
152
496k
  int sign_cost[CFL_JOINT_SIGNS];
153
496k
  av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL);
154
4.47M
  for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
155
3.97M
    int *cost_u = mode_costs->cfl_cost[joint_sign][CFL_PRED_U];
156
3.97M
    int *cost_v = mode_costs->cfl_cost[joint_sign][CFL_PRED_V];
157
3.97M
    if (CFL_SIGN_U(joint_sign) == CFL_SIGN_ZERO) {
158
994k
      memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u));
159
2.98M
    } else {
160
2.98M
      const aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
161
2.98M
      av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL);
162
2.98M
    }
163
3.97M
    if (CFL_SIGN_V(joint_sign) == CFL_SIGN_ZERO) {
164
994k
      memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v));
165
2.98M
    } else {
166
2.98M
      const aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
167
2.98M
      av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL);
168
2.98M
    }
169
67.5M
    for (int u = 0; u < CFL_ALPHABET_SIZE; u++)
170
63.5M
      cost_u[u] += sign_cost[joint_sign];
171
3.97M
  }
172
173
2.48M
  for (i = 0; i < MAX_TX_CATS; ++i)
174
7.94M
    for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
175
5.95M
      av1_cost_tokens_from_cdf(mode_costs->tx_size_cost[i][j],
176
5.95M
                               fc->tx_size_cdf[i][j], NULL);
177
178
10.9M
  for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) {
179
10.4M
    av1_cost_tokens_from_cdf(mode_costs->txfm_partition_cost[i],
180
10.4M
                             fc->txfm_partition_cdf[i], NULL);
181
10.4M
  }
182
183
2.48M
  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
184
1.98M
    int s;
185
7.95M
    for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
186
5.96M
      if (use_inter_ext_tx_for_txsize[s][i]) {
187
2.98M
        av1_cost_tokens_from_cdf(
188
2.98M
            mode_costs->inter_tx_type_costs[s][i], fc->inter_ext_tx_cdf[s][i],
189
2.98M
            av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]);
190
2.98M
      }
191
5.96M
    }
192
5.96M
    for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
193
3.97M
      if (use_intra_ext_tx_for_txsize[s][i]) {
194
20.8M
        for (j = 0; j < INTRA_MODES; ++j) {
195
19.3M
          av1_cost_tokens_from_cdf(
196
19.3M
              mode_costs->intra_tx_type_costs[s][i][j],
197
19.3M
              fc->intra_ext_tx_cdf[s][i][j],
198
19.3M
              av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][s]]);
199
19.3M
        }
200
1.49M
      }
201
3.97M
    }
202
1.98M
  }
203
4.47M
  for (i = 0; i < DIRECTIONAL_MODES; ++i) {
204
3.97M
    av1_cost_tokens_from_cdf(mode_costs->angle_delta_cost[i],
205
3.97M
                             fc->angle_delta_cdf[i], NULL);
206
3.97M
  }
207
496k
  av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL);
208
209
1.98M
  for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i) {
210
1.49M
    av1_cost_tokens_from_cdf(mode_costs->spatial_pred_cost[i],
211
1.49M
                             fc->seg.spatial_pred_seg_cdf[i], NULL);
212
1.49M
  }
213
214
1.98M
  for (i = 0; i < SEG_TEMPORAL_PRED_CTXS; ++i) {
215
1.49M
    av1_cost_tokens_from_cdf(mode_costs->tmp_pred_cost[i], fc->seg.pred_cdf[i],
216
1.49M
                             NULL);
217
1.49M
  }
218
219
496k
  if (!frame_is_intra_only(cm)) {
220
578k
    for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
221
482k
      av1_cost_tokens_from_cdf(mode_costs->comp_inter_cost[i],
222
482k
                               fc->comp_inter_cdf[i], NULL);
223
482k
    }
224
225
385k
    for (i = 0; i < REF_CONTEXTS; ++i) {
226
2.02M
      for (j = 0; j < SINGLE_REFS - 1; ++j) {
227
1.73M
        av1_cost_tokens_from_cdf(mode_costs->single_ref_cost[i][j],
228
1.73M
                                 fc->single_ref_cdf[i][j], NULL);
229
1.73M
      }
230
289k
    }
231
232
578k
    for (i = 0; i < COMP_REF_TYPE_CONTEXTS; ++i) {
233
482k
      av1_cost_tokens_from_cdf(mode_costs->comp_ref_type_cost[i],
234
482k
                               fc->comp_ref_type_cdf[i], NULL);
235
482k
    }
236
237
385k
    for (i = 0; i < UNI_COMP_REF_CONTEXTS; ++i) {
238
1.15M
      for (j = 0; j < UNIDIR_COMP_REFS - 1; ++j) {
239
867k
        av1_cost_tokens_from_cdf(mode_costs->uni_comp_ref_cost[i][j],
240
867k
                                 fc->uni_comp_ref_cdf[i][j], NULL);
241
867k
      }
242
289k
    }
243
244
385k
    for (i = 0; i < REF_CONTEXTS; ++i) {
245
1.15M
      for (j = 0; j < FWD_REFS - 1; ++j) {
246
867k
        av1_cost_tokens_from_cdf(mode_costs->comp_ref_cost[i][j],
247
867k
                                 fc->comp_ref_cdf[i][j], NULL);
248
867k
      }
249
289k
    }
250
251
385k
    for (i = 0; i < REF_CONTEXTS; ++i) {
252
867k
      for (j = 0; j < BWD_REFS - 1; ++j) {
253
578k
        av1_cost_tokens_from_cdf(mode_costs->comp_bwdref_cost[i][j],
254
578k
                                 fc->comp_bwdref_cdf[i][j], NULL);
255
578k
      }
256
289k
    }
257
258
482k
    for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
259
385k
      av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[i],
260
385k
                               fc->intra_inter_cdf[i], NULL);
261
385k
    }
262
263
675k
    for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
264
578k
      av1_cost_tokens_from_cdf(mode_costs->newmv_mode_cost[i], fc->newmv_cdf[i],
265
578k
                               NULL);
266
578k
    }
267
268
289k
    for (i = 0; i < GLOBALMV_MODE_CONTEXTS; ++i) {
269
192k
      av1_cost_tokens_from_cdf(mode_costs->zeromv_mode_cost[i],
270
192k
                               fc->zeromv_cdf[i], NULL);
271
192k
    }
272
273
675k
    for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) {
274
578k
      av1_cost_tokens_from_cdf(mode_costs->refmv_mode_cost[i], fc->refmv_cdf[i],
275
578k
                               NULL);
276
578k
    }
277
278
385k
    for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
279
289k
      av1_cost_tokens_from_cdf(mode_costs->drl_mode_cost0[i], fc->drl_cdf[i],
280
289k
                               NULL);
281
289k
    }
282
868k
    for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
283
771k
      av1_cost_tokens_from_cdf(mode_costs->inter_compound_mode_cost[i],
284
771k
                               fc->inter_compound_mode_cdf[i], NULL);
285
2.21M
    for (i = 0; i < BLOCK_SIZES_ALL; ++i)
286
2.12M
      av1_cost_tokens_from_cdf(mode_costs->compound_type_cost[i],
287
2.12M
                               fc->compound_type_cdf[i], NULL);
288
2.21M
    for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
289
2.11M
      if (av1_is_wedge_used(i)) {
290
867k
        av1_cost_tokens_from_cdf(mode_costs->wedge_idx_cost[i],
291
867k
                                 fc->wedge_idx_cdf[i], NULL);
292
867k
      }
293
2.11M
    }
294
482k
    for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
295
385k
      av1_cost_tokens_from_cdf(mode_costs->interintra_cost[i],
296
385k
                               fc->interintra_cdf[i], NULL);
297
385k
      av1_cost_tokens_from_cdf(mode_costs->interintra_mode_cost[i],
298
385k
                               fc->interintra_mode_cdf[i], NULL);
299
385k
    }
300
2.21M
    for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
301
2.11M
      av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[i],
302
2.11M
                               fc->wedge_interintra_cdf[i], NULL);
303
2.11M
    }
304
1.92M
    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
305
1.83M
      av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost[i],
306
1.83M
                               fc->motion_mode_cdf[i], NULL);
307
1.83M
    }
308
1.92M
    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
309
1.83M
      av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost1[i],
310
1.83M
                               fc->obmc_cdf[i], NULL);
311
1.83M
    }
312
675k
    for (i = 0; i < COMP_INDEX_CONTEXTS; ++i) {
313
578k
      av1_cost_tokens_from_cdf(mode_costs->comp_idx_cost[i],
314
578k
                               fc->compound_index_cdf[i], NULL);
315
578k
    }
316
675k
    for (i = 0; i < COMP_GROUP_IDX_CONTEXTS; ++i) {
317
578k
      av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[i],
318
578k
                               fc->comp_group_idx_cdf[i], NULL);
319
578k
    }
320
96.4k
  }
321
496k
}
322
323
#if !CONFIG_REALTIME_ONLY
324
13.2k
void av1_fill_lr_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
325
13.2k
  av1_cost_tokens_from_cdf(mode_costs->switchable_restore_cost,
326
13.2k
                           fc->switchable_restore_cdf, NULL);
327
13.2k
  av1_cost_tokens_from_cdf(mode_costs->wiener_restore_cost,
328
13.2k
                           fc->wiener_restore_cdf, NULL);
329
13.2k
  av1_cost_tokens_from_cdf(mode_costs->sgrproj_restore_cost,
330
13.2k
                           fc->sgrproj_restore_cdf, NULL);
331
13.2k
}
332
#endif  // !CONFIG_REALTIME_ONLY
333
334
// Values are now correlated to quantizer.
335
static int sad_per_bit_lut_8[QINDEX_RANGE];
336
static int sad_per_bit_lut_10[QINDEX_RANGE];
337
static int sad_per_bit_lut_12[QINDEX_RANGE];
338
339
static void init_me_luts_bd(int *bit16lut, int range,
340
12
                            aom_bit_depth_t bit_depth) {
341
12
  int i;
342
  // Initialize the sad lut tables using a formulaic calculation for now.
343
  // This is to make it easier to resolve the impact of experimental changes
344
  // to the quantizer tables.
345
3.08k
  for (i = 0; i < range; i++) {
346
3.07k
    const double q = av1_convert_qindex_to_q(i, bit_depth);
347
3.07k
    bit16lut[i] = (int)(0.0418 * q + 2.4107);
348
3.07k
  }
349
12
}
350
351
4
static void init_me_luts(void) {
352
4
  init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, AOM_BITS_8);
353
4
  init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, AOM_BITS_10);
354
4
  init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
355
4
}
356
357
76.5k
void av1_init_me_luts(void) { aom_once(init_me_luts); }
358
359
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
360
                                         8,  8,  4,  4,  2,  2,  1,  0 };
361
362
static const int rd_layer_depth_factor[7] = {
363
  160, 160, 160, 160, 192, 208, 224
364
};
365
366
// Returns the default rd multiplier for inter frames for a given qindex.
367
// The function here is a first pass estimate based on data from
368
// a previous Vizer run
369
35.0k
static double def_inter_rd_multiplier(int qindex) {
370
35.0k
  return 3.2 + (0.0015 * (double)qindex);
371
35.0k
}
372
373
// Returns the default rd multiplier for ARF/Golden Frames for a given qindex.
374
// The function here is a first pass estimate based on data from
375
// a previous Vizer run
376
18.9k
static double def_arf_rd_multiplier(int qindex) {
377
18.9k
  return 3.25 + (0.0015 * (double)qindex);
378
18.9k
}
379
380
// Returns the default rd multiplier for key frames for a given qindex.
381
// The function here is a first pass estimate based on data from
382
// a previous Vizer run
383
426k
static double def_kf_rd_multiplier(int qindex) {
384
426k
  return 3.3 + (0.0015 * (double)qindex);
385
426k
}
386
387
int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
388
                                        FRAME_UPDATE_TYPE update_type,
389
480k
                                        int qindex, aom_tune_metric tuning) {
390
480k
  const int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
391
480k
  int64_t rdmult = q * q;
392
480k
  if (update_type == KF_UPDATE) {
393
426k
    double def_rd_q_mult = def_kf_rd_multiplier(q);
394
426k
    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
395
426k
  } else if ((update_type == GF_UPDATE) || (update_type == ARF_UPDATE)) {
396
18.9k
    double def_rd_q_mult = def_arf_rd_multiplier(q);
397
18.9k
    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
398
35.0k
  } else {
399
35.0k
    double def_rd_q_mult = def_inter_rd_multiplier(q);
400
35.0k
    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
401
35.0k
  }
402
403
480k
  if (tuning == AOM_TUNE_IQ || tuning == AOM_TUNE_SSIMULACRA2) {
404
    // Further multiply rdmult (by up to 200/128 = 1.5625) to improve image
405
    // quality. The most noticeable effect is a mild bias towards choosing
406
    // larger transform sizes (e.g. one 16x16 transform instead of 4 8x8
407
    // transforms).
408
    // For very high qindexes, start progressively reducing the weight towards
409
    // unity (128/128), as transforms are large enough and making them even
410
    // larger actually harms subjective quality and SSIMULACRA 2 scores.
411
    // This weight part of the equation was determined by iteratively increasing
412
    // weight on CID22 and Daala's subset1, and observing its effects on visual
413
    // quality and SSIMULACRA 2 scores along the usable (0-100) range.
414
    // The ramp-down part of the equation was determined by choosing a fixed
415
    // initial qindex point [qindex 159 = (255 - 159) * 3 / 4] where SSIMULACRA
416
    // 2 scores for encodes with qindexes greater than 159 scored at or above
417
    // their equivalents with no rdmult adjustment.
418
0
    const int weight = clamp(((255 - qindex) * 3) / 4, 0, 72) + 128;
419
0
    rdmult = (int64_t)((double)rdmult * weight / 128.0);
420
0
  }
421
422
480k
  switch (bit_depth) {
423
366k
    case AOM_BITS_8: break;
424
57.6k
    case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
425
55.7k
    case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
426
0
    default:
427
0
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
428
0
      return -1;
429
480k
  }
430
480k
  return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
431
480k
}
432
433
int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
434
                        const FRAME_UPDATE_TYPE update_type,
435
                        const int layer_depth, const int boost_index,
436
                        const FRAME_TYPE frame_type,
437
                        const int use_fixed_qp_offsets,
438
                        const int is_stat_consumption_stage,
439
461k
                        const aom_tune_metric tuning) {
440
461k
  int64_t rdmult = av1_compute_rd_mult_based_on_qindex(bit_depth, update_type,
441
461k
                                                       qindex, tuning);
442
461k
  if (is_stat_consumption_stage && !use_fixed_qp_offsets &&
443
225k
      (frame_type != KEY_FRAME)) {
444
    // Layer depth adjustment
445
37.9k
    rdmult = (rdmult * rd_layer_depth_factor[layer_depth]) >> 7;
446
    // ARF boost adjustment
447
37.9k
    rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
448
37.9k
  }
449
461k
  return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
450
461k
}
451
452
28.0k
int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
453
28.0k
  assert(beta > 0.0);
454
28.0k
  int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
455
28.0k
  int newq = (int)rint(q / sqrt(beta));
456
28.0k
  int orig_qindex = qindex;
457
28.0k
  if (newq == q) {
458
23.8k
    return 0;
459
23.8k
  }
460
4.26k
  if (newq < q) {
461
6.11k
    while (qindex > 0) {
462
6.11k
      qindex--;
463
6.11k
      q = av1_dc_quant_QTX(qindex, 0, bit_depth);
464
6.11k
      if (newq >= q) {
465
2.07k
        break;
466
2.07k
      }
467
6.11k
    }
468
2.18k
  } else {
469
3.98k
    while (qindex < MAXQ) {
470
3.62k
      qindex++;
471
3.62k
      q = av1_dc_quant_QTX(qindex, 0, bit_depth);
472
3.62k
      if (newq <= q) {
473
1.82k
        break;
474
1.82k
      }
475
3.62k
    }
476
2.18k
  }
477
4.26k
  return qindex - orig_qindex;
478
28.0k
}
479
480
int av1_adjust_q_from_delta_q_res(int delta_q_res, int prev_qindex,
481
2.22k
                                  int curr_qindex) {
482
2.22k
  curr_qindex = clamp(curr_qindex, delta_q_res, 256 - delta_q_res);
483
2.22k
  const int sign_deltaq_index = curr_qindex - prev_qindex >= 0 ? 1 : -1;
484
2.22k
  const int deltaq_deadzone = delta_q_res / 4;
485
2.22k
  const int qmask = ~(delta_q_res - 1);
486
2.22k
  int abs_deltaq_index = abs(curr_qindex - prev_qindex);
487
2.22k
  abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
488
2.22k
  int adjust_qindex = prev_qindex + sign_deltaq_index * abs_deltaq_index;
489
2.22k
  adjust_qindex = AOMMAX(adjust_qindex, MINQ + 1);
490
2.22k
  return adjust_qindex;
491
2.22k
}
492
493
#if !CONFIG_REALTIME_ONLY
494
0
int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
495
0
  assert(beta > 0.0);
496
0
  const AV1_COMMON *cm = &cpi->common;
497
498
0
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
499
0
  const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
500
0
  const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
501
0
  const FRAME_TYPE frame_type = cm->current_frame.frame_type;
502
503
0
  const int qindex_rdmult = cm->quant_params.base_qindex;
504
0
  return (int)(av1_compute_rd_mult(
505
0
                   qindex_rdmult, cm->seq_params->bit_depth,
506
0
                   cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
507
0
                   layer_depth, boost_index, frame_type,
508
0
                   cpi->oxcf.q_cfg.use_fixed_qp_offsets,
509
0
                   is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning) /
510
0
               beta);
511
0
}
512
#endif  // !CONFIG_REALTIME_ONLY
513
514
1.09M
static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
515
1.09M
  double q;
516
1.09M
  switch (bit_depth) {
517
785k
    case AOM_BITS_8: q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_8) / 4.0; break;
518
141k
    case AOM_BITS_10:
519
141k
      q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_10) / 16.0;
520
141k
      break;
521
165k
    case AOM_BITS_12:
522
165k
      q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_12) / 64.0;
523
165k
      break;
524
0
    default:
525
0
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
526
0
      return -1;
527
1.09M
  }
528
  // TODO(debargha): Adjust the function below.
529
1.09M
  return AOMMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
530
1.09M
}
531
532
293k
void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
533
293k
  switch (cpi->common.seq_params->bit_depth) {
534
211k
    case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
535
38.7k
    case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
536
43.9k
    case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
537
0
    default:
538
0
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
539
293k
  }
540
293k
}
541
542
static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd,
543
136k
                                 int use_nonrd_pick_mode) {
544
136k
  int i, bsize, segment_id;
545
136k
  THR_MODES mode_indices[RTC_REFS * RTC_MODES] = { 0 };
546
136k
  int num_modes_count = use_nonrd_pick_mode ? 0 : MAX_MODES;
547
548
136k
  if (use_nonrd_pick_mode) {
549
193k
    for (int r_idx = 0; r_idx < RTC_REFS; r_idx++) {
550
154k
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
551
154k
      if (ref != INTRA_FRAME) {
552
581k
        for (i = 0; i < RTC_INTER_MODES; i++)
553
464k
          mode_indices[num_modes_count++] =
554
464k
              mode_idx[ref][mode_offset(inter_mode_list[i])];
555
116k
      } else {
556
193k
        for (i = 0; i < RTC_INTRA_MODES; i++)
557
154k
          mode_indices[num_modes_count++] =
558
154k
              mode_idx[ref][mode_offset(intra_mode_list[i])];
559
38.7k
      }
560
154k
    }
561
38.7k
  }
562
563
1.22M
  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
564
1.09M
    const int qindex = clamp(
565
1.09M
        av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex) +
566
1.09M
            cm->quant_params.y_dc_delta_q,
567
1.09M
        0, MAXQ);
568
1.09M
    const int q = compute_rd_thresh_factor(qindex, cm->seq_params->bit_depth);
569
570
25.1M
    for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
571
      // Threshold here seems unnecessarily harsh but fine given actual
572
      // range of values used for cpi->sf.thresh_mult[].
573
24.0M
      const int t = q * rd_thresh_block_size_factor[bsize];
574
24.0M
      const int thresh_max = INT_MAX / t;
575
576
3.04G
      for (i = 0; i < num_modes_count; ++i) {
577
3.01G
        const int mode_index = use_nonrd_pick_mode ? mode_indices[i] : i;
578
3.01G
        rd->threshes[segment_id][bsize][mode_index] =
579
3.01G
            rd->thresh_mult[mode_index] < thresh_max
580
3.01G
                ? rd->thresh_mult[mode_index] * t / 4
581
3.01G
                : INT_MAX;
582
3.01G
      }
583
24.0M
    }
584
1.09M
  }
585
136k
}
586
587
void av1_fill_coeff_costs(CoeffCosts *coeff_costs, FRAME_CONTEXT *fc,
588
495k
                          const int num_planes) {
589
495k
  const int nplanes = AOMMIN(num_planes, PLANE_TYPES);
590
3.95M
  for (int eob_multi_size = 0; eob_multi_size < 7; ++eob_multi_size) {
591
8.27M
    for (int plane = 0; plane < nplanes; ++plane) {
592
4.81M
      LV_MAP_EOB_COST *pcost = &coeff_costs->eob_costs[eob_multi_size][plane];
593
594
14.4M
      for (int ctx = 0; ctx < 2; ++ctx) {
595
9.60M
        aom_cdf_prob *pcdf;
596
9.60M
        switch (eob_multi_size) {
597
1.37M
          case 0: pcdf = fc->eob_flag_cdf16[plane][ctx]; break;
598
1.37M
          case 1: pcdf = fc->eob_flag_cdf32[plane][ctx]; break;
599
1.37M
          case 2: pcdf = fc->eob_flag_cdf64[plane][ctx]; break;
600
1.37M
          case 3: pcdf = fc->eob_flag_cdf128[plane][ctx]; break;
601
1.37M
          case 4: pcdf = fc->eob_flag_cdf256[plane][ctx]; break;
602
1.37M
          case 5: pcdf = fc->eob_flag_cdf512[plane][ctx]; break;
603
1.37M
          case 6:
604
1.37M
          default: pcdf = fc->eob_flag_cdf1024[plane][ctx]; break;
605
9.60M
        }
606
9.60M
        av1_cost_tokens_from_cdf(pcost->eob_cost[ctx], pcdf, NULL);
607
9.60M
      }
608
4.81M
    }
609
3.46M
  }
610
2.97M
  for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
611
5.92M
    for (int plane = 0; plane < nplanes; ++plane) {
612
3.45M
      LV_MAP_COEFF_COST *pcost = &coeff_costs->coeff_costs[tx_size][plane];
613
614
48.0M
      for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx)
615
44.6M
        av1_cost_tokens_from_cdf(pcost->txb_skip_cost[ctx],
616
44.6M
                                 fc->txb_skip_cdf[tx_size][ctx], NULL);
617
618
17.2M
      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx)
619
13.7M
        av1_cost_tokens_from_cdf(pcost->base_eob_cost[ctx],
620
13.7M
                                 fc->coeff_base_eob_cdf[tx_size][plane][ctx],
621
13.7M
                                 NULL);
622
146M
      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx)
623
143M
        av1_cost_tokens_from_cdf(pcost->base_cost[ctx],
624
143M
                                 fc->coeff_base_cdf[tx_size][plane][ctx], NULL);
625
626
148M
      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
627
144M
        pcost->base_cost[ctx][4] = 0;
628
144M
        pcost->base_cost[ctx][5] = pcost->base_cost[ctx][1] +
629
144M
                                   av1_cost_literal(1) -
630
144M
                                   pcost->base_cost[ctx][0];
631
144M
        pcost->base_cost[ctx][6] =
632
144M
            pcost->base_cost[ctx][2] - pcost->base_cost[ctx][1];
633
144M
        pcost->base_cost[ctx][7] =
634
144M
            pcost->base_cost[ctx][3] - pcost->base_cost[ctx][2];
635
144M
      }
636
637
34.4M
      for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx)
638
30.9M
        av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx],
639
30.9M
                                 fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
640
641
13.7M
      for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx)
642
10.3M
        av1_cost_tokens_from_cdf(pcost->dc_sign_cost[ctx],
643
10.3M
                                 fc->dc_sign_cdf[plane][ctx], NULL);
644
645
75.7M
      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
646
72.3M
        int br_rate[BR_CDF_SIZE];
647
72.3M
        int prev_cost = 0;
648
72.3M
        int i, j;
649
72.3M
        av1_cost_tokens_from_cdf(
650
72.3M
            br_rate, fc->coeff_br_cdf[AOMMIN(tx_size, TX_32X32)][plane][ctx],
651
72.3M
            NULL);
652
356M
        for (i = 0; i < COEFF_BASE_RANGE; i += BR_CDF_SIZE - 1) {
653
1.13G
          for (j = 0; j < BR_CDF_SIZE - 1; j++) {
654
849M
            pcost->lps_cost[ctx][i + j] = prev_cost + br_rate[j];
655
849M
          }
656
284M
          prev_cost += br_rate[j];
657
284M
        }
658
72.3M
        pcost->lps_cost[ctx][i] = prev_cost;
659
72.3M
      }
660
75.7M
      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
661
72.2M
        pcost->lps_cost[ctx][0 + COEFF_BASE_RANGE + 1] =
662
72.2M
            pcost->lps_cost[ctx][0];
663
936M
        for (int i = 1; i <= COEFF_BASE_RANGE; ++i) {
664
864M
          pcost->lps_cost[ctx][i + COEFF_BASE_RANGE + 1] =
665
864M
              pcost->lps_cost[ctx][i] - pcost->lps_cost[ctx][i - 1];
666
864M
        }
667
72.2M
      }
668
3.45M
    }
669
2.47M
  }
670
498k
}
671
672
void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
673
201k
                       MvCosts *mv_costs) {
674
  // Avoid accessing 'mv_costs' when it is not allocated.
675
201k
  if (mv_costs == NULL) return;
676
677
147k
  mv_costs->nmv_cost[0] = &mv_costs->nmv_cost_alloc[0][MV_MAX];
678
147k
  mv_costs->nmv_cost[1] = &mv_costs->nmv_cost_alloc[1][MV_MAX];
679
147k
  mv_costs->nmv_cost_hp[0] = &mv_costs->nmv_cost_hp_alloc[0][MV_MAX];
680
147k
  mv_costs->nmv_cost_hp[1] = &mv_costs->nmv_cost_hp_alloc[1][MV_MAX];
681
147k
  if (integer_mv) {
682
0
    mv_costs->mv_cost_stack = (int **)&mv_costs->nmv_cost;
683
0
    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
684
0
                             nmvc, MV_SUBPEL_NONE);
685
147k
  } else {
686
147k
    mv_costs->mv_cost_stack =
687
147k
        usehp ? mv_costs->nmv_cost_hp : mv_costs->nmv_cost;
688
147k
    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
689
147k
                             nmvc, usehp);
690
147k
  }
691
147k
}
692
693
0
void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs) {
694
0
  dv_costs->dv_costs[0] = &dv_costs->dv_costs_alloc[0][MV_MAX];
695
0
  dv_costs->dv_costs[1] = &dv_costs->dv_costs_alloc[1][MV_MAX];
696
0
  av1_build_nmv_cost_table(dv_costs->joint_mv, dv_costs->dv_costs, ndvc,
697
0
                           MV_SUBPEL_NONE);
698
0
}
699
700
// Populates speed features based on codec control settings (of type
701
// COST_UPDATE_TYPE) and expected speed feature settings (of type
702
// INTERNAL_COST_UPDATE_TYPE) by considering the least frequent cost update.
703
// The populated/updated speed features are used for cost updates in the
704
// encoder.
705
// WARNING: Population of unified cost update frequency needs to be taken care
706
// accordingly, in case of any modifications/additions to the enum
707
// COST_UPDATE_TYPE/INTERNAL_COST_UPDATE_TYPE.
708
static inline void populate_unified_cost_update_freq(
709
136k
    const CostUpdateFreq cost_upd_freq, SPEED_FEATURES *const sf) {
710
136k
  INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
711
  // Mapping of entropy cost update frequency from the encoder's codec control
712
  // settings of type COST_UPDATE_TYPE to speed features of type
713
  // INTERNAL_COST_UPDATE_TYPE.
714
136k
  static const INTERNAL_COST_UPDATE_TYPE
715
136k
      map_cost_upd_to_internal_cost_upd[NUM_COST_UPDATE_TYPES] = {
716
136k
        INTERNAL_COST_UPD_SB, INTERNAL_COST_UPD_SBROW, INTERNAL_COST_UPD_TILE,
717
136k
        INTERNAL_COST_UPD_OFF
718
136k
      };
719
720
136k
  inter_sf->mv_cost_upd_level =
721
136k
      AOMMIN(inter_sf->mv_cost_upd_level,
722
136k
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.mv]);
723
136k
  inter_sf->coeff_cost_upd_level =
724
136k
      AOMMIN(inter_sf->coeff_cost_upd_level,
725
136k
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.coeff]);
726
136k
  inter_sf->mode_cost_upd_level =
727
136k
      AOMMIN(inter_sf->mode_cost_upd_level,
728
136k
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.mode]);
729
136k
  sf->intra_sf.dv_cost_upd_level =
730
136k
      AOMMIN(sf->intra_sf.dv_cost_upd_level,
731
136k
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.dv]);
732
136k
}
733
734
// Checks if entropy costs should be initialized/updated at frame level or not.
735
static inline int is_frame_level_cost_upd_freq_set(
736
    const AV1_COMMON *const cm, const INTERNAL_COST_UPDATE_TYPE cost_upd_level,
737
400k
    const int use_nonrd_pick_mode, const int frames_since_key) {
738
400k
  const int fill_costs =
739
400k
      frame_is_intra_only(cm) ||
740
104k
      (use_nonrd_pick_mode ? frames_since_key < 2
741
104k
                           : (cm->current_frame.frame_number & 0x07) == 1);
742
400k
  return ((!use_nonrd_pick_mode && cost_upd_level != INTERNAL_COST_UPD_OFF) ||
743
144k
          cost_upd_level == INTERNAL_COST_UPD_TILE || fill_costs);
744
400k
}
745
746
// Decide whether we want to update the mode entropy cost for the current frame.
747
// The logit is currently inherited from selective_disable_cdf_rtc.
748
136k
static inline int should_force_mode_cost_update(const AV1_COMP *cpi) {
749
136k
  const REAL_TIME_SPEED_FEATURES *const rt_sf = &cpi->sf.rt_sf;
750
136k
  if (!rt_sf->frame_level_mode_cost_update) {
751
127k
    return false;
752
127k
  }
753
754
8.92k
  if (cpi->oxcf.algo_cfg.cdf_update_mode == 2) {
755
0
    return cpi->frames_since_last_update == 1;
756
8.92k
  } else if (cpi->oxcf.algo_cfg.cdf_update_mode == 1) {
757
8.92k
    if (cpi->svc.number_spatial_layers == 1 &&
758
8.92k
        cpi->svc.number_temporal_layers == 1) {
759
8.92k
      const AV1_COMMON *const cm = &cpi->common;
760
8.92k
      const RATE_CONTROL *const rc = &cpi->rc;
761
762
8.92k
      return frame_is_intra_only(cm) || is_frame_resize_pending(cpi) ||
763
4.13k
             rc->high_source_sad || rc->frames_since_key < 10 ||
764
0
             cpi->cyclic_refresh->counter_encode_maxq_scene_change < 10 ||
765
0
             cm->current_frame.frame_number % 8 == 0;
766
8.92k
    } else if (cpi->svc.number_temporal_layers > 1) {
767
0
      return cpi->svc.temporal_layer_id != cpi->svc.number_temporal_layers - 1;
768
0
    }
769
8.92k
  }
770
771
0
  return false;
772
8.92k
}
773
774
136k
void av1_initialize_rd_consts(AV1_COMP *cpi) {
775
136k
  AV1_COMMON *const cm = &cpi->common;
776
136k
  MACROBLOCK *const x = &cpi->td.mb;
777
136k
  SPEED_FEATURES *const sf = &cpi->sf;
778
136k
  RD_OPT *const rd = &cpi->rd;
779
136k
  int use_nonrd_pick_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
780
136k
  int frames_since_key = cpi->rc.frames_since_key;
781
782
136k
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
783
136k
  const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
784
136k
  const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
785
136k
  const FRAME_TYPE frame_type = cm->current_frame.frame_type;
786
787
136k
  const int qindex_rdmult =
788
136k
      cm->quant_params.base_qindex + cm->quant_params.y_dc_delta_q;
789
136k
  rd->RDMULT = av1_compute_rd_mult(
790
136k
      qindex_rdmult, cm->seq_params->bit_depth,
791
136k
      cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
792
136k
      boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
793
136k
      is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
794
#if CONFIG_RD_COMMAND
795
  if (cpi->oxcf.pass == 2) {
796
    const RD_COMMAND *rd_command = &cpi->rd_command;
797
    if (rd_command->option_ls[rd_command->frame_index] ==
798
        RD_OPTION_SET_Q_RDMULT) {
799
      rd->RDMULT = rd_command->rdmult_ls[rd_command->frame_index];
800
    }
801
  }
802
#endif  // CONFIG_RD_COMMAND
803
804
136k
  av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);
805
806
136k
  set_block_thresholds(cm, rd, cpi->sf.rt_sf.use_nonrd_pick_mode);
807
808
136k
  populate_unified_cost_update_freq(cpi->oxcf.cost_upd_freq, sf);
809
136k
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;
810
  // Frame level mv cost update
811
136k
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->mv_cost_upd_level,
812
136k
                                       use_nonrd_pick_mode, frames_since_key))
813
131k
    av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
814
131k
                      cm->features.allow_high_precision_mv, x->mv_costs);
815
816
  // Frame level coefficient cost update
817
136k
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->coeff_cost_upd_level,
818
136k
                                       use_nonrd_pick_mode, frames_since_key))
819
131k
    av1_fill_coeff_costs(&x->coeff_costs, cm->fc, av1_num_planes(cm));
820
821
  // Frame level mode cost update
822
136k
  if (should_force_mode_cost_update(cpi) ||
823
127k
      is_frame_level_cost_upd_freq_set(cm, inter_sf->mode_cost_upd_level,
824
127k
                                       use_nonrd_pick_mode, frames_since_key))
825
133k
    av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);
826
827
  // Frame level dv cost update
828
136k
  if (av1_need_dv_costs(cpi)) {
829
0
    if (cpi->td.dv_costs_alloc == NULL) {
830
0
      CHECK_MEM_ERROR(
831
0
          cm, cpi->td.dv_costs_alloc,
832
0
          (IntraBCMVCosts *)aom_malloc(sizeof(*cpi->td.dv_costs_alloc)));
833
0
      cpi->td.mb.dv_costs = cpi->td.dv_costs_alloc;
834
0
    }
835
0
    av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
836
0
  }
837
136k
}
838
839
3.58M
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
840
  // NOTE: The tables below must be of the same size.
841
842
  // The functions described below are sampled at the four most significant
843
  // bits of x^2 + 8 / 256.
844
845
  // Normalized rate:
846
  // This table models the rate for a Laplacian source with given variance
847
  // when quantized with a uniform quantizer with given stepsize. The
848
  // closed form expression is:
849
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
850
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
851
  // and H(x) is the binary entropy function.
852
3.58M
  static const int rate_tab_q10[] = {
853
3.58M
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
854
3.58M
    4044,  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
855
3.58M
    3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
856
3.58M
    2290,  2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
857
3.58M
    1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
858
3.58M
    911,   864,  821,  781,  745,  680,  623,  574,  530,  490,  455,  424,
859
3.58M
    395,   345,  304,  269,  239,  213,  190,  171,  154,  126,  104,  87,
860
3.58M
    73,    61,   52,   44,   38,   28,   21,   16,   12,   10,   8,    6,
861
3.58M
    5,     3,    2,    1,    1,    1,    0,    0,
862
3.58M
  };
863
  // Normalized distortion:
864
  // This table models the normalized distortion for a Laplacian source
865
  // with given variance when quantized with a uniform quantizer
866
  // with given stepsize. The closed form expression is:
867
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
868
  // where x = qpstep / sqrt(variance).
869
  // Note the actual distortion is Dn * variance.
870
3.58M
  static const int dist_tab_q10[] = {
871
3.58M
    0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,
872
3.58M
    5,    6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,
873
3.58M
    18,   21,   24,   26,   29,   31,   34,   36,   39,   44,   49,   54,
874
3.58M
    59,   64,   69,   73,   78,   88,   97,   106,  115,  124,  133,  142,
875
3.58M
    151,  167,  184,  200,  215,  231,  245,  260,  274,  301,  327,  351,
876
3.58M
    375,  397,  418,  439,  458,  495,  528,  559,  587,  613,  637,  659,
877
3.58M
    680,  717,  749,  777,  801,  823,  842,  859,  874,  899,  919,  936,
878
3.58M
    949,  960,  969,  977,  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,
879
3.58M
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
880
3.58M
  };
881
3.58M
  static const int xsq_iq_q10[] = {
882
3.58M
    0,      4,      8,      12,     16,     20,     24,     28,     32,
883
3.58M
    40,     48,     56,     64,     72,     80,     88,     96,     112,
884
3.58M
    128,    144,    160,    176,    192,    208,    224,    256,    288,
885
3.58M
    320,    352,    384,    416,    448,    480,    544,    608,    672,
886
3.58M
    736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
887
3.58M
    1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
888
3.58M
    3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
889
3.58M
    7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
890
3.58M
    16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
891
3.58M
    36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
892
3.58M
    81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
893
3.58M
    180192, 196576, 212960, 229344, 245728,
894
3.58M
  };
895
3.58M
  const int tmp = (xsq_q10 >> 2) + 8;
896
3.58M
  const int k = get_msb(tmp) - 3;
897
3.58M
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
898
3.58M
  const int one_q10 = 1 << 10;
899
3.58M
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
900
3.58M
  const int b_q10 = one_q10 - a_q10;
901
3.58M
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
902
3.58M
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
903
3.58M
}
904
905
void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
906
                                  unsigned int qstep, int *rate,
907
3.60M
                                  int64_t *dist) {
908
  // This function models the rate and distortion for a Laplacian
909
  // source with given variance when quantized with a uniform quantizer
910
  // with given stepsize. The closed form expressions are in:
911
  // Hang and Chen, "Source Model for transform video coder and its
912
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
913
  // Sys. for Video Tech., April 1997.
914
3.60M
  if (var == 0) {
915
18.1k
    *rate = 0;
916
18.1k
    *dist = 0;
917
3.58M
  } else {
918
3.58M
    int d_q10, r_q10;
919
3.58M
    static const uint32_t MAX_XSQ_Q10 = 245727;
920
3.58M
    const uint64_t xsq_q10_64 =
921
3.58M
        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
922
3.58M
    const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
923
3.58M
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
924
3.58M
    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
925
3.58M
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
926
3.58M
  }
927
3.60M
}
928
929
11.7M
static double interp_cubic(const double *p, double x) {
930
11.7M
  return p[1] + 0.5 * x *
931
11.7M
                    (p[2] - p[0] +
932
11.7M
                     x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] +
933
11.7M
                          x * (3.0 * (p[1] - p[2]) + p[3] - p[0])));
934
11.7M
}
935
936
/*
937
static double interp_bicubic(const double *p, int p_stride, double x,
938
                             double y) {
939
  double q[4];
940
  q[0] = interp_cubic(p, x);
941
  q[1] = interp_cubic(p + p_stride, x);
942
  q[2] = interp_cubic(p + 2 * p_stride, x);
943
  q[3] = interp_cubic(p + 3 * p_stride, x);
944
  return interp_cubic(q, y);
945
}
946
*/
947
948
static const uint8_t bsize_curvfit_model_cat_lookup[BLOCK_SIZES_ALL] = {
949
  0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 1, 1, 2, 2, 3, 3
950
};
951
952
5.85M
static int sse_norm_curvfit_model_cat_lookup(double sse_norm) {
953
5.85M
  return (sse_norm > 16.0);
954
5.85M
}
955
956
static const double interp_rgrid_curv[4][65] = {
957
  {
958
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
959
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
960
      0.000000,    118.257702,  120.210658,  121.434853,  122.100487,
961
      122.377758,  122.436865,  72.290102,   96.974289,   101.652727,
962
      126.830141,  140.417377,  157.644879,  184.315291,  215.823873,
963
      262.300169,  335.919859,  420.624173,  519.185032,  619.854243,
964
      726.053595,  827.663369,  933.127475,  1037.988755, 1138.839609,
965
      1233.342933, 1333.508064, 1428.760126, 1533.396364, 1616.952052,
966
      1744.539319, 1803.413586, 1951.466618, 1994.227838, 2086.031680,
967
      2148.635443, 2239.068450, 2222.590637, 2338.859809, 2402.929011,
968
      2418.727875, 2435.342670, 2471.159469, 2523.187446, 2591.183827,
969
      2674.905840, 2774.110714, 2888.555675, 3017.997952, 3162.194773,
970
      3320.903365, 3493.880956, 3680.884773, 3881.672045, 4096.000000,
971
  },
972
  {
973
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
974
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
975
      0.000000,    13.087244,   15.919735,   25.930313,   24.412411,
976
      28.567417,   29.924194,   30.857010,   32.742979,   36.382570,
977
      39.210386,   42.265690,   47.378572,   57.014850,   82.740067,
978
      137.346562,  219.968084,  316.781856,  415.643773,  516.706538,
979
      614.914364,  714.303763,  815.512135,  911.210485,  1008.501528,
980
      1109.787854, 1213.772279, 1322.922561, 1414.752579, 1510.505641,
981
      1615.741888, 1697.989032, 1780.123933, 1847.453790, 1913.742309,
982
      1960.828122, 2047.500168, 2085.454095, 2129.230668, 2158.171824,
983
      2182.231724, 2217.684864, 2269.589211, 2337.264824, 2420.618694,
984
      2519.557814, 2633.989178, 2763.819779, 2908.956609, 3069.306660,
985
      3244.776927, 3435.274401, 3640.706076, 3860.978945, 4096.000000,
986
  },
987
  {
988
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
989
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
990
      0.000000,    4.656893,    5.123633,    5.594132,    6.162376,
991
      6.918433,    7.768444,    8.739415,    10.105862,   11.477328,
992
      13.236604,   15.421030,   19.093623,   25.801871,   46.724612,
993
      98.841054,   181.113466,  272.586364,  359.499769,  445.546343,
994
      525.944439,  605.188743,  681.793483,  756.668359,  838.486885,
995
      926.950356,  1015.482542, 1113.353926, 1204.897193, 1288.871992,
996
      1373.464145, 1455.746628, 1527.796460, 1588.475066, 1658.144771,
997
      1710.302500, 1807.563351, 1863.197608, 1927.281616, 1964.450872,
998
      2022.719898, 2100.041145, 2185.205712, 2280.993936, 2387.616216,
999
      2505.282950, 2634.204540, 2774.591385, 2926.653884, 3090.602436,
1000
      3266.647443, 3454.999303, 3655.868416, 3869.465182, 4096.000000,
1001
  },
1002
  {
1003
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
1004
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
1005
      0.000000,    0.337370,    0.391916,    0.468839,    0.566334,
1006
      0.762564,    1.069225,    1.384361,    1.787581,    2.293948,
1007
      3.251909,    4.412991,    8.050068,    11.606073,   27.668092,
1008
      65.227758,   128.463938,  202.097653,  262.715851,  312.464873,
1009
      355.601398,  400.609054,  447.201352,  495.761568,  552.871938,
1010
      619.067625,  691.984883,  773.753288,  860.628503,  946.262808,
1011
      1019.805896, 1106.061360, 1178.422145, 1244.852258, 1302.173987,
1012
      1399.650266, 1548.092912, 1545.928652, 1670.817500, 1694.523823,
1013
      1779.195362, 1882.155494, 1990.662097, 2108.325181, 2235.456119,
1014
      2372.366287, 2519.367059, 2676.769812, 2844.885918, 3024.026754,
1015
      3214.503695, 3416.628115, 3630.711389, 3857.064892, 4096.000000,
1016
  },
1017
};
1018
1019
static const double interp_dgrid_curv[3][65] = {
1020
  {
1021
      16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
1022
      15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
1023
      15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
1024
      13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
1025
      7.487633,  5.688649,  4.267515,  3.196300,  2.434201,  1.834064,
1026
      1.369920,  1.035921,  0.775279,  0.574895,  0.427232,  0.314123,
1027
      0.233236,  0.171440,  0.128188,  0.092762,  0.067569,  0.049324,
1028
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
1029
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
1030
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
1031
      0.000348,  0.000193,  0.000085,  0.000021,  0.000000,
1032
  },
1033
  {
1034
      16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
1035
      15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
1036
      15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
1037
      13.073692, 12.222005, 11.237799, 9.985848,  8.898823,  7.423519,
1038
      5.995325,  4.773152,  3.744032,  2.938217,  2.294526,  1.762412,
1039
      1.327145,  1.020728,  0.765535,  0.570548,  0.425833,  0.313825,
1040
      0.232959,  0.171324,  0.128174,  0.092750,  0.067558,  0.049319,
1041
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
1042
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
1043
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
1044
      0.000348,  0.000193,  0.000085,  0.000021,  -0.000000,
1045
  },
1046
};
1047
1048
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
1049
5.86M
                          double *rate_f, double *distbysse_f) {
1050
5.86M
  const double x_start = -15.5;
1051
5.86M
  const double x_end = 16.5;
1052
5.86M
  const double x_step = 0.5;
1053
5.86M
  const double epsilon = 1e-6;
1054
5.86M
  const int rcat = bsize_curvfit_model_cat_lookup[bsize];
1055
5.86M
  const int dcat = sse_norm_curvfit_model_cat_lookup(sse_norm);
1056
5.86M
  (void)x_end;
1057
1058
5.86M
  xqr = AOMMAX(xqr, x_start + x_step + epsilon);
1059
5.86M
  xqr = AOMMIN(xqr, x_end - x_step - epsilon);
1060
5.86M
  const double x = (xqr - x_start) / x_step;
1061
5.86M
  const int xi = (int)floor(x);
1062
5.86M
  const double xo = x - xi;
1063
1064
5.86M
  assert(xi > 0);
1065
1066
5.86M
  const double *prate = &interp_rgrid_curv[rcat][(xi - 1)];
1067
5.86M
  *rate_f = interp_cubic(prate, xo);
1068
5.86M
  const double *pdist = &interp_dgrid_curv[dcat][(xi - 1)];
1069
5.86M
  *distbysse_f = interp_cubic(pdist, xo);
1070
5.86M
}
1071
1072
static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
1073
                                       const struct macroblockd_plane *pd,
1074
                                       ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
1075
160M
                                       ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
1076
160M
  const int num_4x4_w = mi_size_wide[plane_bsize];
1077
160M
  const int num_4x4_h = mi_size_high[plane_bsize];
1078
160M
  const ENTROPY_CONTEXT *const above = pd->above_entropy_context;
1079
160M
  const ENTROPY_CONTEXT *const left = pd->left_entropy_context;
1080
1081
160M
  memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
1082
160M
  memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
1083
160M
}
1084
1085
void av1_get_entropy_contexts(BLOCK_SIZE plane_bsize,
1086
                              const struct macroblockd_plane *pd,
1087
                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
1088
160M
                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
1089
160M
  assert(plane_bsize < BLOCK_SIZES_ALL);
1090
160M
  get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left);
1091
160M
}
1092
1093
// Special clamping used in the encoder when calculating a prediction
1094
//
1095
// Logically, all pixel fetches used for prediction are clamped against the
1096
// edges of the frame. But doing this directly is slow, so instead we allocate
1097
// a finite border around the frame and fill it with copies of the outermost
1098
// pixels.
1099
//
1100
// Since this border is finite, we need to clamp the motion vector before
1101
// prediction in order to avoid out-of-bounds reads. At the same time, this
1102
// clamp must not change the prediction result.
1103
//
1104
// We can balance both of these concerns by calculating how far we would have
1105
// to go in each direction before the extended prediction region (the current
1106
// block + AOM_INTERP_EXTEND many pixels around the block) would be mapped
1107
// so that it touches the frame only at one row or column. This is a special
1108
// point because any more extreme MV will always lead to the same prediction.
1109
// So it is safe to clamp at that point.
1110
//
1111
// In the worst case, this requires a border of
1112
//   max_block_width + 2*AOM_INTERP_EXTEND = 128 + 2*4 = 136 pixels
1113
// around the frame edges.
1114
static inline void enc_clamp_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd,
1115
3.48M
                                MV *mv) {
1116
3.48M
  int bw = xd->width << MI_SIZE_LOG2;
1117
3.48M
  int bh = xd->height << MI_SIZE_LOG2;
1118
1119
3.48M
  int px_to_left_edge = xd->mi_col << MI_SIZE_LOG2;
1120
3.48M
  int px_to_right_edge = (cm->mi_params.mi_cols - xd->mi_col) << MI_SIZE_LOG2;
1121
3.48M
  int px_to_top_edge = xd->mi_row << MI_SIZE_LOG2;
1122
3.48M
  int px_to_bottom_edge = (cm->mi_params.mi_rows - xd->mi_row) << MI_SIZE_LOG2;
1123
1124
3.48M
  const SubpelMvLimits mv_limits = {
1125
3.48M
    .col_min = -GET_MV_SUBPEL(px_to_left_edge + bw + AOM_INTERP_EXTEND),
1126
3.48M
    .col_max = GET_MV_SUBPEL(px_to_right_edge + AOM_INTERP_EXTEND),
1127
3.48M
    .row_min = -GET_MV_SUBPEL(px_to_top_edge + bh + AOM_INTERP_EXTEND),
1128
3.48M
    .row_max = GET_MV_SUBPEL(px_to_bottom_edge + AOM_INTERP_EXTEND)
1129
3.48M
  };
1130
3.48M
  clamp_mv(mv, &mv_limits);
1131
3.48M
}
1132
1133
void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
1134
2.20M
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
1135
2.20M
  const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
1136
2.20M
  const int_mv ref_mv =
1137
2.20M
      av1_get_ref_mv_from_stack(0, ref_frames, 0, &x->mbmi_ext);
1138
2.20M
  const int_mv ref_mv1 =
1139
2.20M
      av1_get_ref_mv_from_stack(0, ref_frames, 1, &x->mbmi_ext);
1140
2.20M
  MV pred_mv[MAX_MV_REF_CANDIDATES + 1];
1141
2.20M
  int num_mv_refs = 0;
1142
2.20M
  pred_mv[num_mv_refs++] = ref_mv.as_mv;
1143
2.20M
  if (ref_mv.as_int != ref_mv1.as_int) {
1144
1.28M
    pred_mv[num_mv_refs++] = ref_mv1.as_mv;
1145
1.28M
  }
1146
1147
2.20M
  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));
1148
1149
2.20M
  const uint8_t *const src_y_ptr = x->plane[0].src.buf;
1150
2.20M
  int zero_seen = 0;
1151
2.20M
  int best_sad = INT_MAX;
1152
2.20M
  int max_mv = 0;
1153
  // Get the sad for each candidate reference mv.
1154
5.69M
  for (int i = 0; i < num_mv_refs; ++i) {
1155
3.48M
    MV *this_mv = &pred_mv[i];
1156
3.48M
    enc_clamp_mv(&cpi->common, &x->e_mbd, this_mv);
1157
1158
3.48M
    const int fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
1159
3.48M
    const int fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
1160
3.48M
    max_mv = AOMMAX(max_mv, AOMMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
1161
1162
3.48M
    if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
1163
3.47M
    zero_seen |= (fp_row == 0 && fp_col == 0);
1164
1165
3.47M
    const uint8_t *const ref_y_ptr =
1166
3.47M
        &ref_y_buffer[ref_y_stride * fp_row + fp_col];
1167
    // Find sad for current vector.
1168
3.47M
    const int this_sad = cpi->ppi->fn_ptr[block_size].sdf(
1169
3.47M
        src_y_ptr, x->plane[0].src.stride, ref_y_ptr, ref_y_stride);
1170
    // Note if it is the best so far.
1171
3.47M
    if (this_sad < best_sad) {
1172
2.82M
      best_sad = this_sad;
1173
2.82M
    }
1174
3.47M
    if (i == 0)
1175
2.20M
      x->pred_mv0_sad[ref_frame] = this_sad;
1176
1.27M
    else if (i == 1)
1177
1.27M
      x->pred_mv1_sad[ref_frame] = this_sad;
1178
3.47M
  }
1179
1180
  // Note the index of the mv that worked best in the reference list.
1181
2.20M
  x->max_mv_context[ref_frame] = max_mv;
1182
2.20M
  x->pred_mv_sad[ref_frame] = best_sad;
1183
2.20M
}
1184
1185
void av1_setup_pred_block(const MACROBLOCKD *xd,
1186
                          struct buf_2d dst[MAX_MB_PLANE],
1187
                          const YV12_BUFFER_CONFIG *src,
1188
                          const struct scale_factors *scale,
1189
                          const struct scale_factors *scale_uv,
1190
2.20M
                          const int num_planes) {
1191
2.20M
  dst[0].buf = src->y_buffer;
1192
2.20M
  dst[0].stride = src->y_stride;
1193
2.20M
  dst[1].buf = src->u_buffer;
1194
2.20M
  dst[2].buf = src->v_buffer;
1195
2.20M
  dst[1].stride = dst[2].stride = src->uv_stride;
1196
1197
2.20M
  const int mi_row = xd->mi_row;
1198
2.20M
  const int mi_col = xd->mi_col;
1199
6.10M
  for (int i = 0; i < num_planes; ++i) {
1200
3.90M
    setup_pred_plane(dst + i, xd->mi[0]->bsize, dst[i].buf,
1201
3.90M
                     i ? src->uv_crop_width : src->y_crop_width,
1202
3.90M
                     i ? src->uv_crop_height : src->y_crop_height,
1203
3.90M
                     dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
1204
3.90M
                     xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
1205
3.90M
  }
1206
2.20M
}
1207
1208
YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
1209
5.99M
                                             int ref_frame) {
1210
5.99M
  assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
1211
5.99M
  RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame - 1];
1212
5.99M
  const RefCntBuffer *const ref_buf =
1213
5.99M
      get_ref_frame_buf(&cpi->common, ref_frame);
1214
5.99M
  return (scaled_buf != ref_buf && scaled_buf != NULL) ? &scaled_buf->buf
1215
5.99M
                                                       : NULL;
1216
5.99M
}
1217
1218
int av1_get_switchable_rate(const MACROBLOCK *x, const MACROBLOCKD *xd,
1219
6.47M
                            InterpFilter interp_filter, int dual_filter) {
1220
6.47M
  if (interp_filter == SWITCHABLE) {
1221
6.46M
    const MB_MODE_INFO *const mbmi = xd->mi[0];
1222
6.46M
    int inter_filter_cost = 0;
1223
12.9M
    for (int dir = 0; dir < 2; ++dir) {
1224
12.9M
      if (dir && !dual_filter) break;
1225
6.45M
      const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
1226
6.45M
      const InterpFilter filter =
1227
6.45M
          av1_extract_interp_filter(mbmi->interp_filters, dir);
1228
6.45M
      inter_filter_cost += x->mode_costs.switchable_interp_costs[ctx][filter];
1229
6.45M
    }
1230
6.46M
    return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
1231
6.46M
  } else {
1232
2.81k
    return 0;
1233
2.81k
  }
1234
6.47M
}
1235
1236
117k
void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
1237
117k
  RD_OPT *const rd = &cpi->rd;
1238
1239
  // Set baseline threshold values.
1240
117k
  av1_zero(rd->thresh_mult);
1241
1242
117k
  rd->thresh_mult[THR_NEARESTMV] = 300;
1243
117k
  rd->thresh_mult[THR_NEARESTL2] = 300;
1244
117k
  rd->thresh_mult[THR_NEARESTL3] = 300;
1245
117k
  rd->thresh_mult[THR_NEARESTB] = 300;
1246
117k
  rd->thresh_mult[THR_NEARESTA2] = 300;
1247
117k
  rd->thresh_mult[THR_NEARESTA] = 300;
1248
117k
  rd->thresh_mult[THR_NEARESTG] = 300;
1249
1250
117k
  rd->thresh_mult[THR_NEWMV] = 1000;
1251
117k
  rd->thresh_mult[THR_NEWL2] = 1000;
1252
117k
  rd->thresh_mult[THR_NEWL3] = 1000;
1253
117k
  rd->thresh_mult[THR_NEWB] = 1000;
1254
117k
  rd->thresh_mult[THR_NEWA2] = 1100;
1255
117k
  rd->thresh_mult[THR_NEWA] = 1000;
1256
117k
  rd->thresh_mult[THR_NEWG] = 1000;
1257
1258
117k
  rd->thresh_mult[THR_NEARMV] = 1000;
1259
117k
  rd->thresh_mult[THR_NEARL2] = 1000;
1260
117k
  rd->thresh_mult[THR_NEARL3] = 1000;
1261
117k
  rd->thresh_mult[THR_NEARB] = 1000;
1262
117k
  rd->thresh_mult[THR_NEARA2] = 1000;
1263
117k
  rd->thresh_mult[THR_NEARA] = 1000;
1264
117k
  rd->thresh_mult[THR_NEARG] = 1000;
1265
1266
117k
  rd->thresh_mult[THR_GLOBALMV] = 2200;
1267
117k
  rd->thresh_mult[THR_GLOBALL2] = 2000;
1268
117k
  rd->thresh_mult[THR_GLOBALL3] = 2000;
1269
117k
  rd->thresh_mult[THR_GLOBALB] = 2400;
1270
117k
  rd->thresh_mult[THR_GLOBALA2] = 2000;
1271
117k
  rd->thresh_mult[THR_GLOBALG] = 2000;
1272
117k
  rd->thresh_mult[THR_GLOBALA] = 2400;
1273
1274
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] = 1100;
1275
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] = 1000;
1276
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] = 800;
1277
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] = 900;
1278
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] = 1000;
1279
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] = 1000;
1280
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] = 1000;
1281
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] = 1000;
1282
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA2] = 1000;
1283
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A2] = 1000;
1284
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A2] = 1000;
1285
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA2] = 1000;
1286
1287
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] = 2000;
1288
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] = 2000;
1289
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] = 2000;
1290
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] = 2000;
1291
1292
117k
  rd->thresh_mult[THR_COMP_NEAR_NEARLA] = 1200;
1293
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEWLA] = 1500;
1294
117k
  rd->thresh_mult[THR_COMP_NEW_NEARESTLA] = 1500;
1295
117k
  rd->thresh_mult[THR_COMP_NEAR_NEWLA] = 1530;
1296
117k
  rd->thresh_mult[THR_COMP_NEW_NEARLA] = 1870;
1297
117k
  rd->thresh_mult[THR_COMP_NEW_NEWLA] = 2400;
1298
117k
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] = 2750;
1299
1300
117k
  rd->thresh_mult[THR_COMP_NEAR_NEARL2A] = 1200;
1301
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] = 1500;
1302
117k
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] = 1500;
1303
117k
  rd->thresh_mult[THR_COMP_NEAR_NEWL2A] = 1870;
1304
117k
  rd->thresh_mult[THR_COMP_NEW_NEARL2A] = 1700;
1305
117k
  rd->thresh_mult[THR_COMP_NEW_NEWL2A] = 1800;
1306
117k
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A] = 2500;
1307
1308
117k
  rd->thresh_mult[THR_COMP_NEAR_NEARL3A] = 1200;
1309
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] = 1500;
1310
117k
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] = 1500;
1311
117k
  rd->thresh_mult[THR_COMP_NEAR_NEWL3A] = 1700;
1312
117k
  rd->thresh_mult[THR_COMP_NEW_NEARL3A] = 1700;
1313
117k
  rd->thresh_mult[THR_COMP_NEW_NEWL3A] = 2000;
1314
117k
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] = 3000;
1315
1316
117k
  rd->thresh_mult[THR_COMP_NEAR_NEARGA] = 1320;
1317
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEWGA] = 1500;
1318
117k
  rd->thresh_mult[THR_COMP_NEW_NEARESTGA] = 1500;
1319
117k
  rd->thresh_mult[THR_COMP_NEAR_NEWGA] = 2040;
1320
117k
  rd->thresh_mult[THR_COMP_NEW_NEARGA] = 1700;
1321
117k
  rd->thresh_mult[THR_COMP_NEW_NEWGA] = 2000;
1322
117k
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] = 2250;
1323
1324
117k
  rd->thresh_mult[THR_COMP_NEAR_NEARLB] = 1200;
1325
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEWLB] = 1500;
1326
117k
  rd->thresh_mult[THR_COMP_NEW_NEARESTLB] = 1500;
1327
117k
  rd->thresh_mult[THR_COMP_NEAR_NEWLB] = 1360;
1328
117k
  rd->thresh_mult[THR_COMP_NEW_NEARLB] = 1700;
1329
117k
  rd->thresh_mult[THR_COMP_NEW_NEWLB] = 2400;
1330
117k
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] = 2250;
1331
1332
117k
  rd->thresh_mult[THR_COMP_NEAR_NEARL2B] = 1200;
1333
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] = 1500;
1334
117k
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] = 1500;
1335
117k
  rd->thresh_mult[THR_COMP_NEAR_NEWL2B] = 1700;
1336
117k
  rd->thresh_mult[THR_COMP_NEW_NEARL2B] = 1700;
1337
117k
  rd->thresh_mult[THR_COMP_NEW_NEWL2B] = 2000;
1338
117k
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2B] = 2500;
1339
1340
117k
  rd->thresh_mult[THR_COMP_NEAR_NEARL3B] = 1200;
1341
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] = 1500;
1342
117k
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] = 1500;
1343
117k
  rd->thresh_mult[THR_COMP_NEAR_NEWL3B] = 1870;
1344
117k
  rd->thresh_mult[THR_COMP_NEW_NEARL3B] = 1700;
1345
117k
  rd->thresh_mult[THR_COMP_NEW_NEWL3B] = 2000;
1346
117k
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3B] = 2500;
1347
1348
117k
  rd->thresh_mult[THR_COMP_NEAR_NEARGB] = 1200;
1349
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEWGB] = 1500;
1350
117k
  rd->thresh_mult[THR_COMP_NEW_NEARESTGB] = 1500;
1351
117k
  rd->thresh_mult[THR_COMP_NEAR_NEWGB] = 1700;
1352
117k
  rd->thresh_mult[THR_COMP_NEW_NEARGB] = 1700;
1353
117k
  rd->thresh_mult[THR_COMP_NEW_NEWGB] = 2000;
1354
117k
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGB] = 2500;
1355
1356
117k
  rd->thresh_mult[THR_COMP_NEAR_NEARLA2] = 1200;
1357
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] = 1800;
1358
117k
  rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] = 1500;
1359
117k
  rd->thresh_mult[THR_COMP_NEAR_NEWLA2] = 1700;
1360
117k
  rd->thresh_mult[THR_COMP_NEW_NEARLA2] = 1700;
1361
117k
  rd->thresh_mult[THR_COMP_NEW_NEWLA2] = 2000;
1362
117k
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA2] = 2500;
1363
1364
117k
  rd->thresh_mult[THR_COMP_NEAR_NEARL2A2] = 1200;
1365
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A2] = 1500;
1366
117k
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A2] = 1500;
1367
117k
  rd->thresh_mult[THR_COMP_NEAR_NEWL2A2] = 1700;
1368
117k
  rd->thresh_mult[THR_COMP_NEW_NEARL2A2] = 1700;
1369
117k
  rd->thresh_mult[THR_COMP_NEW_NEWL2A2] = 2000;
1370
117k
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A2] = 2500;
1371
1372
117k
  rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] = 1440;
1373
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] = 1500;
1374
117k
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] = 1500;
1375
117k
  rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] = 1700;
1376
117k
  rd->thresh_mult[THR_COMP_NEW_NEARL3A2] = 1700;
1377
117k
  rd->thresh_mult[THR_COMP_NEW_NEWL3A2] = 2000;
1378
117k
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A2] = 2500;
1379
1380
117k
  rd->thresh_mult[THR_COMP_NEAR_NEARGA2] = 1200;
1381
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEWGA2] = 1500;
1382
117k
  rd->thresh_mult[THR_COMP_NEW_NEARESTGA2] = 1500;
1383
117k
  rd->thresh_mult[THR_COMP_NEAR_NEWGA2] = 1700;
1384
117k
  rd->thresh_mult[THR_COMP_NEW_NEARGA2] = 1700;
1385
117k
  rd->thresh_mult[THR_COMP_NEW_NEWGA2] = 2000;
1386
117k
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] = 2750;
1387
1388
117k
  rd->thresh_mult[THR_COMP_NEAR_NEARLL2] = 1600;
1389
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] = 2000;
1390
117k
  rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] = 2000;
1391
117k
  rd->thresh_mult[THR_COMP_NEAR_NEWLL2] = 2640;
1392
117k
  rd->thresh_mult[THR_COMP_NEW_NEARLL2] = 2200;
1393
117k
  rd->thresh_mult[THR_COMP_NEW_NEWLL2] = 2400;
1394
117k
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL2] = 3200;
1395
1396
117k
  rd->thresh_mult[THR_COMP_NEAR_NEARLL3] = 1600;
1397
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] = 2000;
1398
117k
  rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] = 1800;
1399
117k
  rd->thresh_mult[THR_COMP_NEAR_NEWLL3] = 2200;
1400
117k
  rd->thresh_mult[THR_COMP_NEW_NEARLL3] = 2200;
1401
117k
  rd->thresh_mult[THR_COMP_NEW_NEWLL3] = 2400;
1402
117k
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL3] = 3200;
1403
1404
117k
  rd->thresh_mult[THR_COMP_NEAR_NEARLG] = 1760;
1405
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEWLG] = 2400;
1406
117k
  rd->thresh_mult[THR_COMP_NEW_NEARESTLG] = 2000;
1407
117k
  rd->thresh_mult[THR_COMP_NEAR_NEWLG] = 1760;
1408
117k
  rd->thresh_mult[THR_COMP_NEW_NEARLG] = 2640;
1409
117k
  rd->thresh_mult[THR_COMP_NEW_NEWLG] = 2400;
1410
117k
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLG] = 3200;
1411
1412
117k
  rd->thresh_mult[THR_COMP_NEAR_NEARBA] = 1600;
1413
117k
  rd->thresh_mult[THR_COMP_NEAREST_NEWBA] = 2000;
1414
117k
  rd->thresh_mult[THR_COMP_NEW_NEARESTBA] = 2000;
1415
117k
  rd->thresh_mult[THR_COMP_NEAR_NEWBA] = 2200;
1416
117k
  rd->thresh_mult[THR_COMP_NEW_NEARBA] = 1980;
1417
117k
  rd->thresh_mult[THR_COMP_NEW_NEWBA] = 2640;
1418
117k
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALBA] = 3200;
1419
1420
117k
  rd->thresh_mult[THR_DC] = 1000;
1421
117k
  rd->thresh_mult[THR_PAETH] = 1000;
1422
117k
  rd->thresh_mult[THR_SMOOTH] = 2200;
1423
117k
  rd->thresh_mult[THR_SMOOTH_V] = 2000;
1424
117k
  rd->thresh_mult[THR_SMOOTH_H] = 2000;
1425
117k
  rd->thresh_mult[THR_H_PRED] = 2000;
1426
117k
  rd->thresh_mult[THR_V_PRED] = 1800;
1427
117k
  rd->thresh_mult[THR_D135_PRED] = 2500;
1428
117k
  rd->thresh_mult[THR_D203_PRED] = 2000;
1429
117k
  rd->thresh_mult[THR_D157_PRED] = 2500;
1430
117k
  rd->thresh_mult[THR_D67_PRED] = 2000;
1431
117k
  rd->thresh_mult[THR_D113_PRED] = 2500;
1432
117k
  rd->thresh_mult[THR_D45_PRED] = 2500;
1433
117k
}
1434
1435
static inline void update_thr_fact(int (*factor_buf)[MAX_MODES],
1436
                                   THR_MODES best_mode_index,
1437
                                   THR_MODES mode_start, THR_MODES mode_end,
1438
                                   BLOCK_SIZE min_size, BLOCK_SIZE max_size,
1439
1.84M
                                   int max_rd_thresh_factor) {
1440
157M
  for (THR_MODES mode = mode_start; mode < mode_end; ++mode) {
1441
929M
    for (BLOCK_SIZE bs = min_size; bs <= max_size; ++bs) {
1442
774M
      int *const fact = &factor_buf[bs][mode];
1443
774M
      if (mode == best_mode_index) {
1444
4.59M
        *fact -= (*fact >> RD_THRESH_LOG_DEC_FACTOR);
1445
769M
      } else {
1446
769M
        *fact = AOMMIN(*fact + RD_THRESH_INC, max_rd_thresh_factor);
1447
769M
      }
1448
774M
    }
1449
155M
  }
1450
1.84M
}
1451
1452
void av1_update_rd_thresh_fact(
1453
    const AV1_COMMON *const cm, int (*factor_buf)[MAX_MODES],
1454
    int use_adaptive_rd_thresh, BLOCK_SIZE bsize, THR_MODES best_mode_index,
1455
    THR_MODES inter_mode_start, THR_MODES inter_mode_end,
1456
921k
    THR_MODES intra_mode_start, THR_MODES intra_mode_end) {
1457
921k
  assert(use_adaptive_rd_thresh > 0);
1458
921k
  const int max_rd_thresh_factor = use_adaptive_rd_thresh * RD_THRESH_MAX_FACT;
1459
1460
921k
  const int bsize_is_1_to_4 = bsize > cm->seq_params->sb_size;
1461
921k
  BLOCK_SIZE min_size, max_size;
1462
921k
  if (bsize_is_1_to_4) {
1463
    // This part handles block sizes with 1:4 and 4:1 aspect ratios
1464
    // TODO(any): Experiment with threshold update for parent/child blocks
1465
0
    min_size = bsize;
1466
0
    max_size = bsize;
1467
921k
  } else {
1468
921k
    min_size = AOMMAX(bsize - 2, BLOCK_4X4);
1469
921k
    max_size = AOMMIN(bsize + 2, (int)cm->seq_params->sb_size);
1470
921k
  }
1471
1472
921k
  update_thr_fact(factor_buf, best_mode_index, inter_mode_start, inter_mode_end,
1473
921k
                  min_size, max_size, max_rd_thresh_factor);
1474
921k
  update_thr_fact(factor_buf, best_mode_index, intra_mode_start, intra_mode_end,
1475
921k
                  min_size, max_size, max_rd_thresh_factor);
1476
921k
}
1477
1478
int av1_get_intra_cost_penalty(int qindex, int qdelta,
1479
46.7M
                               aom_bit_depth_t bit_depth) {
1480
46.7M
  const int q = av1_dc_quant_QTX(qindex, qdelta, bit_depth);
1481
46.7M
  switch (bit_depth) {
1482
41.3M
    case AOM_BITS_8: return 20 * q;
1483
2.82M
    case AOM_BITS_10: return 5 * q;
1484
2.49M
    case AOM_BITS_12: return ROUND_POWER_OF_TWO(5 * q, 2);
1485
0
    default:
1486
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
1487
0
      return -1;
1488
46.7M
  }
1489
46.7M
}