Coverage Report

Created: 2022-08-24 06:17

/src/aom/av1/encoder/rd.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <math.h>
14
#include <stdio.h>
15
16
#include "config/av1_rtcd.h"
17
18
#include "aom_dsp/aom_dsp_common.h"
19
#include "aom_mem/aom_mem.h"
20
#include "aom_ports/bitops.h"
21
#include "aom_ports/mem.h"
22
23
#include "av1/common/common.h"
24
#include "av1/common/entropy.h"
25
#include "av1/common/entropymode.h"
26
#include "av1/common/mvref_common.h"
27
#include "av1/common/pred_common.h"
28
#include "av1/common/quant_common.h"
29
#include "av1/common/reconinter.h"
30
#include "av1/common/reconintra.h"
31
#include "av1/common/seg_common.h"
32
33
#include "av1/encoder/av1_quantize.h"
34
#include "av1/encoder/cost.h"
35
#include "av1/encoder/encodemb.h"
36
#include "av1/encoder/encodemv.h"
37
#include "av1/encoder/encoder.h"
38
#include "av1/encoder/encodetxb.h"
39
#include "av1/encoder/mcomp.h"
40
#include "av1/encoder/ratectrl.h"
41
#include "av1/encoder/rd.h"
42
#include "av1/encoder/tokenize.h"
43
44
#define RD_THRESH_POW 1.25
45
46
// The baseline rd thresholds for breaking out of the rd loop for
47
// certain modes are assumed to be based on 8x8 blocks.
48
// This table is used to correct for block size.
49
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
50
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
51
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
52
};
53
54
static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA]
55
                                            [EXT_TX_SIZES] = {
56
                                              { 1, 1, 1, 1 },  // unused
57
                                              { 1, 1, 0, 0 },
58
                                              { 0, 0, 1, 0 },
59
                                            };
60
61
static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER]
62
                                            [EXT_TX_SIZES] = {
63
                                              { 1, 1, 1, 1 },  // unused
64
                                              { 1, 1, 0, 0 },
65
                                              { 0, 0, 1, 0 },
66
                                              { 0, 1, 1, 1 },
67
                                            };
68
69
static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
70
                                                      EXT_TX_SETS_INTER)] = {
71
  {
72
      // Intra
73
      EXT_TX_SET_DCTONLY,
74
      EXT_TX_SET_DTT4_IDTX_1DDCT,
75
      EXT_TX_SET_DTT4_IDTX,
76
  },
77
  {
78
      // Inter
79
      EXT_TX_SET_DCTONLY,
80
      EXT_TX_SET_ALL16,
81
      EXT_TX_SET_DTT9_IDTX_1DDCT,
82
      EXT_TX_SET_DCT_IDTX,
83
  },
84
};
85
86
void av1_fill_mode_rates(AV1_COMMON *const cm, ModeCosts *mode_costs,
87
13.0k
                         FRAME_CONTEXT *fc) {
88
13.0k
  int i, j;
89
90
273k
  for (i = 0; i < PARTITION_CONTEXTS; ++i)
91
260k
    av1_cost_tokens_from_cdf(mode_costs->partition_cost[i],
92
260k
                             fc->partition_cdf[i], NULL);
93
94
13.0k
  if (cm->current_frame.skip_mode_info.skip_mode_flag) {
95
0
    for (i = 0; i < SKIP_MODE_CONTEXTS; ++i) {
96
0
      av1_cost_tokens_from_cdf(mode_costs->skip_mode_cost[i],
97
0
                               fc->skip_mode_cdfs[i], NULL);
98
0
    }
99
0
  }
100
101
52.2k
  for (i = 0; i < SKIP_CONTEXTS; ++i) {
102
39.1k
    av1_cost_tokens_from_cdf(mode_costs->skip_txfm_cost[i],
103
39.1k
                             fc->skip_txfm_cdfs[i], NULL);
104
39.1k
  }
105
106
78.2k
  for (i = 0; i < KF_MODE_CONTEXTS; ++i)
107
389k
    for (j = 0; j < KF_MODE_CONTEXTS; ++j)
108
324k
      av1_cost_tokens_from_cdf(mode_costs->y_mode_costs[i][j],
109
324k
                               fc->kf_y_cdf[i][j], NULL);
110
111
65.2k
  for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
112
52.2k
    av1_cost_tokens_from_cdf(mode_costs->mbmode_cost[i], fc->y_mode_cdf[i],
113
52.2k
                             NULL);
114
39.1k
  for (i = 0; i < CFL_ALLOWED_TYPES; ++i)
115
363k
    for (j = 0; j < INTRA_MODES; ++j)
116
337k
      av1_cost_tokens_from_cdf(mode_costs->intra_uv_mode_cost[i][j],
117
337k
                               fc->uv_mode_cdf[i][j], NULL);
118
119
13.0k
  av1_cost_tokens_from_cdf(mode_costs->filter_intra_mode_cost,
120
13.0k
                           fc->filter_intra_mode_cdf, NULL);
121
300k
  for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
122
287k
    if (av1_filter_intra_allowed_bsize(cm, i))
123
182k
      av1_cost_tokens_from_cdf(mode_costs->filter_intra_cost[i],
124
182k
                               fc->filter_intra_cdfs[i], NULL);
125
287k
  }
126
127
221k
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
128
208k
    av1_cost_tokens_from_cdf(mode_costs->switchable_interp_costs[i],
129
208k
                             fc->switchable_interp_cdf[i], NULL);
130
131
104k
  for (i = 0; i < PALATTE_BSIZE_CTXS; ++i) {
132
91.2k
    av1_cost_tokens_from_cdf(mode_costs->palette_y_size_cost[i],
133
91.2k
                             fc->palette_y_size_cdf[i], NULL);
134
91.2k
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_size_cost[i],
135
91.2k
                             fc->palette_uv_size_cdf[i], NULL);
136
364k
    for (j = 0; j < PALETTE_Y_MODE_CONTEXTS; ++j) {
137
273k
      av1_cost_tokens_from_cdf(mode_costs->palette_y_mode_cost[i][j],
138
273k
                               fc->palette_y_mode_cdf[i][j], NULL);
139
273k
    }
140
91.2k
  }
141
142
39.1k
  for (i = 0; i < PALETTE_UV_MODE_CONTEXTS; ++i) {
143
26.1k
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_mode_cost[i],
144
26.1k
                             fc->palette_uv_mode_cdf[i], NULL);
145
26.1k
  }
146
147
104k
  for (i = 0; i < PALETTE_SIZES; ++i) {
148
546k
    for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) {
149
454k
      av1_cost_tokens_from_cdf(mode_costs->palette_y_color_cost[i][j],
150
454k
                               fc->palette_y_color_index_cdf[i][j], NULL);
151
454k
      av1_cost_tokens_from_cdf(mode_costs->palette_uv_color_cost[i][j],
152
454k
                               fc->palette_uv_color_index_cdf[i][j], NULL);
153
454k
    }
154
91.2k
  }
155
156
13.0k
  int sign_cost[CFL_JOINT_SIGNS];
157
13.0k
  av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL);
158
117k
  for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
159
104k
    int *cost_u = mode_costs->cfl_cost[joint_sign][CFL_PRED_U];
160
104k
    int *cost_v = mode_costs->cfl_cost[joint_sign][CFL_PRED_V];
161
104k
    if (CFL_SIGN_U(joint_sign) == CFL_SIGN_ZERO) {
162
26.1k
      memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u));
163
78.2k
    } else {
164
78.2k
      const aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
165
78.2k
      av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL);
166
78.2k
    }
167
104k
    if (CFL_SIGN_V(joint_sign) == CFL_SIGN_ZERO) {
168
26.1k
      memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v));
169
78.2k
    } else {
170
78.2k
      const aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
171
78.2k
      av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL);
172
78.2k
    }
173
1.77M
    for (int u = 0; u < CFL_ALPHABET_SIZE; u++)
174
1.66M
      cost_u[u] += sign_cost[joint_sign];
175
104k
  }
176
177
65.2k
  for (i = 0; i < MAX_TX_CATS; ++i)
178
208k
    for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
179
156k
      av1_cost_tokens_from_cdf(mode_costs->tx_size_cost[i][j],
180
156k
                               fc->tx_size_cdf[i][j], NULL);
181
182
287k
  for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) {
183
273k
    av1_cost_tokens_from_cdf(mode_costs->txfm_partition_cost[i],
184
273k
                             fc->txfm_partition_cdf[i], NULL);
185
273k
  }
186
187
65.2k
  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
188
52.1k
    int s;
189
208k
    for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
190
156k
      if (use_inter_ext_tx_for_txsize[s][i]) {
191
78.2k
        av1_cost_tokens_from_cdf(
192
78.2k
            mode_costs->inter_tx_type_costs[s][i], fc->inter_ext_tx_cdf[s][i],
193
78.2k
            av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]);
194
78.2k
      }
195
156k
    }
196
156k
    for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
197
104k
      if (use_intra_ext_tx_for_txsize[s][i]) {
198
545k
        for (j = 0; j < INTRA_MODES; ++j) {
199
506k
          av1_cost_tokens_from_cdf(
200
506k
              mode_costs->intra_tx_type_costs[s][i][j],
201
506k
              fc->intra_ext_tx_cdf[s][i][j],
202
506k
              av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][s]]);
203
506k
        }
204
39.1k
      }
205
104k
    }
206
52.1k
  }
207
117k
  for (i = 0; i < DIRECTIONAL_MODES; ++i) {
208
104k
    av1_cost_tokens_from_cdf(mode_costs->angle_delta_cost[i],
209
104k
                             fc->angle_delta_cdf[i], NULL);
210
104k
  }
211
13.0k
  av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL);
212
213
13.0k
  if (!frame_is_intra_only(cm)) {
214
0
    for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
215
0
      av1_cost_tokens_from_cdf(mode_costs->comp_inter_cost[i],
216
0
                               fc->comp_inter_cdf[i], NULL);
217
0
    }
218
219
0
    for (i = 0; i < REF_CONTEXTS; ++i) {
220
0
      for (j = 0; j < SINGLE_REFS - 1; ++j) {
221
0
        av1_cost_tokens_from_cdf(mode_costs->single_ref_cost[i][j],
222
0
                                 fc->single_ref_cdf[i][j], NULL);
223
0
      }
224
0
    }
225
226
0
    for (i = 0; i < COMP_REF_TYPE_CONTEXTS; ++i) {
227
0
      av1_cost_tokens_from_cdf(mode_costs->comp_ref_type_cost[i],
228
0
                               fc->comp_ref_type_cdf[i], NULL);
229
0
    }
230
231
0
    for (i = 0; i < UNI_COMP_REF_CONTEXTS; ++i) {
232
0
      for (j = 0; j < UNIDIR_COMP_REFS - 1; ++j) {
233
0
        av1_cost_tokens_from_cdf(mode_costs->uni_comp_ref_cost[i][j],
234
0
                                 fc->uni_comp_ref_cdf[i][j], NULL);
235
0
      }
236
0
    }
237
238
0
    for (i = 0; i < REF_CONTEXTS; ++i) {
239
0
      for (j = 0; j < FWD_REFS - 1; ++j) {
240
0
        av1_cost_tokens_from_cdf(mode_costs->comp_ref_cost[i][j],
241
0
                                 fc->comp_ref_cdf[i][j], NULL);
242
0
      }
243
0
    }
244
245
0
    for (i = 0; i < REF_CONTEXTS; ++i) {
246
0
      for (j = 0; j < BWD_REFS - 1; ++j) {
247
0
        av1_cost_tokens_from_cdf(mode_costs->comp_bwdref_cost[i][j],
248
0
                                 fc->comp_bwdref_cdf[i][j], NULL);
249
0
      }
250
0
    }
251
252
0
    for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
253
0
      av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[i],
254
0
                               fc->intra_inter_cdf[i], NULL);
255
0
    }
256
257
0
    for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
258
0
      av1_cost_tokens_from_cdf(mode_costs->newmv_mode_cost[i], fc->newmv_cdf[i],
259
0
                               NULL);
260
0
    }
261
262
0
    for (i = 0; i < GLOBALMV_MODE_CONTEXTS; ++i) {
263
0
      av1_cost_tokens_from_cdf(mode_costs->zeromv_mode_cost[i],
264
0
                               fc->zeromv_cdf[i], NULL);
265
0
    }
266
267
0
    for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) {
268
0
      av1_cost_tokens_from_cdf(mode_costs->refmv_mode_cost[i], fc->refmv_cdf[i],
269
0
                               NULL);
270
0
    }
271
272
0
    for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
273
0
      av1_cost_tokens_from_cdf(mode_costs->drl_mode_cost0[i], fc->drl_cdf[i],
274
0
                               NULL);
275
0
    }
276
0
    for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
277
0
      av1_cost_tokens_from_cdf(mode_costs->inter_compound_mode_cost[i],
278
0
                               fc->inter_compound_mode_cdf[i], NULL);
279
0
    for (i = 0; i < BLOCK_SIZES_ALL; ++i)
280
0
      av1_cost_tokens_from_cdf(mode_costs->compound_type_cost[i],
281
0
                               fc->compound_type_cdf[i], NULL);
282
0
    for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
283
0
      if (av1_is_wedge_used(i)) {
284
0
        av1_cost_tokens_from_cdf(mode_costs->wedge_idx_cost[i],
285
0
                                 fc->wedge_idx_cdf[i], NULL);
286
0
      }
287
0
    }
288
0
    for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
289
0
      av1_cost_tokens_from_cdf(mode_costs->interintra_cost[i],
290
0
                               fc->interintra_cdf[i], NULL);
291
0
      av1_cost_tokens_from_cdf(mode_costs->interintra_mode_cost[i],
292
0
                               fc->interintra_mode_cdf[i], NULL);
293
0
    }
294
0
    for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
295
0
      av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[i],
296
0
                               fc->wedge_interintra_cdf[i], NULL);
297
0
    }
298
0
    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
299
0
      av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost[i],
300
0
                               fc->motion_mode_cdf[i], NULL);
301
0
    }
302
0
    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
303
0
      av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost1[i],
304
0
                               fc->obmc_cdf[i], NULL);
305
0
    }
306
0
    for (i = 0; i < COMP_INDEX_CONTEXTS; ++i) {
307
0
      av1_cost_tokens_from_cdf(mode_costs->comp_idx_cost[i],
308
0
                               fc->compound_index_cdf[i], NULL);
309
0
    }
310
0
    for (i = 0; i < COMP_GROUP_IDX_CONTEXTS; ++i) {
311
0
      av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[i],
312
0
                               fc->comp_group_idx_cdf[i], NULL);
313
0
    }
314
0
  }
315
13.0k
}
316
317
0
// Fills the three loop-restoration cost tables of `mode_costs`
// (switchable, Wiener, sgrproj) from the matching CDFs in `fc`.
void av1_fill_lr_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
  // Restoration type selection.
  av1_cost_tokens_from_cdf(mode_costs->switchable_restore_cost,
                           fc->switchable_restore_cdf, NULL);

  // Wiener on/off.
  av1_cost_tokens_from_cdf(mode_costs->wiener_restore_cost,
                           fc->wiener_restore_cdf, NULL);

  // Sgrproj on/off.
  av1_cost_tokens_from_cdf(mode_costs->sgrproj_restore_cost,
                           fc->sgrproj_restore_cdf, NULL);
}
325
326
// Values are now correlated to quantizer.
327
// SAD-per-bit lookup table for 8-bit input, indexed by qindex. Filled once by
// av1_init_me_luts() and read by av1_set_sad_per_bit().
static int sad_per_bit_lut_8[QINDEX_RANGE];
328
// Same table for 10-bit input.
static int sad_per_bit_lut_10[QINDEX_RANGE];
329
// Same table for 12-bit input.
static int sad_per_bit_lut_12[QINDEX_RANGE];
330
331
static void init_me_luts_bd(int *bit16lut, int range,
332
3
                            aom_bit_depth_t bit_depth) {
333
3
  int i;
334
  // Initialize the sad lut tables using a formulaic calculation for now.
335
  // This is to make it easier to resolve the impact of experimental changes
336
  // to the quantizer tables.
337
771
  for (i = 0; i < range; i++) {
338
768
    const double q = av1_convert_qindex_to_q(i, bit_depth);
339
768
    bit16lut[i] = (int)(0.0418 * q + 2.4107);
340
768
  }
341
3
}
342
343
1
void av1_init_me_luts(void) {
344
1
  init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, AOM_BITS_8);
345
1
  init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, AOM_BITS_10);
346
1
  init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
347
1
}
348
349
// Boost adjustment applied to the rd multiplier in av1_compute_rd_mult():
// rdmult += (rdmult * factor) >> 7. Indexed by min(15, gfu_boost / 100).
static const int rd_boost_factor[16] = {
  64, 32, 32, 32,  // index 0..3
  24, 16, 12, 12,  // index 4..7
  8,  8,  4,  4,   // index 8..11
  2,  2,  1,  0    // index 12..15
};
351
352
// Layer-depth adjustment applied to the rd multiplier in
// av1_compute_rd_mult(): rdmult = (rdmult * factor) >> 7, so 128 would be
// neutral. Indexed by min(layer_depth, 6).
static const int rd_layer_depth_factor[7] = { 160, 160, 160, 160,
                                              192, 208, 224 };
355
356
// Returns the default rd multiplier for inter frames for a given qindex.
357
// The function here is a first pass estimate based on data from
358
// a previous Vizer run
359
0
// Default rd multiplier for inter frames: a linear function of qindex.
static double def_inter_rd_multiplier(int qindex) {
  const double base = 3.2;
  const double per_qindex = 0.0035;
  return base + (per_qindex * (double)qindex);
}
362
363
// Returns the default rd multiplier for ARF/Golden Frames for a given qindex.
364
// The function here is a first pass estimate based on data from
365
// a previous Vizer run
366
0
// Default rd multiplier for ARF/golden frames: a linear function of qindex.
static double def_arf_rd_multiplier(int qindex) {
  const double base = 3.25;
  const double per_qindex = 0.0035;
  return base + (per_qindex * (double)qindex);
}
369
370
// Returns the default rd multiplier for key frames for a given qindex.
371
// The function here is a first pass estimate based on data from
372
// a previous Vizer run
373
2.52k
// Default rd multiplier for key frames: a linear function of qindex.
static double def_kf_rd_multiplier(int qindex) {
  const double base = 3.3;
  const double per_qindex = 0.0035;
  return base + (per_qindex * (double)qindex);
}
376
377
int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
378
                                        FRAME_UPDATE_TYPE update_type,
379
2.52k
                                        int qindex) {
380
2.52k
  const int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
381
2.52k
  int rdmult = q * q;
382
2.52k
  if (update_type == KF_UPDATE) {
383
2.52k
    double def_rd_q_mult = def_kf_rd_multiplier(qindex);
384
2.52k
    rdmult = (int)((double)rdmult * def_rd_q_mult);
385
2.52k
  } else if ((update_type == GF_UPDATE) || (update_type == ARF_UPDATE)) {
386
0
    double def_rd_q_mult = def_arf_rd_multiplier(qindex);
387
0
    rdmult = (int)((double)rdmult * def_rd_q_mult);
388
0
  } else {
389
0
    double def_rd_q_mult = def_inter_rd_multiplier(qindex);
390
0
    rdmult = (int)((double)rdmult * def_rd_q_mult);
391
0
  }
392
393
2.52k
  switch (bit_depth) {
394
2.52k
    case AOM_BITS_8: break;
395
0
    case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
396
0
    case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
397
0
    default:
398
0
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
399
0
      return -1;
400
2.52k
  }
401
2.52k
  return rdmult > 0 ? rdmult : 1;
402
2.52k
}
403
404
2.52k
int av1_compute_rd_mult(const AV1_COMP *cpi, int qindex) {
405
2.52k
  const aom_bit_depth_t bit_depth = cpi->common.seq_params->bit_depth;
406
2.52k
  const FRAME_UPDATE_TYPE update_type =
407
2.52k
      cpi->ppi->gf_group.update_type[cpi->gf_frame_index];
408
2.52k
  int64_t rdmult =
409
2.52k
      av1_compute_rd_mult_based_on_qindex(bit_depth, update_type, qindex);
410
2.52k
  if (is_stat_consumption_stage(cpi) && !cpi->oxcf.q_cfg.use_fixed_qp_offsets &&
411
2.52k
      (cpi->common.current_frame.frame_type != KEY_FRAME)) {
412
0
    const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
413
0
    const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
414
0
    const int layer_depth =
415
0
        AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
416
417
    // Layer depth adjustment
418
0
    rdmult = (rdmult * rd_layer_depth_factor[layer_depth]) >> 7;
419
420
    // ARF boost adjustment
421
0
    rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
422
0
  }
423
2.52k
  return (int)rdmult;
424
2.52k
}
425
426
0
// Returns the qindex offset that moves the DC quantizer from its value at
// `qindex` towards q / sqrt(beta).
//
// The target quantizer is rint(q / sqrt(beta)). If it equals the current
// quantizer the offset is 0. Otherwise qindex is stepped one at a time (down
// when the target is smaller, up when larger) until the quantizer reaches or
// passes the target, or until qindex hits 0 / MAXQ. `beta` must be positive.
int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
  assert(beta > 0.0);

  const int start_qindex = qindex;
  const int start_q = av1_dc_quant_QTX(qindex, 0, bit_depth);
  const int target_q = (int)rint(start_q / sqrt(beta));

  if (target_q == start_q) return 0;

  if (target_q < start_q) {
    // Search downwards for the first qindex whose quantizer is <= target.
    while (qindex > 0) {
      --qindex;
      if (av1_dc_quant_QTX(qindex, 0, bit_depth) <= target_q) break;
    }
  } else {
    // Search upwards for the first qindex whose quantizer is >= target.
    while (qindex < MAXQ) {
      ++qindex;
      if (av1_dc_quant_QTX(qindex, 0, bit_depth) >= target_q) break;
    }
  }

  return qindex - start_qindex;
}
453
454
int av1_adjust_q_from_delta_q_res(int delta_q_res, int prev_qindex,
455
0
                                  int curr_qindex) {
456
0
  curr_qindex = clamp(curr_qindex, delta_q_res, 256 - delta_q_res);
457
0
  const int sign_deltaq_index = curr_qindex - prev_qindex >= 0 ? 1 : -1;
458
0
  const int deltaq_deadzone = delta_q_res / 4;
459
0
  const int qmask = ~(delta_q_res - 1);
460
0
  int abs_deltaq_index = abs(curr_qindex - prev_qindex);
461
0
  abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
462
0
  int adjust_qindex = prev_qindex + sign_deltaq_index * abs_deltaq_index;
463
0
  adjust_qindex = AOMMAX(adjust_qindex, MINQ + 1);
464
0
  return adjust_qindex;
465
0
}
466
467
0
int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
468
0
  assert(beta > 0.0);
469
0
  const AV1_COMMON *cm = &cpi->common;
470
0
  int q = av1_dc_quant_QTX(cm->quant_params.base_qindex, 0,
471
0
                           cm->seq_params->bit_depth);
472
473
0
  return (int)(av1_compute_rd_mult(cpi, q) / beta);
474
0
}
475
476
10.0k
// Returns the rd threshold scale factor for `qindex`.
//
// The DC quantizer is normalized by 4, 16 or 64 for 8-, 10- and 12-bit input
// respectively, raised to RD_THRESH_POW, multiplied by 5.12 and truncated;
// the result is never below 8. Returns -1 for an unsupported bit depth (after
// asserting).
static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
  double divisor;
  switch (bit_depth) {
    case AOM_BITS_8: divisor = 4.0; break;
    case AOM_BITS_10: divisor = 16.0; break;
    case AOM_BITS_12: divisor = 64.0; break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }

  const double q = av1_dc_quant_QTX(qindex, 0, bit_depth) / divisor;

  // TODO(debargha): Adjust the function below.
  const int factor = (int)(pow(q, RD_THRESH_POW) * 5.12);
  return AOMMAX(factor, 8);
}
493
494
2.52k
void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
495
2.52k
  switch (cpi->common.seq_params->bit_depth) {
496
2.52k
    case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
497
0
    case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
498
0
    case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
499
0
    default:
500
0
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
501
2.52k
  }
502
2.52k
}
503
504
1.26k
// Fills rd->threshes[segment][block size][mode].
//
// For each segment the effective qindex (segment qindex plus y_dc_delta_q,
// clamped to [0, MAXQ]) is turned into a threshold factor, which is then
// scaled per block size. Each mode threshold is thresh_mult * t / 4, or
// INT_MAX when thresh_mult is at or above INT_MAX / t (overflow guard).
static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd) {
  for (int seg = 0; seg < MAX_SEGMENTS; ++seg) {
    const int qindex = clamp(
        av1_get_qindex(&cm->seg, seg, cm->quant_params.base_qindex) +
            cm->quant_params.y_dc_delta_q,
        0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->seq_params->bit_depth);

    for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bs];
      const int thresh_max = INT_MAX / t;

      for (int mode = 0; mode < MAX_MODES; ++mode) {
        if (rd->thresh_mult[mode] < thresh_max) {
          rd->threshes[seg][bs][mode] = rd->thresh_mult[mode] * t / 4;
        } else {
          rd->threshes[seg][bs][mode] = INT_MAX;
        }
      }
    }
  }
}
527
528
// Converts the coefficient-coding CDFs of `fc` into the cost tables held in
// `coeff_costs`.
//
// coeff_costs  Destination tables (written).
// fc           Source CDFs (read).
// num_planes   Number of planes; at most PLANE_TYPES plane types are filled.
void av1_fill_coeff_costs(CoeffCosts *coeff_costs, FRAME_CONTEXT *fc,
                          const int num_planes) {
  const int nplanes = AOMMIN(num_planes, PLANE_TYPES);

  // EOB position costs: seven CDF families (16 .. 1024), two contexts each.
  for (int eob_set = 0; eob_set < 7; ++eob_set) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_EOB_COST *pcost = &coeff_costs->eob_costs[eob_set][plane];

      for (int ctx = 0; ctx < 2; ++ctx) {
        aom_cdf_prob *pcdf;
        switch (eob_set) {
          case 0: pcdf = fc->eob_flag_cdf16[plane][ctx]; break;
          case 1: pcdf = fc->eob_flag_cdf32[plane][ctx]; break;
          case 2: pcdf = fc->eob_flag_cdf64[plane][ctx]; break;
          case 3: pcdf = fc->eob_flag_cdf128[plane][ctx]; break;
          case 4: pcdf = fc->eob_flag_cdf256[plane][ctx]; break;
          case 5: pcdf = fc->eob_flag_cdf512[plane][ctx]; break;
          case 6:
          default: pcdf = fc->eob_flag_cdf1024[plane][ctx]; break;
        }
        av1_cost_tokens_from_cdf(pcost->eob_cost[ctx], pcdf, NULL);
      }
    }
  }

  for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_COEFF_COST *pcost = &coeff_costs->coeff_costs[tx_size][plane];

      for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(pcost->txb_skip_cost[ctx],
                                 fc->txb_skip_cdf[tx_size][ctx], NULL);
      }

      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx) {
        av1_cost_tokens_from_cdf(pcost->base_eob_cost[ctx],
                                 fc->coeff_base_eob_cdf[tx_size][plane][ctx],
                                 NULL);
      }

      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(pcost->base_cost[ctx],
                                 fc->coeff_base_cdf[tx_size][plane][ctx], NULL);
      }

      // Derived entries 4..7, computed from the base-level costs 0..3 of the
      // same context.
      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
        pcost->base_cost[ctx][4] = 0;
        pcost->base_cost[ctx][5] = pcost->base_cost[ctx][1] +
                                   av1_cost_literal(1) -
                                   pcost->base_cost[ctx][0];
        pcost->base_cost[ctx][6] =
            pcost->base_cost[ctx][2] - pcost->base_cost[ctx][1];
        pcost->base_cost[ctx][7] =
            pcost->base_cost[ctx][3] - pcost->base_cost[ctx][2];
      }

      for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx],
                                 fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
      }

      for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(pcost->dc_sign_cost[ctx],
                                 fc->dc_sign_cdf[plane][ctx], NULL);
      }

      // Cumulative level costs: each group of (BR_CDF_SIZE - 1) levels adds
      // one symbol cost on top of the accumulated cost of the "continue"
      // symbol (the last br_rate entry) from all previous groups.
      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
        int br_rate[BR_CDF_SIZE];
        av1_cost_tokens_from_cdf(
            br_rate, fc->coeff_br_cdf[AOMMIN(tx_size, TX_32X32)][plane][ctx],
            NULL);

        int accumulated = 0;
        int level = 0;
        while (level < COEFF_BASE_RANGE) {
          for (int sym = 0; sym < BR_CDF_SIZE - 1; ++sym) {
            pcost->lps_cost[ctx][level + sym] = accumulated + br_rate[sym];
          }
          accumulated += br_rate[BR_CDF_SIZE - 1];
          level += BR_CDF_SIZE - 1;
        }
        pcost->lps_cost[ctx][level] = accumulated;
      }

      // Second half of lps_cost: first entry copied, the rest are differences
      // between consecutive cumulative costs.
      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
        pcost->lps_cost[ctx][0 + COEFF_BASE_RANGE + 1] =
            pcost->lps_cost[ctx][0];
        for (int level = 1; level <= COEFF_BASE_RANGE; ++level) {
          pcost->lps_cost[ctx][level + COEFF_BASE_RANGE + 1] =
              pcost->lps_cost[ctx][level] - pcost->lps_cost[ctx][level - 1];
        }
      }
    }
  }
}
620
621
void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
622
1.26k
                       MvCosts *mv_costs) {
623
  // Avoid accessing 'mv_costs' when it is not allocated.
624
1.26k
  if (mv_costs == NULL) return;
625
626
0
  mv_costs->nmv_cost[0] = &mv_costs->nmv_cost_alloc[0][MV_MAX];
627
0
  mv_costs->nmv_cost[1] = &mv_costs->nmv_cost_alloc[1][MV_MAX];
628
0
  mv_costs->nmv_cost_hp[0] = &mv_costs->nmv_cost_hp_alloc[0][MV_MAX];
629
0
  mv_costs->nmv_cost_hp[1] = &mv_costs->nmv_cost_hp_alloc[1][MV_MAX];
630
0
  if (integer_mv) {
631
0
    mv_costs->mv_cost_stack = (int **)&mv_costs->nmv_cost;
632
0
    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
633
0
                             nmvc, MV_SUBPEL_NONE);
634
0
  } else {
635
0
    mv_costs->mv_cost_stack =
636
0
        usehp ? mv_costs->nmv_cost_hp : mv_costs->nmv_cost;
637
0
    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
638
0
                             nmvc, usehp);
639
0
  }
640
0
}
641
642
0
// Builds the IntraBC displacement-vector cost tables in `dv_costs` from
// `ndvc`, always with MV_SUBPEL_NONE.
void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs) {
  // Point each component at the MV_MAX entry of its backing array.
  for (int comp = 0; comp < 2; ++comp) {
    dv_costs->dv_costs[comp] = &dv_costs->dv_costs_alloc[comp][MV_MAX];
  }

  av1_build_nmv_cost_table(dv_costs->joint_mv, dv_costs->dv_costs, ndvc,
                           MV_SUBPEL_NONE);
}
648
649
// Populates speed features based on codec control settings (of type
650
// COST_UPDATE_TYPE) and expected speed feature settings (of type
651
// INTERNAL_COST_UPDATE_TYPE) by considering the least frequent cost update.
652
// The populated/updated speed features are used for cost updates in the
653
// encoder.
654
// WARNING: Population of unified cost update frequency needs to be taken care
655
// accordingly, in case of any modifications/additions to the enum
656
// COST_UPDATE_TYPE/INTERNAL_COST_UPDATE_TYPE.
657
static INLINE void populate_unified_cost_update_freq(
658
1.26k
    const CostUpdateFreq cost_upd_freq, SPEED_FEATURES *const sf) {
659
1.26k
  INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
660
  // Mapping of entropy cost update frequency from the encoder's codec control
661
  // settings of type COST_UPDATE_TYPE to speed features of type
662
  // INTERNAL_COST_UPDATE_TYPE.
663
1.26k
  static const INTERNAL_COST_UPDATE_TYPE
664
1.26k
      map_cost_upd_to_internal_cost_upd[NUM_COST_UPDATE_TYPES] = {
665
1.26k
        INTERNAL_COST_UPD_SB, INTERNAL_COST_UPD_SBROW, INTERNAL_COST_UPD_TILE,
666
1.26k
        INTERNAL_COST_UPD_OFF
667
1.26k
      };
668
669
1.26k
  inter_sf->mv_cost_upd_level =
670
1.26k
      AOMMIN(inter_sf->mv_cost_upd_level,
671
1.26k
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.mv]);
672
1.26k
  inter_sf->coeff_cost_upd_level =
673
1.26k
      AOMMIN(inter_sf->coeff_cost_upd_level,
674
1.26k
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.coeff]);
675
1.26k
  inter_sf->mode_cost_upd_level =
676
1.26k
      AOMMIN(inter_sf->mode_cost_upd_level,
677
1.26k
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.mode]);
678
1.26k
  sf->intra_sf.dv_cost_upd_level =
679
1.26k
      AOMMIN(sf->intra_sf.dv_cost_upd_level,
680
1.26k
             map_cost_upd_to_internal_cost_upd[cost_upd_freq.dv]);
681
1.26k
}
682
683
// Checks if entropy costs should be initialized/updated at frame level or not.
684
static INLINE int is_frame_level_cost_upd_freq_set(
685
    const AV1_COMMON *const cm, const INTERNAL_COST_UPDATE_TYPE cost_upd_level,
686
3.78k
    const int use_nonrd_pick_mode, const int frames_since_key) {
687
3.78k
  const int fill_costs =
688
3.78k
      frame_is_intra_only(cm) ||
689
3.78k
      (use_nonrd_pick_mode ? frames_since_key < 2
690
0
                           : (cm->current_frame.frame_number & 0x07) == 1);
691
3.78k
  return ((!use_nonrd_pick_mode && cost_upd_level != INTERNAL_COST_UPD_OFF) ||
692
3.78k
          cost_upd_level == INTERNAL_COST_UPD_TILE || fill_costs);
693
3.78k
}
694
695
1.26k
// Sets up the frame-level rate-distortion state of the encoder instance:
// derives RDMULT from the frame q index (optionally overridden through
// CONFIG_RD_COMMAND), updates the error-per-bit value and the block
// thresholds, folds the user cost-update frequencies into the speed features
// and finally refreshes the mv / coefficient / mode / dv cost tables that are
// due at frame level.
void av1_initialize_rd_consts(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  SPEED_FEATURES *const sf = &cpi->sf;
  RD_OPT *const rd = &cpi->rd;
  // Sampled before the speed features are touched below.
  const int nonrd = sf->rt_sf.use_nonrd_pick_mode;
  const int since_key = cpi->rc.frames_since_key;

  const int qindex =
      cm->quant_params.base_qindex + cm->quant_params.y_dc_delta_q;
  rd->RDMULT = av1_compute_rd_mult(cpi, qindex);
#if CONFIG_RD_COMMAND
  if (cpi->oxcf.pass == 2) {
    const RD_COMMAND *rd_command = &cpi->rd_command;
    const int frame_index = rd_command->frame_index;
    if (rd_command->option_ls[frame_index] == RD_OPTION_SET_Q_RDMULT) {
      rd->RDMULT = rd_command->rdmult_ls[frame_index];
    }
  }
#endif  // CONFIG_RD_COMMAND

  av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);
  set_block_thresholds(cm, rd);

  // The update levels read below are the ones left in place by this call.
  populate_unified_cost_update_freq(cpi->oxcf.cost_upd_freq, sf);
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;

  // Motion vector costs.
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->mv_cost_upd_level, nonrd,
                                       since_key)) {
    av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
                      cm->features.allow_high_precision_mv, x->mv_costs);
  }

  // Coefficient costs.
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->coeff_cost_upd_level,
                                       nonrd, since_key)) {
    av1_fill_coeff_costs(&x->coeff_costs, cm->fc, av1_num_planes(cm));
  }

  // Mode costs.
  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->mode_cost_upd_level,
                                       nonrd, since_key)) {
    av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);
  }

  // Displacement vector (intrabc) costs: only with RD mode decision, when
  // intrabc is allowed and outside the stat generation stage.
  if (!nonrd && av1_allow_intrabc(cm) && !is_stat_generation_stage(cpi)) {
    av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
  }
}
743
744
0
// Looks up the normalized rate (*r_q10) and normalized distortion (*d_q10),
// both in Q10, for a Q10 argument xsq_q10 by linear interpolation between
// tabulated samples.
//
// The caller must keep xsq_q10 small enough that xq + 1 stays inside the
// tables (av1_model_rd_from_var_lapndz() clamps its argument before calling).
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The three tables below must be of the same size. They are laid out
  // eight entries per row; row k holds the samples selected when the index
  // computation at the bottom yields that k.

  // Normalized rate:
  // Models the rate for a Laplacian source with given variance when
  // quantized with a uniform quantizer with given stepsize. The closed form
  // expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651,  //
    4553,  4389, 4255, 4142, 4044, 3958, 3881, 3811,  //
    3748,  3635, 3538, 3453, 3376, 3307, 3244, 3186,  //
    3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638,  //
    2589,  2501, 2423, 2353, 2290, 2232, 2179, 2130,  //
    2084,  2001, 1928, 1862, 1802, 1748, 1698, 1651,  //
    1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199,  //
    1159,  1086, 1021, 963,  911,  864,  821,  781,   //
    745,   680,  623,  574,  530,  490,  455,  424,   //
    395,   345,  304,  269,  239,  213,  190,  171,   //
    154,   126,  104,  87,   73,   61,   52,   44,    //
    38,    28,   21,   16,   12,   10,   8,    6,     //
    5,     3,    2,    1,    1,    1,    0,    0,     //
  };
  // Normalized distortion:
  // Models the normalized distortion for a Laplacian source with given
  // variance when quantized with a uniform quantizer with given stepsize.
  // The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
    0,    0,    1,    1,    1,    2,    2,    2,     //
    3,    3,    4,    5,    5,    6,    7,    7,     //
    8,    9,    11,   12,   13,   15,   16,   17,    //
    18,   21,   24,   26,   29,   31,   34,   36,    //
    39,   44,   49,   54,   59,   64,   69,   73,    //
    78,   88,   97,   106,  115,  124,  133,  142,   //
    151,  167,  184,  200,  215,  231,  245,  260,   //
    274,  301,  327,  351,  375,  397,  418,  439,   //
    458,  495,  528,  559,  587,  613,  637,  659,   //
    680,  717,  749,  777,  801,  823,  842,  859,   //
    874,  899,  919,  936,  949,  960,  969,  977,   //
    983,  994,  1001, 1006, 1010, 1013, 1015, 1017,  //
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,  //
  };
  // Q10 argument at which each table entry was sampled.
  static const int xsq_iq_q10[] = {
    0,      4,      8,      12,     16,     20,     24,     28,      //
    32,     40,     48,     56,     64,     72,     80,     88,      //
    96,     112,    128,    144,    160,    176,    192,    208,     //
    224,    256,    288,    320,    352,    384,    416,    448,     //
    480,    544,    608,    672,    736,    800,    864,    928,     //
    992,    1120,   1248,   1376,   1504,   1632,   1760,   1888,    //
    2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,    //
    4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,    //
    8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,   //
    16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,   //
    32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,   //
    65504,  73696,  81888,  90080,  98272,  106464, 114656, 122848,  //
    131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,  //
  };

  // Locate the sample at or below xsq_q10: k selects the table row and the
  // three bits of `biased` starting at bit k select the entry within it.
  // NOTE(review): relies on get_msb() giving the index of the highest set
  // bit; it is declared outside this file.
  const int biased = (xsq_q10 >> 2) + 8;
  const int k = get_msb(biased) - 3;
  const int xq = (k << 3) + ((biased >> k) & 0x7);

  // Q10 weights of the upper and lower neighbouring samples.
  const int w_hi_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int w_lo_q10 = (1 << 10) - w_hi_q10;

  *r_q10 =
      (rate_tab_q10[xq] * w_lo_q10 + rate_tab_q10[xq + 1] * w_hi_q10) >> 10;
  *d_q10 =
      (dist_tab_q10[xq] * w_lo_q10 + dist_tab_q10[xq + 1] * w_hi_q10) >> 10;
}
809
810
void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
811
                                  unsigned int qstep, int *rate,
812
0
                                  int64_t *dist) {
813
  // This function models the rate and distortion for a Laplacian
814
  // source with given variance when quantized with a uniform quantizer
815
  // with given stepsize. The closed form expressions are in:
816
  // Hang and Chen, "Source Model for transform video coder and its
817
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
818
  // Sys. for Video Tech., April 1997.
819
0
  if (var == 0) {
820
0
    *rate = 0;
821
0
    *dist = 0;
822
0
  } else {
823
0
    int d_q10, r_q10;
824
0
    static const uint32_t MAX_XSQ_Q10 = 245727;
825
0
    const uint64_t xsq_q10_64 =
826
0
        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
827
0
    const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
828
0
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
829
0
    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
830
0
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
831
0
  }
832
0
}
833
834
0
// Cubic interpolation through four consecutive samples p[0..3].
// x is the position between p[1] (x = 0) and p[2] (x = 1); the polynomial is
// evaluated in nested (Horner) form.
static double interp_cubic(const double *p, double x) {
  const double c3 = 3.0 * (p[1] - p[2]) + p[3] - p[0];
  const double c2 = 2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * c3;
  const double c1 = p[2] - p[0] + x * c2;
  return p[1] + 0.5 * x * c1;
}
840
841
/*
842
static double interp_bicubic(const double *p, int p_stride, double x,
843
                             double y) {
844
  double q[4];
845
  q[0] = interp_cubic(p, x);
846
  q[1] = interp_cubic(p + p_stride, x);
847
  q[2] = interp_cubic(p + 2 * p_stride, x);
848
  q[3] = interp_cubic(p + 3 * p_stride, x);
849
  return interp_cubic(q, y);
850
}
851
*/
852
853
// Maps a block size to one of the four rate-curve categories used as the row
// index into interp_rgrid_curv by av1_model_rd_curvfit().
// NOTE(review): entries follow BLOCK_SIZE enum order; the trailing six are
// presumably the 1:4 / 4:1 sizes - confirm against the enum declaration.
static const uint8_t bsize_curvfit_model_cat_lookup[BLOCK_SIZES_ALL] = {
  0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3,  //
  1, 1, 2, 2, 3, 3
};
856
857
0
// Selects the distortion-curve category for a normalized sse value:
// 1 when sse_norm exceeds 16.0, otherwise 0.
static int sse_norm_curvfit_model_cat_lookup(double sse_norm) {
  return sse_norm > 16.0 ? 1 : 0;
}
860
861
// Models distortion by sse using a logistic function on
862
// l = log2(sse / q^2) as:
863
// dbysse = 16 / (1 + k exp(l + c))
864
0
static double get_dbysse_logistic(double l, double c, double k) {
865
0
  const double A = 16.0;
866
0
  const double dbysse = A / (1 + k * exp(l + c));
867
0
  return dbysse;
868
0
}
869
870
// Clamped linear rate model. With l = log2(sse / q^2) supplied by the
// caller:
//   rate = max(0, a + b * l)
static double get_rate_clamplinear(double l, double a, double b) {
  const double linear = a + b * l;
  if (linear < 0) return 0;
  return linear;
}
877
878
// Maps a block size to one of the nine surface-fit categories used as the
// row index into surffit_rate_params.
// NOTE(review): entries follow BLOCK_SIZE enum order; the trailing six are
// presumably the 1:4 / 4:1 sizes - confirm against the enum declaration.
static const uint8_t bsize_surffit_model_cat_lookup[BLOCK_SIZES_ALL] = {
  0, 0, 0, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8,  //
  0, 0, 2, 2, 4, 4
};
881
882
// Surface-fit rate model coefficients, one row per category from
// bsize_surffit_model_cat_lookup. For a row { a0, a1, b0, b1 } the clamped
// linear rate model uses a = a0 + a1 * xm and b = b0 + b1 * xm
// (see rate_surffit_model_params_lookup()).
static const double surffit_rate_params[9][4] = {
  { 638.390212, 2.253108, 166.585650, -3.939401 },
  { 5.256905, 81.997240, -1.321771, 17.694216 },
  { -74.193045, 72.431868, -19.033152, 15.407276 },
  { 416.770113, 14.794188, 167.686830, -6.997756 },
  { 378.511276, 9.558376, 154.658843, -6.635663 },
  { 277.818787, 4.413180, 150.317637, -9.893038 },
  { 142.212132, 11.542038, 94.393964, -5.518517 },
  { 219.100256, 4.007421, 108.932852, -6.981310 },
  { 222.261971, 3.251049, 95.972916, -5.609789 },
};
938
939
// Surface-fit distortion model coefficients, consumed by
// dist_surffit_model_params_lookup(): entries 0..3 drive the first logistic
// parameter, entries 4..6 the second.
static const double surffit_dist_params[7] = {
  1.475844, 4.328362, -5.680233, -0.500994,  // dpar[0]
  0.554585, 4.839478, -0.695837              // dpar[1]
};
942
943
// Computes the two parameters of the clamped linear rate model for a block
// size and xm: each is an affine function of xm whose coefficients come from
// the surffit_rate_params row of the block size category.
// rpar must have room for two doubles.
static void rate_surffit_model_params_lookup(BLOCK_SIZE bsize, double xm,
                                             double *rpar) {
  const double *const coef =
      surffit_rate_params[bsize_surffit_model_cat_lookup[bsize]];
  rpar[0] = coef[0] + coef[1] * xm;
  rpar[1] = coef[2] + coef[3] * xm;
}
949
950
// Computes the two parameters of the logistic distortion model for xm from
// surffit_dist_params: dpar[0] is a logistic function of xm and dpar[1] an
// exponential one. bsize is accepted for symmetry with the rate lookup but
// is not used. dpar must have room for two doubles.
static void dist_surffit_model_params_lookup(BLOCK_SIZE bsize, double xm,
                                             double *dpar) {
  (void)bsize;
  const double *const q = surffit_dist_params;
  const double logistic_arg = (xm + q[2]) * q[3];
  dpar[0] = q[0] + q[1] / (1 + exp(logistic_arg));
  dpar[1] = q[4] + q[5] * exp(q[6] * xm);
}
957
958
// Surface-fit RD model: derives the rate and distortion model parameters
// from bsize and xm, then evaluates both models at yl. The rate goes to
// *rate_f and the distortion-by-sse ratio to *distbysse_f. sse_norm is
// currently unused.
void av1_model_rd_surffit(BLOCK_SIZE bsize, double sse_norm, double xm,
                          double yl, double *rate_f, double *distbysse_f) {
  (void)sse_norm;

  double rate_par[2];
  rate_surffit_model_params_lookup(bsize, xm, rate_par);
  *rate_f = get_rate_clamplinear(yl, rate_par[0], rate_par[1]);

  double dist_par[2];
  dist_surffit_model_params_lookup(bsize, xm, dist_par);
  *distbysse_f = get_dbysse_logistic(yl, dist_par[0], dist_par[1]);
}
968
969
// Sampled rate curves for av1_model_rd_curvfit(), one row per category from
// bsize_curvfit_model_cat_lookup. Each row holds 65 samples taken every 0.5
// starting at x = -15.5; the first eleven samples of every row are zero.
static const double interp_rgrid_curv[4][65] = {
  {
      // Zero plateau (samples 0..10).
      0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
      // Samples 11..64.
      118.257702,  120.210658,  121.434853,  122.100487,
      122.377758,  122.436865,  72.290102,   96.974289,   101.652727,
      126.830141,  140.417377,  157.644879,  184.315291,  215.823873,
      262.300169,  335.919859,  420.624173,  519.185032,  619.854243,
      726.053595,  827.663369,  933.127475,  1037.988755, 1138.839609,
      1233.342933, 1333.508064, 1428.760126, 1533.396364, 1616.952052,
      1744.539319, 1803.413586, 1951.466618, 1994.227838, 2086.031680,
      2148.635443, 2239.068450, 2222.590637, 2338.859809, 2402.929011,
      2418.727875, 2435.342670, 2471.159469, 2523.187446, 2591.183827,
      2674.905840, 2774.110714, 2888.555675, 3017.997952, 3162.194773,
      3320.903365, 3493.880956, 3680.884773, 3881.672045, 4096.000000,
  },
  {
      // Zero plateau (samples 0..10).
      0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
      // Samples 11..64.
      13.087244,   15.919735,   25.930313,   24.412411,
      28.567417,   29.924194,   30.857010,   32.742979,   36.382570,
      39.210386,   42.265690,   47.378572,   57.014850,   82.740067,
      137.346562,  219.968084,  316.781856,  415.643773,  516.706538,
      614.914364,  714.303763,  815.512135,  911.210485,  1008.501528,
      1109.787854, 1213.772279, 1322.922561, 1414.752579, 1510.505641,
      1615.741888, 1697.989032, 1780.123933, 1847.453790, 1913.742309,
      1960.828122, 2047.500168, 2085.454095, 2129.230668, 2158.171824,
      2182.231724, 2217.684864, 2269.589211, 2337.264824, 2420.618694,
      2519.557814, 2633.989178, 2763.819779, 2908.956609, 3069.306660,
      3244.776927, 3435.274401, 3640.706076, 3860.978945, 4096.000000,
  },
  {
      // Zero plateau (samples 0..10).
      0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
      // Samples 11..64.
      4.656893,    5.123633,    5.594132,    6.162376,
      6.918433,    7.768444,    8.739415,    10.105862,   11.477328,
      13.236604,   15.421030,   19.093623,   25.801871,   46.724612,
      98.841054,   181.113466,  272.586364,  359.499769,  445.546343,
      525.944439,  605.188743,  681.793483,  756.668359,  838.486885,
      926.950356,  1015.482542, 1113.353926, 1204.897193, 1288.871992,
      1373.464145, 1455.746628, 1527.796460, 1588.475066, 1658.144771,
      1710.302500, 1807.563351, 1863.197608, 1927.281616, 1964.450872,
      2022.719898, 2100.041145, 2185.205712, 2280.993936, 2387.616216,
      2505.282950, 2634.204540, 2774.591385, 2926.653884, 3090.602436,
      3266.647443, 3454.999303, 3655.868416, 3869.465182, 4096.000000,
  },
  {
      // Zero plateau (samples 0..10).
      0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
      // Samples 11..64.
      0.337370,    0.391916,    0.468839,    0.566334,
      0.762564,    1.069225,    1.384361,    1.787581,    2.293948,
      3.251909,    4.412991,    8.050068,    11.606073,   27.668092,
      65.227758,   128.463938,  202.097653,  262.715851,  312.464873,
      355.601398,  400.609054,  447.201352,  495.761568,  552.871938,
      619.067625,  691.984883,  773.753288,  860.628503,  946.262808,
      1019.805896, 1106.061360, 1178.422145, 1244.852258, 1302.173987,
      1399.650266, 1548.092912, 1545.928652, 1670.817500, 1694.523823,
      1779.195362, 1882.155494, 1990.662097, 2108.325181, 2235.456119,
      2372.366287, 2519.367059, 2676.769812, 2844.885918, 3024.026754,
      3214.503695, 3416.628115, 3630.711389, 3857.064892, 4096.000000,
  },
};
1031
1032
// Sampled distortion-by-sse curves for av1_model_rd_curvfit(), one row per
// category returned by sse_norm_curvfit_model_cat_lookup() (0 or 1). Each
// row holds 65 samples taken every 0.5 starting at x = -15.5.
//
// The first dimension matches the two rows that are actually provided; a
// larger bound would silently add an all-zero curve.
static const double interp_dgrid_curv[2][65] = {
  {
      16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
      15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
      15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
      13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
      7.487633,  5.688649,  4.267515,  3.196300,  2.434201,  1.834064,
      1.369920,  1.035921,  0.775279,  0.574895,  0.427232,  0.314123,
      0.233236,  0.171440,  0.128188,  0.092762,  0.067569,  0.049324,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  0.000000,
  },
  {
      16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
      15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
      15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
      13.073692, 12.222005, 11.237799, 9.985848,  8.898823,  7.423519,
      5.995325,  4.773152,  3.744032,  2.938217,  2.294526,  1.762412,
      1.327145,  1.020728,  0.765535,  0.570548,  0.425833,  0.313825,
      0.232959,  0.171324,  0.128174,  0.092750,  0.067558,  0.049319,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  -0.000000,
  },
};
1060
1061
// Curve-fit RD model: evaluates the sampled rate and distortion-by-sse
// curves at xqr by cubic interpolation. The rate curve is chosen from bsize,
// the distortion curve from sse_norm. Results go to *rate_f and
// *distbysse_f.
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
                          double *rate_f, double *distbysse_f) {
  // Sampling grid of the curves: 65 points from x_start to x_end.
  const double x_start = -15.5;
  const double x_end = 16.5;
  const double x_step = 0.5;
  const double epsilon = 1e-6;

  // interp_cubic() needs one sample before and two after the base sample, so
  // keep xqr strictly inside the grid by one step on each side.
  const double x_lo = x_start + x_step + epsilon;
  const double x_hi = x_end - x_step - epsilon;
  xqr = AOMMAX(xqr, x_lo);
  xqr = AOMMIN(xqr, x_hi);

  // Split the grid position into base sample index and fractional offset.
  const double grid_pos = (xqr - x_start) / x_step;
  const int xi = (int)floor(grid_pos);
  const double frac = grid_pos - xi;
  assert(xi > 0);

  const int rate_cat = bsize_curvfit_model_cat_lookup[bsize];
  const double *const rate_samples = interp_rgrid_curv[rate_cat] + (xi - 1);
  *rate_f = interp_cubic(rate_samples, frac);

  const int dist_cat = sse_norm_curvfit_model_cat_lookup(sse_norm);
  const double *const dist_samples = interp_dgrid_curv[dist_cat] + (xi - 1);
  *distbysse_f = interp_cubic(dist_samples, frac);
}
1084
1085
static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
1086
                                       const struct macroblockd_plane *pd,
1087
                                       ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
1088
4.76M
                                       ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
1089
4.76M
  const int num_4x4_w = mi_size_wide[plane_bsize];
1090
4.76M
  const int num_4x4_h = mi_size_high[plane_bsize];
1091
4.76M
  const ENTROPY_CONTEXT *const above = pd->above_entropy_context;
1092
4.76M
  const ENTROPY_CONTEXT *const left = pd->left_entropy_context;
1093
1094
4.76M
  memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
1095
4.76M
  memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
1096
4.76M
}
1097
1098
void av1_get_entropy_contexts(BLOCK_SIZE plane_bsize,
1099
                              const struct macroblockd_plane *pd,
1100
                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
1101
4.76M
                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
1102
4.76M
  assert(plane_bsize < BLOCK_SIZES_ALL);
1103
4.76M
  get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left);
1104
4.76M
}
1105
1106
// Evaluates the first two reference mv candidates of ref_frame against the
// source block using the SAD function of block_size.
//
// Results are stored in x, indexed by ref_frame:
//  - pred_mv0_sad / pred_mv1_sad: SAD of the first / second candidate
//    (only written for candidates that are actually evaluated),
//  - pred_mv_sad: the smallest SAD found,
//  - max_mv_context: the largest candidate component magnitude, in full-pel
//    units.
void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
  const int_mv first_mv =
      av1_get_ref_mv_from_stack(0, ref_frames, 0, &x->mbmi_ext);
  const int_mv second_mv =
      av1_get_ref_mv_from_stack(0, ref_frames, 1, &x->mbmi_ext);

  // Collect the candidates; the second is dropped when it repeats the first.
  MV candidates[MAX_MV_REF_CANDIDATES + 1];
  int num_candidates = 0;
  candidates[num_candidates++] = first_mv.as_mv;
  if (first_mv.as_int != second_mv.as_int) {
    candidates[num_candidates++] = second_mv.as_mv;
  }

  assert(num_candidates <=
         (int)(sizeof(candidates) / sizeof(candidates[0])));

  const uint8_t *const src = x->plane[0].src.buf;
  const int src_stride = x->plane[0].src.stride;
  int have_zero = 0;
  int min_sad = INT_MAX;
  int largest_mv = 0;

  for (int idx = 0; idx < num_candidates; ++idx) {
    const MV *const mv = &candidates[idx];
    // Round the 1/8-pel components to full-pel positions.
    const int row = (mv->row + 3 + (mv->row >= 0)) >> 3;
    const int col = (mv->col + 3 + (mv->col >= 0)) >> 3;
    const int magnitude = AOMMAX(abs(mv->row), abs(mv->col)) >> 3;
    largest_mv = AOMMAX(largest_mv, magnitude);

    // The full-pel zero position is only evaluated once.
    const int is_zero = (row == 0 && col == 0);
    if (is_zero && have_zero) continue;
    have_zero |= is_zero;

    const uint8_t *const ref = ref_y_buffer + (ref_y_stride * row + col);
    const int sad = cpi->ppi->fn_ptr[block_size].sdf(src, src_stride, ref,
                                                     ref_y_stride);
    if (sad < min_sad) min_sad = sad;

    if (idx == 0) {
      x->pred_mv0_sad[ref_frame] = sad;
    } else if (idx == 1) {
      x->pred_mv1_sad[ref_frame] = sad;
    }
  }

  // Publish the largest full-pel magnitude and the best SAD.
  x->max_mv_context[ref_frame] = largest_mv;
  x->pred_mv_sad[ref_frame] = min_sad;
}
1155
1156
void av1_setup_pred_block(const MACROBLOCKD *xd,
1157
                          struct buf_2d dst[MAX_MB_PLANE],
1158
                          const YV12_BUFFER_CONFIG *src,
1159
                          const struct scale_factors *scale,
1160
                          const struct scale_factors *scale_uv,
1161
0
                          const int num_planes) {
1162
0
  dst[0].buf = src->y_buffer;
1163
0
  dst[0].stride = src->y_stride;
1164
0
  dst[1].buf = src->u_buffer;
1165
0
  dst[2].buf = src->v_buffer;
1166
0
  dst[1].stride = dst[2].stride = src->uv_stride;
1167
1168
0
  const int mi_row = xd->mi_row;
1169
0
  const int mi_col = xd->mi_col;
1170
0
  for (int i = 0; i < num_planes; ++i) {
1171
0
    setup_pred_plane(dst + i, xd->mi[0]->bsize, dst[i].buf,
1172
0
                     i ? src->uv_crop_width : src->y_crop_width,
1173
0
                     i ? src->uv_crop_height : src->y_crop_height,
1174
0
                     dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
1175
0
                     xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
1176
0
  }
1177
0
}
1178
1179
YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
1180
0
                                             int ref_frame) {
1181
0
  assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
1182
0
  RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame - 1];
1183
0
  const RefCntBuffer *const ref_buf =
1184
0
      get_ref_frame_buf(&cpi->common, ref_frame);
1185
0
  return (scaled_buf != ref_buf && scaled_buf != NULL) ? &scaled_buf->buf
1186
0
                                                       : NULL;
1187
0
}
1188
1189
int av1_get_switchable_rate(const MACROBLOCK *x, const MACROBLOCKD *xd,
1190
0
                            InterpFilter interp_filter, int dual_filter) {
1191
0
  if (interp_filter == SWITCHABLE) {
1192
0
    const MB_MODE_INFO *const mbmi = xd->mi[0];
1193
0
    int inter_filter_cost = 0;
1194
0
    for (int dir = 0; dir < 2; ++dir) {
1195
0
      if (dir && !dual_filter) break;
1196
0
      const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
1197
0
      const InterpFilter filter =
1198
0
          av1_extract_interp_filter(mbmi->interp_filters, dir);
1199
0
      inter_filter_cost += x->mode_costs.switchable_interp_costs[ctx][filter];
1200
0
    }
1201
0
    return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
1202
0
  } else {
1203
0
    return 0;
1204
0
  }
1205
0
}
1206
1207
1.26k
// Assigns the seven compound-mode baseline multipliers of one reference
// pair. `refs` is the THR_COMP_* name suffix of the pair (e.g. LA, L2B); the
// remaining arguments are the values for the NEAR_NEAR, NEAREST_NEW,
// NEW_NEAREST, NEAR_NEW, NEW_NEAR, NEW_NEW and GLOBAL_GLOBAL modes.
#define SET_COMP_THRESH_MULT(rd_opt, refs, near_near, nearest_new,      \
                             new_nearest, near_new, new_near, new_new,  \
                             global_global)                             \
  do {                                                                  \
    (rd_opt)->thresh_mult[THR_COMP_NEAR_NEAR##refs] = (near_near);      \
    (rd_opt)->thresh_mult[THR_COMP_NEAREST_NEW##refs] = (nearest_new);  \
    (rd_opt)->thresh_mult[THR_COMP_NEW_NEAREST##refs] = (new_nearest);  \
    (rd_opt)->thresh_mult[THR_COMP_NEAR_NEW##refs] = (near_new);        \
    (rd_opt)->thresh_mult[THR_COMP_NEW_NEAR##refs] = (new_near);        \
    (rd_opt)->thresh_mult[THR_COMP_NEW_NEW##refs] = (new_new);          \
    (rd_opt)->thresh_mult[THR_COMP_GLOBAL_GLOBAL##refs] = (global_global); \
  } while (0)

// Resets cpi->rd.thresh_mult to zero and then installs the baseline
// threshold multiplier of every prediction mode set below.
void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
  RD_OPT *const rd = &cpi->rd;

  // Start from all-zero multipliers.
  av1_zero(rd->thresh_mult);

  // Single reference: NEARESTMV.
  rd->thresh_mult[THR_NEARESTMV] = 300;
  rd->thresh_mult[THR_NEARESTL2] = 300;
  rd->thresh_mult[THR_NEARESTL3] = 300;
  rd->thresh_mult[THR_NEARESTB] = 300;
  rd->thresh_mult[THR_NEARESTA2] = 300;
  rd->thresh_mult[THR_NEARESTA] = 300;
  rd->thresh_mult[THR_NEARESTG] = 300;

  // Single reference: NEWMV.
  rd->thresh_mult[THR_NEWMV] = 1000;
  rd->thresh_mult[THR_NEWL2] = 1000;
  rd->thresh_mult[THR_NEWL3] = 1000;
  rd->thresh_mult[THR_NEWB] = 1000;
  rd->thresh_mult[THR_NEWA2] = 1100;
  rd->thresh_mult[THR_NEWA] = 1000;
  rd->thresh_mult[THR_NEWG] = 1000;

  // Single reference: NEARMV.
  rd->thresh_mult[THR_NEARMV] = 1000;
  rd->thresh_mult[THR_NEARL2] = 1000;
  rd->thresh_mult[THR_NEARL3] = 1000;
  rd->thresh_mult[THR_NEARB] = 1000;
  rd->thresh_mult[THR_NEARA2] = 1000;
  rd->thresh_mult[THR_NEARA] = 1000;
  rd->thresh_mult[THR_NEARG] = 1000;

  // Single reference: GLOBALMV.
  rd->thresh_mult[THR_GLOBALMV] = 2200;
  rd->thresh_mult[THR_GLOBALL2] = 2000;
  rd->thresh_mult[THR_GLOBALL3] = 2000;
  rd->thresh_mult[THR_GLOBALB] = 2400;
  rd->thresh_mult[THR_GLOBALA2] = 2000;
  rd->thresh_mult[THR_GLOBALG] = 2000;
  rd->thresh_mult[THR_GLOBALA] = 2400;

  // Compound NEAREST_NEAREST.
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] = 1100;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] = 800;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] = 900;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA2] = 1000;

  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] = 2000;

  // Remaining compound modes, one line per reference pair. Columns:
  //                         NEAR_  NEAREST_ NEW_     NEAR_ NEW_  NEW_  GLOBAL_
  //                         NEAR   NEW      NEAREST  NEW   NEAR  NEW   GLOBAL
  SET_COMP_THRESH_MULT(rd, LA, 1200, 1500, 1500, 1530, 1870, 2400, 2750);
  SET_COMP_THRESH_MULT(rd, L2A, 1200, 1500, 1500, 1870, 1700, 1800, 2500);
  SET_COMP_THRESH_MULT(rd, L3A, 1200, 1500, 1500, 1700, 1700, 2000, 3000);
  SET_COMP_THRESH_MULT(rd, GA, 1320, 1500, 1500, 2040, 1700, 2000, 2250);

  SET_COMP_THRESH_MULT(rd, LB, 1200, 1500, 1500, 1360, 1700, 2400, 2250);
  SET_COMP_THRESH_MULT(rd, L2B, 1200, 1500, 1500, 1700, 1700, 2000, 2500);
  SET_COMP_THRESH_MULT(rd, L3B, 1200, 1500, 1500, 1870, 1700, 2000, 2500);
  SET_COMP_THRESH_MULT(rd, GB, 1200, 1500, 1500, 1700, 1700, 2000, 2500);

  SET_COMP_THRESH_MULT(rd, LA2, 1200, 1800, 1500, 1700, 1700, 2000, 2500);
  SET_COMP_THRESH_MULT(rd, L2A2, 1200, 1500, 1500, 1700, 1700, 2000, 2500);
  SET_COMP_THRESH_MULT(rd, L3A2, 1440, 1500, 1500, 1700, 1700, 2000, 2500);
  SET_COMP_THRESH_MULT(rd, GA2, 1200, 1500, 1500, 1700, 1700, 2000, 2750);

  SET_COMP_THRESH_MULT(rd, LL2, 1600, 2000, 2000, 2640, 2200, 2400, 3200);
  SET_COMP_THRESH_MULT(rd, LL3, 1600, 2000, 1800, 2200, 2200, 2400, 3200);
  SET_COMP_THRESH_MULT(rd, LG, 1760, 2400, 2000, 1760, 2640, 2400, 3200);
  SET_COMP_THRESH_MULT(rd, BA, 1600, 2000, 2000, 2200, 1980, 2640, 3200);

  // Intra modes.
  rd->thresh_mult[THR_DC] = 1000;
  rd->thresh_mult[THR_PAETH] = 1000;
  rd->thresh_mult[THR_SMOOTH] = 2200;
  rd->thresh_mult[THR_SMOOTH_V] = 2000;
  rd->thresh_mult[THR_SMOOTH_H] = 2000;
  rd->thresh_mult[THR_H_PRED] = 2000;
  rd->thresh_mult[THR_V_PRED] = 1800;
  rd->thresh_mult[THR_D135_PRED] = 2500;
  rd->thresh_mult[THR_D203_PRED] = 2000;
  rd->thresh_mult[THR_D157_PRED] = 2500;
  rd->thresh_mult[THR_D67_PRED] = 2000;
  rd->thresh_mult[THR_D113_PRED] = 2500;
  rd->thresh_mult[THR_D45_PRED] = 2500;
}

#undef SET_COMP_THRESH_MULT
1405
1406
// Adjusts the RD threshold factors stored in factor_buf[block size][mode] for
// every mode in [mode_start, mode_end) and every block size in
// [min_size, max_size] (both size bounds inclusive).
//
// The entry whose mode equals best_mode_index is decreased by
// (value >> RD_THRESH_LOG_DEC_FACTOR). Every other entry is increased by
// RD_THRESH_INC and then clamped so it never exceeds max_rd_thresh_factor.
static INLINE void update_thr_fact(int (*factor_buf)[MAX_MODES],
                                   THR_MODES best_mode_index,
                                   THR_MODES mode_start, THR_MODES mode_end,
                                   BLOCK_SIZE min_size, BLOCK_SIZE max_size,
                                   int max_rd_thresh_factor) {
  for (THR_MODES mode = mode_start; mode < mode_end; ++mode) {
    // Whether this mode is the winner does not depend on the block size.
    const int is_best_mode = (mode == best_mode_index);

    for (BLOCK_SIZE bs = min_size; bs <= max_size; ++bs) {
      const int current = factor_buf[bs][mode];
      int updated;

      if (is_best_mode) {
        // Decay the factor of the winning mode.
        updated = current - (current >> RD_THRESH_LOG_DEC_FACTOR);
      } else {
        // Grow the factor of a losing mode, saturating at the cap.
        const int raised = current + RD_THRESH_INC;
        updated = (raised < max_rd_thresh_factor) ? raised
                                                  : max_rd_thresh_factor;
      }

      factor_buf[bs][mode] = updated;
    }
  }
}
1422
1423
void av1_update_rd_thresh_fact(
1424
    const AV1_COMMON *const cm, int (*factor_buf)[MAX_MODES],
1425
    int use_adaptive_rd_thresh, BLOCK_SIZE bsize, THR_MODES best_mode_index,
1426
    THR_MODES inter_mode_start, THR_MODES inter_mode_end,
1427
0
    THR_MODES intra_mode_start, THR_MODES intra_mode_end) {
1428
0
  assert(use_adaptive_rd_thresh > 0);
1429
0
  const int max_rd_thresh_factor = use_adaptive_rd_thresh * RD_THRESH_MAX_FACT;
1430
1431
0
  const int bsize_is_1_to_4 = bsize > cm->seq_params->sb_size;
1432
0
  BLOCK_SIZE min_size, max_size;
1433
0
  if (bsize_is_1_to_4) {
1434
    // This part handles block sizes with 1:4 and 4:1 aspect ratios
1435
    // TODO(any): Experiment with threshold update for parent/child blocks
1436
0
    min_size = bsize;
1437
0
    max_size = bsize;
1438
0
  } else {
1439
0
    min_size = AOMMAX(bsize - 2, BLOCK_4X4);
1440
0
    max_size = AOMMIN(bsize + 2, (int)cm->seq_params->sb_size);
1441
0
  }
1442
1443
0
  update_thr_fact(factor_buf, best_mode_index, inter_mode_start, inter_mode_end,
1444
0
                  min_size, max_size, max_rd_thresh_factor);
1445
0
  update_thr_fact(factor_buf, best_mode_index, intra_mode_start, intra_mode_end,
1446
0
                  min_size, max_size, max_rd_thresh_factor);
1447
0
}
1448
1449
int av1_get_intra_cost_penalty(int qindex, int qdelta,
1450
0
                               aom_bit_depth_t bit_depth) {
1451
0
  const int q = av1_dc_quant_QTX(qindex, qdelta, bit_depth);
1452
0
  switch (bit_depth) {
1453
0
    case AOM_BITS_8: return 20 * q;
1454
0
    case AOM_BITS_10: return 5 * q;
1455
0
    case AOM_BITS_12: return ROUND_POWER_OF_TWO(5 * q, 2);
1456
0
    default:
1457
0
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
1458
0
      return -1;
1459
0
  }
1460
0
}