Coverage Report

Created: 2026-05-16 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libvpx/vp9/encoder/vp9_rd.c
Line
Count
Source
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include <assert.h>
12
#include <math.h>
13
#include <stdio.h>
14
15
#include "./vp9_rtcd.h"
16
17
#include "vpx_dsp/vpx_dsp_common.h"
18
#include "vpx_mem/vpx_mem.h"
19
#include "vpx_ports/bitops.h"
20
#include "vpx_ports/mem.h"
21
#include "vpx_ports/system_state.h"
22
23
#include "vp9/common/vp9_common.h"
24
#include "vp9/common/vp9_entropy.h"
25
#include "vp9/common/vp9_entropymode.h"
26
#include "vp9/common/vp9_mvref_common.h"
27
#include "vp9/common/vp9_pred_common.h"
28
#include "vp9/common/vp9_quant_common.h"
29
#include "vp9/common/vp9_reconinter.h"
30
#include "vp9/common/vp9_reconintra.h"
31
#include "vp9/common/vp9_seg_common.h"
32
33
#include "vp9/encoder/vp9_cost.h"
34
#include "vp9/encoder/vp9_encodemb.h"
35
#include "vp9/encoder/vp9_encodemv.h"
36
#include "vp9/encoder/vp9_encoder.h"
37
#include "vp9/encoder/vp9_mcomp.h"
38
#include "vp9/encoder/vp9_quantize.h"
39
#include "vp9/encoder/vp9_ratectrl.h"
40
#include "vp9/encoder/vp9_rd.h"
41
#include "vp9/encoder/vp9_tokenize.h"
42
43
// Exponent applied to the scaled DC quantizer when deriving the rd threshold
// factor (see compute_rd_thresh_factor()).
#define RD_THRESH_POW 1.25

// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
47
48
5.65M
// Sets every field of |rd_cost| to the maximum value of its type.
// vp9_rd_cost_update() treats a maximal rate or distortion as "no usable
// cost" and falls back to this function.
void vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rdcost = INT64_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rate = INT_MAX;
}
53
54
8.17M
// Zeroes the rate, distortion and combined rd cost stored in |rd_cost|.
void vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rdcost = 0;
  rd_cost->dist = 0;
  rd_cost->rate = 0;
}
59
60
24.6M
// Combines |rate| and |dist| into a single rd cost using |mult| and |div|.
// Either input may be negative: the macro variant matching the sign
// combination is selected and always receives non-negative magnitudes.
// |mult| must be >= 0 and |div| must be > 0 (asserted).
int64_t vp9_calculate_rd_cost(int mult, int div, int rate, int64_t dist) {
  assert(mult >= 0);
  assert(div > 0);

  if (rate >= 0) {
    if (dist >= 0) {
      return RDCOST(mult, div, rate, dist);
    } else {
      return RDCOST_NEG_D(mult, div, rate, -dist);
    }
  } else {
    if (dist >= 0) {
      return RDCOST_NEG_R(mult, div, -rate, dist);
    } else {
      // Both terms are negative: negate the cost of their magnitudes.
      return -RDCOST(mult, div, -rate, -dist);
    }
  }
}
74
75
15.5M
// Recomputes rd_cost->rdcost from the stored rate and distortion. If either
// of them holds its maximum ("unset") value the whole structure is reset
// instead.
void vp9_rd_cost_update(int mult, int div, RD_COST *rd_cost) {
  const int has_valid_terms =
      rd_cost->rate < INT_MAX && rd_cost->dist < INT64_MAX;

  if (!has_valid_terms) {
    vp9_rd_cost_reset(rd_cost);
    return;
  }

  rd_cost->rdcost =
      vp9_calculate_rd_cost(mult, div, rd_cost->rate, rd_cost->dist);
}
83
84
// The baseline rd thresholds for breaking out of the rd loop for
85
// certain modes are assumed to be based on 8x8 blocks.
86
// This table is used to correct for block size.
87
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
88
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
89
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
90
};
91
92
72.9k
// Fills the per-frame mode cost tables held in |cpi| (intra y/uv mode costs,
// switchable interpolation filter costs and transform size costs) from the
// key-frame default probabilities and the current frame context.
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int row, col, mode, ctx, max_tx;

  // Key-frame luma mode costs use the fixed key-frame probability tables.
  for (row = 0; row < INTRA_MODES; ++row) {
    for (col = 0; col < INTRA_MODES; ++col) {
      vp9_cost_tokens(cpi->y_mode_costs[row][col],
                      vp9_kf_y_mode_prob[row][col], vp9_intra_mode_tree);
    }
  }

  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);

  for (mode = 0; mode < INTRA_MODES; ++mode) {
    vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][mode],
                    vp9_kf_uv_mode_prob[mode], vp9_intra_mode_tree);
    vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][mode],
                    fc->uv_mode_prob[mode], vp9_intra_mode_tree);
  }

  for (ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    vp9_cost_tokens(cpi->switchable_interp_costs[ctx],
                    fc->switchable_interp_prob[ctx],
                    vp9_switchable_interp_tree);
  }

  // Transform size costs, one table per maximum transform size.
  for (max_tx = TX_8X8; max_tx < TX_SIZES; ++max_tx) {
    for (ctx = 0; ctx < TX_SIZE_CONTEXTS; ++ctx) {
      const vpx_prob *tx_probs = get_tx_probs(max_tx, ctx, &fc->tx_probs);
      int tx;
      for (tx = 0; tx <= max_tx; ++tx) {
        // Size |tx| costs |tx| "one" decisions followed by a "zero" decision;
        // the largest size has no trailing "zero".
        const int last_bit = tx - (tx == max_tx);
        int cost = 0;
        int bit;
        for (bit = 0; bit <= last_bit; ++bit) {
          if (bit != tx) {
            cost += vp9_cost_one(tx_probs[bit]);
          } else {
            cost += vp9_cost_zero(tx_probs[bit]);
          }
        }
        cpi->tx_size_cost[max_tx - 1][ctx][tx] = cost;
      }
    }
  }
}
134
135
static void fill_token_costs(vp9_coeff_cost *c,
136
72.9k
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
137
72.9k
  int i, j, k, l;
138
72.9k
  TX_SIZE t;
139
364k
  for (t = TX_4X4; t <= TX_32X32; ++t)
140
875k
    for (i = 0; i < PLANE_TYPES; ++i)
141
1.75M
      for (j = 0; j < REF_TYPES; ++j)
142
8.17M
        for (k = 0; k < COEF_BANDS; ++k)
143
45.5M
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
144
38.5M
            vpx_prob probs[ENTROPY_NODES];
145
38.5M
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
146
38.5M
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, vp9_coef_tree);
147
38.5M
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
148
38.5M
                                 vp9_coef_tree);
149
38.5M
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
150
38.5M
                   c[t][i][j][k][1][l][EOB_TOKEN]);
151
38.5M
          }
152
72.9k
}
153
154
// Values are now correlated to quantizer.
155
static int sad_per_bit16lut_8[QINDEX_RANGE];
156
static int sad_per_bit4lut_8[QINDEX_RANGE];
157
158
#if CONFIG_VP9_HIGHBITDEPTH
159
static int sad_per_bit16lut_10[QINDEX_RANGE];
160
static int sad_per_bit4lut_10[QINDEX_RANGE];
161
static int sad_per_bit16lut_12[QINDEX_RANGE];
162
static int sad_per_bit4lut_12[QINDEX_RANGE];
163
#endif
164
165
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
166
3
                            vpx_bit_depth_t bit_depth) {
167
3
  int i;
168
  // Initialize the sad lut tables using a formulaic calculation for now.
169
  // This is to make it easier to resolve the impact of experimental changes
170
  // to the quantizer tables.
171
771
  for (i = 0; i < range; i++) {
172
768
    const double q = vp9_convert_qindex_to_q(i, bit_depth);
173
768
    bit16lut[i] = (int)(0.0418 * q + 2.4107);
174
768
    bit4lut[i] = (int)(0.063 * q + 2.742);
175
768
  }
176
3
}
177
178
1
void vp9_init_me_luts(void) {
179
1
  init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
180
1
                  VPX_BITS_8);
181
1
#if CONFIG_VP9_HIGHBITDEPTH
182
1
  init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
183
1
                  VPX_BITS_10);
184
1
  init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
185
1
                  VPX_BITS_12);
186
1
#endif
187
1
}
188
189
// Extra rdmult weight in 1/128 units, indexed by min(15, gfu_boost / 100);
// applied in modulate_rdmult().
static const int rd_boost_factor[16] = {
  64, 32, 32, 32,  //
  24, 16, 12, 12,  //
  8,  8,  4,  4,   //
  2,  2,  1,  0
};
191
192
// Note that the element below for frame type "USE_BUF_FRAME", which indicates
193
// that the show frame flag is set, should not be used as no real frame
194
// is encoded so we should not reach here. However, a dummy value
195
// is inserted here to make sure the data structure has the right number
196
// of values assigned.
197
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
198
                                                              128, 144, 144 };
199
200
// Configure Vizier RD parameters.
201
// Later this function will use passed in command line values.
202
3.75k
void vp9_init_rd_parameters(VP9_COMP *cpi) {
203
3.75k
  RD_CONTROL *const rdc = &cpi->rd_ctrl;
204
205
  // When |use_vizier_rc_params| is 1, we expect the rd parameters have been
206
  // initialized by the pass in values.
207
  // Be careful that parameters below are only initialized to 1, if we do not
208
  // pass values to them. It is desired to take care of each parameter when
209
  // using |use_vizier_rc_params|.
210
3.75k
  if (cpi->twopass.use_vizier_rc_params) return;
211
212
  // Make sure this function is floating point safe.
213
3.75k
  vpx_clear_system_state();
214
215
3.75k
  rdc->rd_mult_inter_qp_fac = 1.0;
216
3.75k
  rdc->rd_mult_arf_qp_fac = 1.0;
217
3.75k
  rdc->rd_mult_key_qp_fac = 1.0;
218
3.75k
}
219
220
// Returns the default rd multiplier for inter frames for a given qindex.
// The linear function here is a first pass estimate based on data from
// a previous Vizier run.
static double def_inter_rd_multiplier(int qindex) {
  const double per_qindex = 0.001;
  const double base = 4.15;
  return base + (per_qindex * (double)qindex);
}
226
227
// Returns the default rd multiplier for ARF/Golden Frames for a given qindex.
// The linear function here is a first pass estimate based on data from
// a previous Vizier run.
static double def_arf_rd_multiplier(int qindex) {
  const double per_qindex = 0.001;
  const double base = 4.25;
  return base + (per_qindex * (double)qindex);
}
233
234
// Returns the default rd multiplier for key frames for a given qindex.
// The linear function here is a first pass estimate based on data from
// a previous Vizier run.
static double def_kf_rd_multiplier(int qindex) {
  const double per_qindex = 0.001;
  const double base = 4.35;
  return base + (per_qindex * (double)qindex);
}
240
241
10.5M
int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
242
10.5M
  const RD_CONTROL *rdc = &cpi->rd_ctrl;
243
10.5M
  const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
244
  // largest dc_quant is 21387, therefore rdmult should fit in int32_t
245
10.5M
  int rdmult = q * q;
246
247
10.5M
  if (cpi->ext_ratectrl.ready &&
248
0
      (cpi->ext_ratectrl.funcs.rc_type & VPX_RC_RDMULT) != 0 &&
249
0
      cpi->ext_ratectrl.ext_rdmult != VPX_DEFAULT_RDMULT) {
250
0
    return cpi->ext_ratectrl.ext_rdmult;
251
0
  }
252
253
  // Make sure this function is floating point safe.
254
10.5M
  vpx_clear_system_state();
255
256
10.5M
  if (cpi->common.frame_type == KEY_FRAME) {
257
4.04M
    double def_rd_q_mult = def_kf_rd_multiplier(qindex);
258
4.04M
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_key_qp_fac);
259
6.55M
  } else if (!cpi->rc.is_src_frame_alt_ref &&
260
6.55M
             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
261
398k
    double def_rd_q_mult = def_arf_rd_multiplier(qindex);
262
398k
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_arf_qp_fac);
263
6.15M
  } else {
264
6.15M
    double def_rd_q_mult = def_inter_rd_multiplier(qindex);
265
6.15M
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_inter_qp_fac);
266
6.15M
  }
267
268
10.5M
#if CONFIG_VP9_HIGHBITDEPTH
269
10.5M
  switch (cpi->common.bit_depth) {
270
0
    case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
271
0
    case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
272
10.5M
    default: break;
273
10.5M
  }
274
10.5M
#endif  // CONFIG_VP9_HIGHBITDEPTH
275
10.5M
  return rdmult > 0 ? rdmult : 1;
276
10.5M
}
277
278
10.5M
static int modulate_rdmult(const VP9_COMP *cpi, int rdmult) {
279
10.5M
  int64_t rdmult_64 = rdmult;
280
10.5M
  if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
281
0
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
282
0
    const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
283
0
    const int gfu_boost = cpi->multi_layer_arf
284
0
                              ? gf_group->gfu_boost[gf_group->index]
285
0
                              : cpi->rc.gfu_boost;
286
0
    const int boost_index = VPXMIN(15, (gfu_boost / 100));
287
288
0
    rdmult_64 = (rdmult_64 * rd_frame_type_factor[frame_type]) >> 7;
289
0
    rdmult_64 += ((rdmult_64 * rd_boost_factor[boost_index]) >> 7);
290
0
  }
291
10.5M
  return (int)rdmult_64;
292
10.5M
}
293
294
10.5M
int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
295
10.5M
  int rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex);
296
10.5M
  if (cpi->ext_ratectrl.ready &&
297
0
      (cpi->ext_ratectrl.funcs.rc_type & VPX_RC_RDMULT) != 0 &&
298
0
      cpi->ext_ratectrl.ext_rdmult != VPX_DEFAULT_RDMULT) {
299
0
    return cpi->ext_ratectrl.ext_rdmult;
300
0
  }
301
10.5M
  return modulate_rdmult(cpi, rdmult);
302
10.5M
}
303
304
0
int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
305
0
  int rdmult =
306
0
      vp9_compute_rd_mult_based_on_qindex(cpi, cpi->common.base_qindex);
307
0
  rdmult = (int)((double)rdmult / beta);
308
0
  rdmult = rdmult > 0 ? rdmult : 1;
309
0
  return modulate_rdmult(cpi, rdmult);
310
0
}
311
312
583k
// Returns the rd threshold factor for |qindex|: the DC quantizer, divided by
// a bit-depth dependent constant (4, 16 or 64), is raised to RD_THRESH_POW,
// scaled by 5.12 and truncated. The result is never below 8.
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  double q;
  int factor;
#if CONFIG_VP9_HIGHBITDEPTH
  if (bit_depth == VPX_BITS_8) {
    q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
  } else if (bit_depth == VPX_BITS_10) {
    q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0;
  } else {
    assert(bit_depth == VPX_BITS_12);
    q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
  }
#else
  (void)bit_depth;
  q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // TODO(debargha): Adjust the function below.
  factor = (int)(pow(q, RD_THRESH_POW) * 5.12);
  return VPXMAX(factor, 8);
}
330
331
10.5M
// Loads x->sadperbit16 and x->sadperbit4 from the lookup tables for the
// stream bit depth (always the 8-bit tables when high bit depth support is
// not compiled in), indexed by |qindex|.
void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
  const int *lut16 = sad_per_bit16lut_8;
  const int *lut4 = sad_per_bit4lut_8;

#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8: break;  // 8-bit tables already selected.
    case VPX_BITS_10:
      lut16 = sad_per_bit16lut_10;
      lut4 = sad_per_bit4lut_10;
      break;
    default:
      assert(cpi->common.bit_depth == VPX_BITS_12);
      lut16 = sad_per_bit16lut_12;
      lut4 = sad_per_bit4lut_12;
      break;
  }
#else
  (void)cpi;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  x->sadperbit16 = lut16[qindex];
  x->sadperbit4 = lut4[qindex];
}
354
355
72.9k
// Fills rd->threshes for every segment and block size. Each threshold is the
// corresponding thresh_mult entry scaled by the segment's quantizer factor
// and the block-size factor, divided by 4, and saturated to INT_MAX when the
// product would not fit. Block sizes below 8x8 use thresh_mult_sub8x8 and
// MAX_REFS entries; all others use thresh_mult and MAX_MODES entries.
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int seg_qindex =
        vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
    const int qindex = clamp(seg_qindex + cm->y_dc_delta_q, 0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
    int bsize;

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;
      int i;

      if (bsize < BLOCK_8X8) {
        for (i = 0; i < MAX_REFS; ++i) {
          if (rd->thresh_mult_sub8x8[i] < thresh_max) {
            rd->threshes[segment_id][bsize][i] =
                rd->thresh_mult_sub8x8[i] * t / 4;
          } else {
            rd->threshes[segment_id][bsize][i] = INT_MAX;
          }
        }
      } else {
        for (i = 0; i < MAX_MODES; ++i) {
          if (rd->thresh_mult[i] < thresh_max) {
            rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] * t / 4;
          } else {
            rd->threshes[segment_id][bsize][i] = INT_MAX;
          }
        }
      }
    }
  }
}
386
387
57.8k
// Rebuilds cpi->inter_mode_cost for every inter mode context from the
// current frame context's inter mode probabilities.
void vp9_build_inter_mode_cost(VP9_COMP *cpi) {
  const VP9_COMMON *const cm = &cpi->common;
  int ctx = 0;

  while (ctx < INTER_MODE_CONTEXTS) {
    vp9_cost_tokens((int *)cpi->inter_mode_cost[ctx],
                    cm->fc->inter_mode_probs[ctx], vp9_inter_mode_tree);
    ++ctx;
  }
}
395
396
72.9k
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
397
72.9k
  VP9_COMMON *const cm = &cpi->common;
398
72.9k
  MACROBLOCK *const x = &cpi->td.mb;
399
72.9k
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
400
72.9k
  RD_OPT *const rd = &cpi->rd;
401
72.9k
  int i;
402
403
72.9k
  vpx_clear_system_state();
404
405
72.9k
  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
406
72.9k
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
407
408
72.9k
  set_error_per_bit(x, rd->RDMULT);
409
410
72.9k
  x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
411
31.1k
                       cm->frame_type != KEY_FRAME)
412
72.9k
                          ? 0
413
72.9k
                          : 1;
414
415
72.9k
  set_block_thresholds(cm, rd);
416
72.9k
  set_partition_probs(cm, xd);
417
418
72.9k
  if (cpi->oxcf.pass == 1) {
419
0
    if (!frame_is_intra_only(cm))
420
0
      vp9_build_nmv_cost_table(
421
0
          x->nmvjointcost,
422
0
          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
423
0
          &cm->fc->nmvc, cm->allow_high_precision_mv);
424
72.9k
  } else {
425
72.9k
    if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
426
72.9k
      fill_token_costs(x->token_costs, cm->fc->coef_probs);
427
428
72.9k
    if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
429
72.9k
        cm->frame_type == KEY_FRAME) {
430
1.24M
      for (i = 0; i < PARTITION_CONTEXTS; ++i)
431
1.16M
        vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
432
1.16M
                        vp9_partition_tree);
433
72.9k
    }
434
435
72.9k
    if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
436
72.9k
        cm->frame_type == KEY_FRAME) {
437
72.9k
      fill_mode_costs(cpi);
438
439
72.9k
      if (!frame_is_intra_only(cm)) {
440
57.8k
        vp9_build_nmv_cost_table(
441
57.8k
            x->nmvjointcost,
442
57.8k
            cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
443
57.8k
            &cm->fc->nmvc, cm->allow_high_precision_mv);
444
57.8k
        vp9_build_inter_mode_cost(cpi);
445
57.8k
      }
446
72.9k
    }
447
72.9k
  }
448
72.9k
}
449
450
// NOTE: The tables below must be of the same size.

// The functions described below are sampled at the four most significant
// bits of x^2 + 8 / 256.

// Normalized rate:
// This table models the rate for a Laplacian source with given variance
// when quantized with a uniform quantizer with given stepsize. The
// closed form expression is:
// Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
// where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
// and H(x) is the binary entropy function.
// Laid out eight entries per row, i.e. one row per value of k in
// model_rd_norm().
static const int rate_tab_q10[] = {
  65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651,  //
  4553,  4389, 4255, 4142, 4044, 3958, 3881, 3811,  //
  3748,  3635, 3538, 3453, 3376, 3307, 3244, 3186,  //
  3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638,  //
  2589,  2501, 2423, 2353, 2290, 2232, 2179, 2130,  //
  2084,  2001, 1928, 1862, 1802, 1748, 1698, 1651,  //
  1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199,  //
  1159,  1086, 1021, 963,  911,  864,  821,  781,   //
  745,   680,  623,  574,  530,  490,  455,  424,   //
  395,   345,  304,  269,  239,  213,  190,  171,   //
  154,   126,  104,  87,   73,   61,   52,   44,    //
  38,    28,   21,   16,   12,   10,   8,    6,     //
  5,     3,    2,    1,    1,    1,    0,    0,     //
};
472
473
// Normalized distortion:
// This table models the normalized distortion for a Laplacian source
// with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expression is:
// Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
// where x = qpstep / sqrt(variance).
// Note the actual distortion is Dn * variance.
// Laid out eight entries per row, i.e. one row per value of k in
// model_rd_norm().
static const int dist_tab_q10[] = {
  0,    0,    1,    1,    1,    2,    2,    2,     //
  3,    3,    4,    5,    5,    6,    7,    7,     //
  8,    9,    11,   12,   13,   15,   16,   17,    //
  18,   21,   24,   26,   29,   31,   34,   36,    //
  39,   44,   49,   54,   59,   64,   69,   73,    //
  78,   88,   97,   106,  115,  124,  133,  142,   //
  151,  167,  184,  200,  215,  231,  245,  260,   //
  274,  301,  327,  351,  375,  397,  418,  439,   //
  458,  495,  528,  559,  587,  613,  637,  659,   //
  680,  717,  749,  777,  801,  823,  842,  859,   //
  874,  899,  919,  936,  949,  960,  969,  977,   //
  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,  //
  1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,  //
};
490
// Sample positions (x^2 in Q10) of the rate and distortion tables above;
// model_rd_norm() interpolates between neighbouring entries. Laid out eight
// entries per row, i.e. one row per value of k in model_rd_norm(); the
// spacing doubles from one row to the next.
static const int xsq_iq_q10[] = {
  0,      4,      8,      12,     16,     20,     24,     28,      //
  32,     40,     48,     56,     64,     72,     80,     88,      //
  96,     112,    128,    144,    160,    176,    192,    208,     //
  224,    256,    288,    320,    352,    384,    416,    448,     //
  480,    544,    608,    672,    736,    800,    864,    928,     //
  992,    1120,   1248,   1376,   1504,   1632,   1760,   1888,    //
  2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,    //
  4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,    //
  8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,   //
  16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,   //
  32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,   //
  65504,  73696,  81888,  90080,  98272,  106464, 114656, 122848,  //
  131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,  //
};
504
505
87.8M
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
506
87.8M
  const int tmp = (xsq_q10 >> 2) + 8;
507
87.8M
  const int k = get_msb(tmp) - 3;
508
87.8M
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
509
87.8M
  const int one_q10 = 1 << 10;
510
87.8M
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
511
87.8M
  const int b_q10 = one_q10 - a_q10;
512
87.8M
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
513
87.8M
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
514
87.8M
}
515
516
static const uint32_t MAX_XSQ_Q10 = 245727;
517
518
void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
519
                                  unsigned int qstep, int *rate,
520
90.3M
                                  int64_t *dist) {
521
  // This function models the rate and distortion for a Laplacian
522
  // source with given variance when quantized with a uniform quantizer
523
  // with given stepsize. The closed form expressions are in:
524
  // Hang and Chen, "Source Model for transform video coder and its
525
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
526
  // Sys. for Video Tech., April 1997.
527
90.3M
  if (var == 0) {
528
2.50M
    *rate = 0;
529
2.50M
    *dist = 0;
530
87.8M
  } else {
531
87.8M
    int d_q10, r_q10;
532
87.8M
    const uint64_t xsq_q10_64 =
533
87.8M
        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
534
87.8M
    const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
535
87.8M
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
536
87.8M
    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
537
87.8M
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
538
87.8M
  }
539
90.3M
}
540
541
// Disable gcc 12.2 false positive warning.
542
// warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
543
#if defined(__GNUC__) && !defined(__clang__)
544
#pragma GCC diagnostic push
545
#pragma GCC diagnostic ignored "-Wstringop-overflow"
546
#endif
547
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
548
                              const struct macroblockd_plane *pd,
549
                              ENTROPY_CONTEXT t_above[16],
550
231M
                              ENTROPY_CONTEXT t_left[16]) {
551
231M
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
552
231M
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
553
231M
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
554
231M
  const ENTROPY_CONTEXT *const above = pd->above_context;
555
231M
  const ENTROPY_CONTEXT *const left = pd->left_context;
556
557
231M
  int i;
558
231M
  switch (tx_size) {
559
159M
    case TX_4X4:
560
159M
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
561
159M
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
562
159M
      break;
563
57.1M
    case TX_8X8:
564
126M
      for (i = 0; i < num_4x4_w; i += 2)
565
69.3M
        t_above[i] = !!*(const uint16_t *)&above[i];
566
126M
      for (i = 0; i < num_4x4_h; i += 2)
567
68.9M
        t_left[i] = !!*(const uint16_t *)&left[i];
568
57.1M
      break;
569
12.3M
    case TX_16X16:
570
27.7M
      for (i = 0; i < num_4x4_w; i += 4)
571
15.3M
        t_above[i] = !!*(const uint32_t *)&above[i];
572
27.5M
      for (i = 0; i < num_4x4_h; i += 4)
573
15.1M
        t_left[i] = !!*(const uint32_t *)&left[i];
574
12.3M
      break;
575
3.05M
    default:
576
3.05M
      assert(tx_size == TX_32X32);
577
7.01M
      for (i = 0; i < num_4x4_w; i += 8)
578
3.95M
        t_above[i] = !!*(const uint64_t *)&above[i];
579
6.89M
      for (i = 0; i < num_4x4_h; i += 8)
580
3.83M
        t_left[i] = !!*(const uint64_t *)&left[i];
581
3.05M
      break;
582
231M
  }
583
231M
}
584
#if defined(__GNUC__) && !defined(__clang__)
585
#pragma GCC diagnostic pop
586
#endif
587
588
// Evaluates the candidate reference motion vectors for |ref_frame| (the two
// ref_mvs entries and, when the block is smaller than x->max_partition_size,
// x->pred_mv) by computing the SAD of the source block against
// |ref_y_buffer| at each candidate's full-pel position. The index of the
// best candidate, its SAD and the largest full-pel component magnitude seen
// are stored in |x| for |ref_frame|.
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  const int num_mv_refs =
      MAX_MV_REF_CANDIDATES + (block_size < x->max_partition_size);
  uint8_t *const src_y_ptr = x->plane[0].src.buf;
  int best_index = 0;
  int best_sad = INT_MAX;
  int max_mv = 0;
  int zero_seen = 0;
  int near_same_nearest;
  int i;

  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  pred_mv[2] = x->pred_mv[ref_frame];
  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
                      x->mbmi_ext->ref_mvs[ref_frame][1].as_int;

  // Get the sad for each candidate reference mv.
  for (i = 0; i < num_mv_refs; ++i) {
    const MV *const cand = &pred_mv[i];
    uint8_t *ref_y_ptr;
    int fp_row, fp_col, is_zero, this_sad;

    // Skip unset candidates and a second candidate equal to the first.
    if (cand->row == INT16_MAX || cand->col == INT16_MAX) continue;
    if (near_same_nearest && i == 1) continue;

    // Round the 1/8-pel vector to full-pel precision.
    fp_row = (cand->row + 3 + (cand->row >= 0)) >> 3;
    fp_col = (cand->col + 3 + (cand->col >= 0)) >> 3;
    max_mv = VPXMAX(max_mv, VPXMAX(abs(cand->row), abs(cand->col)) >> 3);

    // The zero vector only needs to be tested once.
    is_zero = (fp_row == 0 && fp_col == 0);
    if (is_zero && zero_seen) continue;
    zero_seen |= is_zero;

    ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    // Find sad for current vector.
    this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                           ref_y_ptr, ref_y_stride);
    // Note if it is the best so far.
    if (this_sad < best_sad) {
      best_sad = this_sad;
      best_index = i;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
640
641
void vp9_setup_pred_block(const MACROBLOCKD *xd,
642
                          struct buf_2d dst[MAX_MB_PLANE],
643
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
644
                          const struct scale_factors *scale,
645
13.9M
                          const struct scale_factors *scale_uv) {
646
13.9M
  int i;
647
648
13.9M
  dst[0].buf = src->y_buffer;
649
13.9M
  dst[0].stride = src->y_stride;
650
13.9M
  dst[1].buf = src->u_buffer;
651
13.9M
  dst[2].buf = src->v_buffer;
652
13.9M
  dst[1].stride = dst[2].stride = src->uv_stride;
653
654
55.6M
  for (i = 0; i < MAX_MB_PLANE; ++i) {
655
41.7M
    setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
656
41.7M
                     i ? scale_uv : scale, xd->plane[i].subsampling_x,
657
41.7M
                     xd->plane[i].subsampling_y);
658
41.7M
  }
659
13.9M
}
660
661
// Returns the sample offset of 4x4 block number |raster_block| (counted in
// raster order inside a block of size |plane_bsize|) within a buffer whose
// rows are |stride| samples apart.
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride) {
  const int bw = b_width_log2_lookup[plane_bsize];
  const int blk_col = raster_block & ((1 << bw) - 1);
  const int blk_row = raster_block >> bw;
  return (4 * blk_row) * stride + (4 * blk_col);
}
668
669
// Returns a pointer to 4x4 block |raster_block| inside the int16_t buffer
// |base|, whose row stride equals the width of |plane_bsize| in samples.
int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
                                       int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset = vp9_raster_block_offset(plane_bsize, raster_block, stride);
  return base + offset;
}
674
675
YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
676
14.0M
                                             int ref_frame) {
677
14.0M
  const VP9_COMMON *const cm = &cpi->common;
678
14.0M
  const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
679
14.0M
  const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
680
14.0M
  assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
681
14.0M
  return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
682
14.0M
             ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
683
14.0M
             : NULL;
684
14.0M
}
685
686
45.8M
int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
687
45.8M
  const MODE_INFO *const mi = xd->mi[0];
688
45.8M
  const int ctx = get_pred_context_switchable_interp(xd);
689
45.8M
  return SWITCHABLE_INTERP_RATE_FACTOR *
690
45.8M
         cpi->switchable_interp_costs[ctx][mi->interp_filter];
691
45.8M
}
692
693
72.9k
// Initializes rd->thresh_mult[]. Every mode starts from a baseline (-500 in
// BEST mode, otherwise 0); the three NEAREST modes are then set to 300 when
// adaptive rd thresholds are enabled (0 otherwise), and the remaining listed
// modes are raised by a fixed per-mode amount.
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  RD_OPT *const rd = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int nearest_thresh = sf->adaptive_rd_thresh ? 300 : 0;
  int mode;

  // Set baseline threshold values.
  for (mode = 0; mode < MAX_MODES; ++mode) {
    rd->thresh_mult[mode] = cpi->oxcf.mode == BEST ? -500 : 0;
  }

  rd->thresh_mult[THR_NEARESTMV] = nearest_thresh;
  rd->thresh_mult[THR_NEARESTG] = nearest_thresh;
  rd->thresh_mult[THR_NEARESTA] = nearest_thresh;

  // Modes raised by 1000.
  rd->thresh_mult[THR_DC] += 1000;
  rd->thresh_mult[THR_NEWMV] += 1000;
  rd->thresh_mult[THR_NEWA] += 1000;
  rd->thresh_mult[THR_NEWG] += 1000;
  rd->thresh_mult[THR_NEARMV] += 1000;
  rd->thresh_mult[THR_NEARA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
  rd->thresh_mult[THR_TM] += 1000;
  rd->thresh_mult[THR_NEARG] += 1000;

  // Modes raised by 1500.
  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
  rd->thresh_mult[THR_COMP_NEARGA] += 1500;

  // Modes raised by 2000.
  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
  rd->thresh_mult[THR_COMP_NEWGA] += 2000;
  rd->thresh_mult[THR_ZEROMV] += 2000;
  rd->thresh_mult[THR_ZEROG] += 2000;
  rd->thresh_mult[THR_ZEROA] += 2000;
  rd->thresh_mult[THR_H_PRED] += 2000;
  rd->thresh_mult[THR_V_PRED] += 2000;

  // Modes raised by 2500.
  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
  rd->thresh_mult[THR_D45_PRED] += 2500;
  rd->thresh_mult[THR_D135_PRED] += 2500;
  rd->thresh_mult[THR_D117_PRED] += 2500;
  rd->thresh_mult[THR_D153_PRED] += 2500;
  rd->thresh_mult[THR_D207_PRED] += 2500;
  rd->thresh_mult[THR_D63_PRED] += 2500;
}
746
747
72.9k
// Copies the sub-8x8 threshold multipliers into rd->thresh_mult_sub8x8:
// the second table row in BEST mode, the first row otherwise.
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int sub8x8_mult[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },  // all modes other than BEST
    { 2000, 2000, 2000, 4000, 4000, 2000 }   // BEST
  };
  RD_OPT *const rd = &cpi->rd;
  const int *const row = sub8x8_mult[cpi->oxcf.mode == BEST];

  memcpy(rd->thresh_mult_sub8x8, row, sizeof(sub8x8_mult[0]));
}
756
757
void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
758
3.95M
                               int bsize, int best_mode_index) {
759
3.95M
  if (rd_thresh > 0) {
760
3.95M
    const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
761
3.95M
    int mode;
762
94.3M
    for (mode = 0; mode < top_mode; ++mode) {
763
90.4M
      const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
764
90.4M
      const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
765
90.4M
      BLOCK_SIZE bs;
766
445M
      for (bs = min_size; bs <= max_size; ++bs) {
767
354M
        int *const fact = &factor_buf[bs][mode];
768
354M
        if (mode == best_mode_index) {
769
14.8M
          *fact -= (*fact >> 4);
770
339M
        } else {
771
339M
          *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
772
339M
        }
773
354M
      }
774
90.4M
    }
775
3.95M
  }
776
3.95M
}
777
778
// Returns the rate penalty applied to intra modes: 20 times the 8-bit DC
// quantizer for (|qindex|, |qdelta|), right-shifted by a block-size
// dependent amount unless the estimated noise level is high.
int vp9_get_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
                               int qindex, int qdelta) {
  int reduction_fac = 0;
  int base_penalty;

  // Reduce the intra cost penalty for small blocks (<=16x16).
  if (bsize <= BLOCK_16X16) {
    if (bsize <= BLOCK_8X8) {
      reduction_fac = 4;
    } else {
      reduction_fac = 2;
    }
  }

  // Don't reduce intra cost penalty if estimated noise level is high.
  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh) {
    reduction_fac = 0;
  }

  // Always use VPX_BITS_8 as input here because the penalty is applied
  // to rate not distortion so we want a consistent penalty for all bit
  // depths. If the actual bit depth were passed in here then the value
  // returned by vp9_dc_quant() would scale with the bit depth and we would
  // then need to apply inverse scaling to correct back to a bit depth
  // independent rate penalty.
  base_penalty = 20 * vp9_dc_quant(qindex, qdelta, VPX_BITS_8);
  return base_penalty >> reduction_fac;
}