Coverage Report

Created: 2026-04-12 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libvpx/vp9/encoder/vp9_rd.c
Line
Count
Source
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include <assert.h>
12
#include <math.h>
13
#include <stdio.h>
14
15
#include "./vp9_rtcd.h"
16
17
#include "vpx_dsp/vpx_dsp_common.h"
18
#include "vpx_mem/vpx_mem.h"
19
#include "vpx_ports/bitops.h"
20
#include "vpx_ports/mem.h"
21
#include "vpx_ports/system_state.h"
22
23
#include "vp9/common/vp9_common.h"
24
#include "vp9/common/vp9_entropy.h"
25
#include "vp9/common/vp9_entropymode.h"
26
#include "vp9/common/vp9_mvref_common.h"
27
#include "vp9/common/vp9_pred_common.h"
28
#include "vp9/common/vp9_quant_common.h"
29
#include "vp9/common/vp9_reconinter.h"
30
#include "vp9/common/vp9_reconintra.h"
31
#include "vp9/common/vp9_seg_common.h"
32
33
#include "vp9/encoder/vp9_cost.h"
34
#include "vp9/encoder/vp9_encodemb.h"
35
#include "vp9/encoder/vp9_encodemv.h"
36
#include "vp9/encoder/vp9_encoder.h"
37
#include "vp9/encoder/vp9_mcomp.h"
38
#include "vp9/encoder/vp9_quantize.h"
39
#include "vp9/encoder/vp9_ratectrl.h"
40
#include "vp9/encoder/vp9_rd.h"
41
#include "vp9/encoder/vp9_tokenize.h"
42
43
#define RD_THRESH_POW 1.25
44
45
// Factor to weigh the rate for switchable interp filters.
46
56.5M
#define SWITCHABLE_INTERP_RATE_FACTOR 1
47
48
20.6M
// Saturates every field of |rd_cost|: rate to INT_MAX, dist and rdcost to
// INT64_MAX.
void vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rdcost = INT64_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rate = INT_MAX;
}
53
54
9.76M
// Zeroes the rate, distortion and combined rd cost stored in |rd_cost|.
void vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rdcost = 0;
  rd_cost->dist = 0;
  rd_cost->rate = 0;
}
59
60
13.3M
// Computes the rd cost for |rate| and |dist|, either of which may be
// negative. The RDCOST* macros are always handed non-negative magnitudes;
// the macro variant (or the final negation) is chosen from the signs of the
// two inputs.
int64_t vp9_calculate_rd_cost(int mult, int div, int rate, int64_t dist) {
  const int rate_is_negative = rate < 0;
  const int dist_is_negative = dist < 0;

  assert(mult >= 0);
  assert(div > 0);

  if (rate_is_negative) {
    // Both negative: cost of the magnitudes, negated.
    if (dist_is_negative) return -RDCOST(mult, div, -rate, -dist);
    return RDCOST_NEG_R(mult, div, -rate, dist);
  }
  if (dist_is_negative) return RDCOST_NEG_D(mult, div, rate, -dist);
  return RDCOST(mult, div, rate, dist);
}
74
75
9.30M
// Recomputes rd_cost->rdcost from the stored rate and distortion. If either
// of them is saturated (rate at INT_MAX or dist at INT64_MAX) the whole
// structure is reset instead.
void vp9_rd_cost_update(int mult, int div, RD_COST *rd_cost) {
  if (rd_cost->rate >= INT_MAX || rd_cost->dist >= INT64_MAX) {
    vp9_rd_cost_reset(rd_cost);
    return;
  }
  rd_cost->rdcost =
      vp9_calculate_rd_cost(mult, div, rd_cost->rate, rd_cost->dist);
}
83
84
// The baseline rd thresholds for breaking out of the rd loop for
85
// certain modes are assumed to be based on 8x8 blocks.
86
// This table is used to correct for block size.
87
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
88
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
89
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
90
};
91
92
231k
// Fills the intra mode, switchable interpolation filter and transform size
// cost tables of |cpi| from the fixed key-frame probability tables and the
// current frame context (cpi->common.fc).
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int row, col;

  // Key-frame luma mode costs, one token tree per vp9_kf_y_mode_prob entry.
  for (row = 0; row < INTRA_MODES; ++row) {
    for (col = 0; col < INTRA_MODES; ++col) {
      vp9_cost_tokens(cpi->y_mode_costs[row][col],
                      vp9_kf_y_mode_prob[row][col], vp9_intra_mode_tree);
    }
  }

  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);

  // Chroma mode costs: fixed probabilities for key frames, frame-context
  // probabilities for inter frames.
  for (row = 0; row < INTRA_MODES; ++row) {
    vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][row],
                    vp9_kf_uv_mode_prob[row], vp9_intra_mode_tree);
    vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][row],
                    fc->uv_mode_prob[row], vp9_intra_mode_tree);
  }

  for (row = 0; row < SWITCHABLE_FILTER_CONTEXTS; ++row) {
    vp9_cost_tokens(cpi->switchable_interp_costs[row],
                    fc->switchable_interp_prob[row],
                    vp9_switchable_interp_tree);
  }

  // Transform size costs. Here |row| is the largest allowed transform size
  // and |col| the context.
  for (row = TX_8X8; row < TX_SIZES; ++row) {
    for (col = 0; col < TX_SIZE_CONTEXTS; ++col) {
      const vpx_prob *const tx_probs = get_tx_probs(row, col, &fc->tx_probs);
      int size;
      for (size = 0; size <= row; ++size) {
        // The largest size (size == row) stops one probability earlier, so
        // it never pays a terminating "zero" cost.
        const int last_bit = size - (size == row);
        int cost = 0;
        int bit;
        for (bit = 0; bit <= last_bit; ++bit) {
          cost += (bit == size) ? vp9_cost_zero(tx_probs[bit])
                                : vp9_cost_one(tx_probs[bit]);
        }
        cpi->tx_size_cost[row - 1][col][size] = cost;
      }
    }
  }
}
134
135
static void fill_token_costs(vp9_coeff_cost *c,
136
199k
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
137
199k
  int i, j, k, l;
138
199k
  TX_SIZE t;
139
997k
  for (t = TX_4X4; t <= TX_32X32; ++t)
140
2.39M
    for (i = 0; i < PLANE_TYPES; ++i)
141
4.78M
      for (j = 0; j < REF_TYPES; ++j)
142
22.3M
        for (k = 0; k < COEF_BANDS; ++k)
143
124M
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
144
105M
            vpx_prob probs[ENTROPY_NODES];
145
105M
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
146
105M
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, vp9_coef_tree);
147
105M
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
148
105M
                                 vp9_coef_tree);
149
105M
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
150
105M
                   c[t][i][j][k][1][l][EOB_TOKEN]);
151
105M
          }
152
199k
}
153
154
// Values are now correlated to quantizer.
155
static int sad_per_bit16lut_8[QINDEX_RANGE];
156
static int sad_per_bit4lut_8[QINDEX_RANGE];
157
158
#if CONFIG_VP9_HIGHBITDEPTH
159
static int sad_per_bit16lut_10[QINDEX_RANGE];
160
static int sad_per_bit4lut_10[QINDEX_RANGE];
161
static int sad_per_bit16lut_12[QINDEX_RANGE];
162
static int sad_per_bit4lut_12[QINDEX_RANGE];
163
#endif
164
165
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
166
6
                            vpx_bit_depth_t bit_depth) {
167
6
  int i;
168
  // Initialize the sad lut tables using a formulaic calculation for now.
169
  // This is to make it easier to resolve the impact of experimental changes
170
  // to the quantizer tables.
171
1.54k
  for (i = 0; i < range; i++) {
172
1.53k
    const double q = vp9_convert_qindex_to_q(i, bit_depth);
173
1.53k
    bit16lut[i] = (int)(0.0418 * q + 2.4107);
174
1.53k
    bit4lut[i] = (int)(0.063 * q + 2.742);
175
1.53k
  }
176
6
}
177
178
2
void vp9_init_me_luts(void) {
179
2
  init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
180
2
                  VPX_BITS_8);
181
2
#if CONFIG_VP9_HIGHBITDEPTH
182
2
  init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
183
2
                  VPX_BITS_10);
184
2
  init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
185
2
                  VPX_BITS_12);
186
2
#endif
187
2
}
188
189
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
190
                                         8,  8,  4,  4,  2,  2,  1,  0 };
191
192
// Note that the element below for frame type "USE_BUF_FRAME", which indicates
193
// that the show frame flag is set, should not be used as no real frame
194
// is encoded so we should not reach here. However, a dummy value
195
// is inserted here to make sure the data structure has the right number
196
// of values assigned.
197
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
198
                                                              128, 144, 144 };
199
200
// Configure Vizier RD parameters.
201
// Later this function will use passed in command line values.
202
9.97k
void vp9_init_rd_parameters(VP9_COMP *cpi) {
203
9.97k
  RD_CONTROL *const rdc = &cpi->rd_ctrl;
204
205
  // When |use_vizier_rc_params| is 1, we expect the rd parameters have been
206
  // initialized by the pass in values.
207
  // Be careful that parameters below are only initialized to 1, if we do not
208
  // pass values to them. It is desired to take care of each parameter when
209
  // using |use_vizier_rc_params|.
210
9.97k
  if (cpi->twopass.use_vizier_rc_params) return;
211
212
  // Make sure this function is floating point safe.
213
9.97k
  vpx_clear_system_state();
214
215
9.97k
  rdc->rd_mult_inter_qp_fac = 1.0;
216
9.97k
  rdc->rd_mult_arf_qp_fac = 1.0;
217
9.97k
  rdc->rd_mult_key_qp_fac = 1.0;
218
9.97k
}
219
220
// Returns the default rd multiplier for inter frames for a given qindex.
// The linear function here is a first pass estimate based on data from
// a previous Vizier run.
static double def_inter_rd_multiplier(int qindex) {
  const double offset = 4.15;
  const double per_qindex = 0.001 * (double)qindex;
  return offset + per_qindex;
}
226
227
// Returns the default rd multiplier for ARF/Golden Frames for a given qindex.
// The linear function here is a first pass estimate based on data from
// a previous Vizier run.
static double def_arf_rd_multiplier(int qindex) {
  const double offset = 4.25;
  const double per_qindex = 0.001 * (double)qindex;
  return offset + per_qindex;
}
233
234
// Returns the default rd multiplier for key frames for a given qindex.
// The linear function here is a first pass estimate based on data from
// a previous Vizier run.
static double def_kf_rd_multiplier(int qindex) {
  const double offset = 4.35;
  const double per_qindex = 0.001 * (double)qindex;
  return offset + per_qindex;
}
240
241
7.11M
int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
242
7.11M
  const RD_CONTROL *rdc = &cpi->rd_ctrl;
243
7.11M
  const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
244
  // largest dc_quant is 21387, therefore rdmult should fit in int32_t
245
7.11M
  int rdmult = q * q;
246
247
7.11M
  if (cpi->ext_ratectrl.ready &&
248
0
      (cpi->ext_ratectrl.funcs.rc_type & VPX_RC_RDMULT) != 0 &&
249
0
      cpi->ext_ratectrl.ext_rdmult != VPX_DEFAULT_RDMULT) {
250
0
    return cpi->ext_ratectrl.ext_rdmult;
251
0
  }
252
253
  // Make sure this function is floating point safe.
254
7.11M
  vpx_clear_system_state();
255
256
7.11M
  if (cpi->common.frame_type == KEY_FRAME) {
257
651k
    double def_rd_q_mult = def_kf_rd_multiplier(qindex);
258
651k
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_key_qp_fac);
259
6.46M
  } else if (!cpi->rc.is_src_frame_alt_ref &&
260
6.44M
             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
261
563k
    double def_rd_q_mult = def_arf_rd_multiplier(qindex);
262
563k
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_arf_qp_fac);
263
5.89M
  } else {
264
5.89M
    double def_rd_q_mult = def_inter_rd_multiplier(qindex);
265
5.89M
    rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_inter_qp_fac);
266
5.89M
  }
267
268
7.11M
#if CONFIG_VP9_HIGHBITDEPTH
269
7.11M
  switch (cpi->common.bit_depth) {
270
0
    case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
271
0
    case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
272
7.11M
    default: break;
273
7.11M
  }
274
7.11M
#endif  // CONFIG_VP9_HIGHBITDEPTH
275
7.11M
  return rdmult > 0 ? rdmult : 1;
276
7.11M
}
277
278
7.11M
static int modulate_rdmult(const VP9_COMP *cpi, int rdmult) {
279
7.11M
  int64_t rdmult_64 = rdmult;
280
7.11M
  if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
281
0
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
282
0
    const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
283
0
    const int gfu_boost = cpi->multi_layer_arf
284
0
                              ? gf_group->gfu_boost[gf_group->index]
285
0
                              : cpi->rc.gfu_boost;
286
0
    const int boost_index = VPXMIN(15, (gfu_boost / 100));
287
288
0
    rdmult_64 = (rdmult_64 * rd_frame_type_factor[frame_type]) >> 7;
289
0
    rdmult_64 += ((rdmult_64 * rd_boost_factor[boost_index]) >> 7);
290
0
  }
291
7.11M
  return (int)rdmult_64;
292
7.11M
}
293
294
7.11M
int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
295
7.11M
  int rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex);
296
7.11M
  if (cpi->ext_ratectrl.ready &&
297
0
      (cpi->ext_ratectrl.funcs.rc_type & VPX_RC_RDMULT) != 0 &&
298
0
      cpi->ext_ratectrl.ext_rdmult != VPX_DEFAULT_RDMULT) {
299
0
    return cpi->ext_ratectrl.ext_rdmult;
300
0
  }
301
7.11M
  return modulate_rdmult(cpi, rdmult);
302
7.11M
}
303
304
0
int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
305
0
  int rdmult =
306
0
      vp9_compute_rd_mult_based_on_qindex(cpi, cpi->common.base_qindex);
307
0
  rdmult = (int)((double)rdmult / beta);
308
0
  rdmult = rdmult > 0 ? rdmult : 1;
309
0
  return modulate_rdmult(cpi, rdmult);
310
0
}
311
312
3.23M
// Returns the rd threshold scale factor for |qindex|: the dc quantizer,
// divided by a bit-depth dependent constant (4, 16 or 64), is raised to
// RD_THRESH_POW and multiplied by 5.12. The result is never below 8.
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  vpx_bit_depth_t depth = VPX_BITS_8;
  double divisor = 4.0;
  double q;
  int factor;
#if CONFIG_VP9_HIGHBITDEPTH
  switch (bit_depth) {
    case VPX_BITS_8: break;
    case VPX_BITS_10:
      depth = VPX_BITS_10;
      divisor = 16.0;
      break;
    default:
      assert(bit_depth == VPX_BITS_12);
      depth = VPX_BITS_12;
      divisor = 64.0;
      break;
  }
#else
  (void)bit_depth;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  q = vp9_dc_quant(qindex, 0, depth) / divisor;
  // TODO(debargha): Adjust the function below.
  factor = (int)(pow(q, RD_THRESH_POW) * 5.12);
  return VPXMAX(factor, 8);
}
330
331
7.11M
// Loads x->sadperbit16 and x->sadperbit4 for |qindex| from the sad-per-bit
// lookup tables that match the frame bit depth (always the 8-bit tables when
// CONFIG_VP9_HIGHBITDEPTH is disabled).
void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
  const int *lut16 = sad_per_bit16lut_8;
  const int *lut4 = sad_per_bit4lut_8;
#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8: break;
    case VPX_BITS_10:
      lut16 = sad_per_bit16lut_10;
      lut4 = sad_per_bit4lut_10;
      break;
    default:
      assert(cpi->common.bit_depth == VPX_BITS_12);
      lut16 = sad_per_bit16lut_12;
      lut4 = sad_per_bit4lut_12;
      break;
  }
#else
  (void)cpi;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  x->sadperbit16 = lut16[qindex];
  x->sadperbit4 = lut4[qindex];
}
354
355
404k
// Fills rd->threshes for every segment and block size. Each threshold is the
// matching thresh_mult entry (thresh_mult_sub8x8 for blocks below 8x8) times
// a factor built from the segment's quantizer and the block size, divided by
// 4; entries whose product would overflow are set to INT_MAX.
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int seg_qindex =
        vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
    const int qindex = clamp(seg_qindex + cm->y_dc_delta_q, 0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
    int bsize;

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;
      int i;

      if (bsize < BLOCK_8X8) {
        for (i = 0; i < MAX_REFS; ++i) {
          if (rd->thresh_mult_sub8x8[i] < thresh_max) {
            rd->threshes[segment_id][bsize][i] =
                rd->thresh_mult_sub8x8[i] * t / 4;
          } else {
            rd->threshes[segment_id][bsize][i] = INT_MAX;
          }
        }
      } else {
        for (i = 0; i < MAX_MODES; ++i) {
          if (rd->thresh_mult[i] < thresh_max) {
            rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] * t / 4;
          } else {
            rd->threshes[segment_id][bsize][i] = INT_MAX;
          }
        }
      }
    }
  }
}
386
387
207k
// Converts the frame context's inter mode probabilities into token costs,
// one row of cpi->inter_mode_cost per inter mode context.
void vp9_build_inter_mode_cost(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int ctx = 0;
  while (ctx < INTER_MODE_CONTEXTS) {
    vp9_cost_tokens((int *)cpi->inter_mode_cost[ctx],
                    fc->inter_mode_probs[ctx], vp9_inter_mode_tree);
    ++ctx;
  }
}
395
396
404k
// Per-frame setup of the rate-distortion constants: RDDIV/RDMULT, the error
// per bit, transform size selection, block thresholds, partition
// probabilities and the cost tables needed by the active encoding pass and
// speed features.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  RD_OPT *const rd = &cpi->rd;

  vpx_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  set_error_per_bit(x, rd->RDMULT);

  // Transform size selection is switched off only for non-key frames that
  // use the USE_LARGESTALL search method.
  x->select_tx_size = !(cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                        cm->frame_type != KEY_FRAME);

  set_block_thresholds(cm, rd);
  set_partition_probs(cm, xd);

  if (cpi->oxcf.pass == 1) {
    // First pass: only the motion vector cost table is built.
    if (!frame_is_intra_only(cm)) {
      vp9_build_nmv_cost_table(
          x->nmvjointcost,
          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
          &cm->fc->nmvc, cm->allow_high_precision_mv);
    }
    return;
  }

  if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
    fill_token_costs(x->token_costs, cm->fc->coef_probs);
  }

  if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
      cm->frame_type == KEY_FRAME) {
    int ctx;
    for (ctx = 0; ctx < PARTITION_CONTEXTS; ++ctx) {
      vp9_cost_tokens(cpi->partition_cost[ctx], get_partition_probs(xd, ctx),
                      vp9_partition_tree);
    }
  }

  // With non-rd mode picking the mode costs are refreshed only on key frames
  // and on frames whose index is 1 modulo 8.
  if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
      cm->frame_type == KEY_FRAME) {
    fill_mode_costs(cpi);

    if (!frame_is_intra_only(cm)) {
      vp9_build_nmv_cost_table(
          x->nmvjointcost,
          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
          &cm->fc->nmvc, cm->allow_high_precision_mv);
      vp9_build_inter_mode_cost(cpi);
    }
  }
}
449
450
// NOTE: The tables below must be of the same size.
451
452
// The functions described below are sampled at the four most significant
453
// bits of x^2 + 8 / 256.
454
455
// Normalized rate:
456
// This table models the rate for a Laplacian source with given variance
457
// when quantized with a uniform quantizer with given stepsize. The
458
// closed form expression is:
459
// Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
460
// where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
461
// and H(x) is the binary entropy function.
462
static const int rate_tab_q10[] = {
463
  65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142, 4044,
464
  3958,  3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, 3133, 3037,
465
  2952,  2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353, 2290, 2232, 2179,
466
  2130,  2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, 1608, 1530, 1460, 1398,
467
  1342,  1290, 1243, 1199, 1159, 1086, 1021, 963,  911,  864,  821,  781,  745,
468
  680,   623,  574,  530,  490,  455,  424,  395,  345,  304,  269,  239,  213,
469
  190,   171,  154,  126,  104,  87,   73,   61,   52,   44,   38,   28,   21,
470
  16,    12,   10,   8,    6,    5,    3,    2,    1,    1,    1,    0,    0,
471
};
472
473
// Normalized distortion:
474
// This table models the normalized distortion for a Laplacian source
475
// with given variance when quantized with a uniform quantizer
476
// with given stepsize. The closed form expression is:
477
// Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
478
// where x = qpstep / sqrt(variance).
479
// Note the actual distortion is Dn * variance.
480
static const int dist_tab_q10[] = {
481
  0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,    5,
482
  6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,   18,   21,
483
  24,   26,   29,   31,   34,   36,   39,   44,   49,   54,   59,   64,   69,
484
  73,   78,   88,   97,   106,  115,  124,  133,  142,  151,  167,  184,  200,
485
  215,  231,  245,  260,  274,  301,  327,  351,  375,  397,  418,  439,  458,
486
  495,  528,  559,  587,  613,  637,  659,  680,  717,  749,  777,  801,  823,
487
  842,  859,  874,  899,  919,  936,  949,  960,  969,  977,  983,  994,  1001,
488
  1006, 1010, 1013, 1015, 1017, 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
489
};
490
static const int xsq_iq_q10[] = {
491
  0,      4,      8,      12,     16,     20,     24,     28,     32,
492
  40,     48,     56,     64,     72,     80,     88,     96,     112,
493
  128,    144,    160,    176,    192,    208,    224,    256,    288,
494
  320,    352,    384,    416,    448,    480,    544,    608,    672,
495
  736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
496
  1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
497
  3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
498
  7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
499
  16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
500
  36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
501
  81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
502
  180192, 196576, 212960, 229344, 245728,
503
};
504
505
133M
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
506
133M
  const int tmp = (xsq_q10 >> 2) + 8;
507
133M
  const int k = get_msb(tmp) - 3;
508
133M
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
509
133M
  const int one_q10 = 1 << 10;
510
133M
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
511
133M
  const int b_q10 = one_q10 - a_q10;
512
133M
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
513
133M
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
514
133M
}
515
516
static const uint32_t MAX_XSQ_Q10 = 245727;
517
518
void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
519
                                  unsigned int qstep, int *rate,
520
142M
                                  int64_t *dist) {
521
  // This function models the rate and distortion for a Laplacian
522
  // source with given variance when quantized with a uniform quantizer
523
  // with given stepsize. The closed form expressions are in:
524
  // Hang and Chen, "Source Model for transform video coder and its
525
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
526
  // Sys. for Video Tech., April 1997.
527
142M
  if (var == 0) {
528
9.20M
    *rate = 0;
529
9.20M
    *dist = 0;
530
133M
  } else {
531
133M
    int d_q10, r_q10;
532
133M
    const uint64_t xsq_q10_64 =
533
133M
        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
534
133M
    const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
535
133M
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
536
133M
    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
537
133M
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
538
133M
  }
539
142M
}
540
541
// Disable gcc 12.2 false positive warning.
542
// warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]
543
#if defined(__GNUC__) && !defined(__clang__)
544
#pragma GCC diagnostic push
545
#pragma GCC diagnostic ignored "-Wstringop-overflow"
546
#endif
547
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
548
                              const struct macroblockd_plane *pd,
549
                              ENTROPY_CONTEXT t_above[16],
550
160M
                              ENTROPY_CONTEXT t_left[16]) {
551
160M
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
552
160M
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
553
160M
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
554
160M
  const ENTROPY_CONTEXT *const above = pd->above_context;
555
160M
  const ENTROPY_CONTEXT *const left = pd->left_context;
556
557
160M
  int i;
558
160M
  switch (tx_size) {
559
111M
    case TX_4X4:
560
111M
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
561
111M
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
562
111M
      break;
563
37.2M
    case TX_8X8:
564
86.0M
      for (i = 0; i < num_4x4_w; i += 2)
565
48.7M
        t_above[i] = !!*(const uint16_t *)&above[i];
566
83.9M
      for (i = 0; i < num_4x4_h; i += 2)
567
46.6M
        t_left[i] = !!*(const uint16_t *)&left[i];
568
37.2M
      break;
569
10.0M
    case TX_16X16:
570
25.4M
      for (i = 0; i < num_4x4_w; i += 4)
571
15.3M
        t_above[i] = !!*(const uint32_t *)&above[i];
572
24.2M
      for (i = 0; i < num_4x4_h; i += 4)
573
14.2M
        t_left[i] = !!*(const uint32_t *)&left[i];
574
10.0M
      break;
575
2.14M
    default:
576
2.14M
      assert(tx_size == TX_32X32);
577
5.14M
      for (i = 0; i < num_4x4_w; i += 8)
578
3.00M
        t_above[i] = !!*(const uint64_t *)&above[i];
579
5.19M
      for (i = 0; i < num_4x4_h; i += 8)
580
3.05M
        t_left[i] = !!*(const uint64_t *)&left[i];
581
2.14M
      break;
582
160M
  }
583
160M
}
584
#if defined(__GNUC__) && !defined(__clang__)
585
#pragma GCC diagnostic pop
586
#endif
587
588
// Evaluates the candidate reference motion vectors for |ref_frame| (the two
// ref_mvs entries plus, for blocks below the maximum partition size,
// x->pred_mv) by computing the SAD of each at full-pel precision. Stores the
// index of the best candidate, the best SAD and the largest full-pel motion
// vector component seen in |x|.
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  const int num_mv_refs =
      MAX_MV_REF_CANDIDATES + (block_size < x->max_partition_size);
  uint8_t *const src_y_ptr = x->plane[0].src.buf;
  const int near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
                                x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
  MV pred_mv[3];
  int best_index = 0;
  int best_sad = INT_MAX;
  int max_mv = 0;
  int zero_seen = 0;
  int i;

  pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  pred_mv[2] = x->pred_mv[ref_frame];
  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  // Get the sad for each candidate reference mv.
  for (i = 0; i < num_mv_refs; ++i) {
    const MV *const mv = &pred_mv[i];
    int fp_row, fp_col, is_zero, sad;

    // Skip candidates with a component of INT16_MAX and a "near" candidate
    // that duplicates "nearest".
    if (mv->row == INT16_MAX || mv->col == INT16_MAX) continue;
    if (i == 1 && near_same_nearest) continue;

    // Round the 1/8-pel vector to full-pel.
    fp_row = (mv->row + 3 + (mv->row >= 0)) >> 3;
    fp_col = (mv->col + 3 + (mv->col >= 0)) >> 3;
    max_mv = VPXMAX(max_mv, VPXMAX(abs(mv->row), abs(mv->col)) >> 3);

    // The zero vector only needs to be evaluated once.
    is_zero = (fp_row == 0 && fp_col == 0);
    if (is_zero && zero_seen) continue;
    zero_seen |= is_zero;

    // Find sad for current vector.
    sad = cpi->fn_ptr[block_size].sdf(
        src_y_ptr, x->plane[0].src.stride,
        &ref_y_buffer[ref_y_stride * fp_row + fp_col], ref_y_stride);
    // Note if it is the best so far.
    if (sad < best_sad) {
      best_sad = sad;
      best_index = i;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
640
641
void vp9_setup_pred_block(const MACROBLOCKD *xd,
642
                          struct buf_2d dst[MAX_MB_PLANE],
643
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
644
                          const struct scale_factors *scale,
645
21.6M
                          const struct scale_factors *scale_uv) {
646
21.6M
  int i;
647
648
21.6M
  dst[0].buf = src->y_buffer;
649
21.6M
  dst[0].stride = src->y_stride;
650
21.6M
  dst[1].buf = src->u_buffer;
651
21.6M
  dst[2].buf = src->v_buffer;
652
21.6M
  dst[1].stride = dst[2].stride = src->uv_stride;
653
654
86.7M
  for (i = 0; i < MAX_MB_PLANE; ++i) {
655
65.0M
    setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
656
65.0M
                     i ? scale_uv : scale, xd->plane[i].subsampling_x,
657
65.0M
                     xd->plane[i].subsampling_y);
658
65.0M
  }
659
21.6M
}
660
661
// Converts a raster-order 4x4 block index inside a plane block of size
// |plane_bsize| into a sample offset for a buffer with the given |stride|.
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride) {
  const int width_log2 = b_width_log2_lookup[plane_bsize];
  const int col_mask = (1 << width_log2) - 1;
  const int block_row = raster_block >> width_log2;
  const int block_col = raster_block & col_mask;
  // Each block covers 4 samples in both directions.
  return (4 * block_row) * stride + 4 * block_col;
}
668
669
// Returns the address of raster block |raster_block| inside the int16_t
// buffer |base|, whose stride is the width of |plane_bsize| in samples.
int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
                                       int16_t *base) {
  const int width_in_samples = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset =
      vp9_raster_block_offset(plane_bsize, raster_block, width_in_samples);
  return &base[offset];
}
674
675
YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
676
20.5M
                                             int ref_frame) {
677
20.5M
  const VP9_COMMON *const cm = &cpi->common;
678
20.5M
  const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
679
20.5M
  const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
680
20.5M
  assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
681
20.5M
  return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
682
20.5M
             ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
683
20.5M
             : NULL;
684
20.5M
}
685
686
56.5M
int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
687
56.5M
  const MODE_INFO *const mi = xd->mi[0];
688
56.5M
  const int ctx = get_pred_context_switchable_interp(xd);
689
56.5M
  return SWITCHABLE_INTERP_RATE_FACTOR *
690
56.5M
         cpi->switchable_interp_costs[ctx][mi->interp_filter];
691
56.5M
}
692
693
404k
// Initializes rd->thresh_mult for every prediction mode. All modes start
// from a common baseline (-500 in BEST mode, 0 otherwise); the three NEAREST
// modes are then set to 300 or 0 depending on sf->adaptive_rd_thresh, and
// every other mode gets a fixed per-mode increment on top of the baseline.
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  RD_OPT *const rd = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int baseline = cpi->oxcf.mode == BEST ? -500 : 0;
  const int nearest_thresh = sf->adaptive_rd_thresh ? 300 : 0;
  int i;

  // Set baseline threshold values.
  for (i = 0; i < MAX_MODES; ++i) rd->thresh_mult[i] = baseline;

  // The NEAREST modes ignore the baseline.
  rd->thresh_mult[THR_NEARESTMV] = nearest_thresh;
  rd->thresh_mult[THR_NEARESTG] = nearest_thresh;
  rd->thresh_mult[THR_NEARESTA] = nearest_thresh;

  // Modes raised by 1000.
  rd->thresh_mult[THR_DC] += 1000;
  rd->thresh_mult[THR_NEWMV] += 1000;
  rd->thresh_mult[THR_NEWA] += 1000;
  rd->thresh_mult[THR_NEWG] += 1000;
  rd->thresh_mult[THR_NEARMV] += 1000;
  rd->thresh_mult[THR_NEARA] += 1000;
  rd->thresh_mult[THR_NEARG] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
  rd->thresh_mult[THR_TM] += 1000;

  // Modes raised by 1500.
  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
  rd->thresh_mult[THR_COMP_NEARGA] += 1500;

  // Modes raised by 2000.
  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
  rd->thresh_mult[THR_COMP_NEWGA] += 2000;
  rd->thresh_mult[THR_ZEROMV] += 2000;
  rd->thresh_mult[THR_ZEROG] += 2000;
  rd->thresh_mult[THR_ZEROA] += 2000;
  rd->thresh_mult[THR_H_PRED] += 2000;
  rd->thresh_mult[THR_V_PRED] += 2000;

  // Modes raised by 2500.
  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
  rd->thresh_mult[THR_D45_PRED] += 2500;
  rd->thresh_mult[THR_D135_PRED] += 2500;
  rd->thresh_mult[THR_D117_PRED] += 2500;
  rd->thresh_mult[THR_D153_PRED] += 2500;
  rd->thresh_mult[THR_D207_PRED] += 2500;
  rd->thresh_mult[THR_D63_PRED] += 2500;
}
746
747
404k
// Copies one of two fixed sets of sub-8x8 threshold multipliers into
// rd->thresh_mult_sub8x8: the second set in BEST mode, the first otherwise.
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int sub8x8_mult[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },  // any mode other than BEST
    { 2000, 2000, 2000, 4000, 4000, 2000 }   // BEST mode
  };
  const int use_best = (cpi->oxcf.mode == BEST);
  RD_OPT *const rd = &cpi->rd;
  memcpy(rd->thresh_mult_sub8x8, sub8x8_mult[use_best],
         sizeof(sub8x8_mult[use_best]));
}
756
757
void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
758
2.58M
                               int bsize, int best_mode_index) {
759
2.58M
  if (rd_thresh > 0) {
760
2.58M
    const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
761
2.58M
    int mode;
762
69.7M
    for (mode = 0; mode < top_mode; ++mode) {
763
67.1M
      const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
764
67.1M
      const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
765
67.1M
      BLOCK_SIZE bs;
766
332M
      for (bs = min_size; bs <= max_size; ++bs) {
767
265M
        int *const fact = &factor_buf[bs][mode];
768
265M
        if (mode == best_mode_index) {
769
10.0M
          *fact -= (*fact >> 4);
770
255M
        } else {
771
255M
          *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
772
255M
        }
773
265M
      }
774
67.1M
    }
775
2.58M
  }
776
2.58M
}
777
778
// Returns the intra cost penalty: 20 times the 8-bit dc quantizer for
// (|qindex|, |qdelta|), shifted right by 4 for blocks up to 8x8 and by 2 for
// blocks up to 16x16. No reduction is applied to larger blocks or when the
// noise estimate is enabled and reports a high level.
int vp9_get_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
                               int qindex, int qdelta) {
  // Reduce the intra cost penalty for small blocks (<=16x16).
  int reduction_fac = 0;
  if (bsize <= BLOCK_16X16) reduction_fac = (bsize <= BLOCK_8X8) ? 4 : 2;

  // Don't reduce intra cost penalty if estimated noise level is high.
  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh) {
    reduction_fac = 0;
  }

  // Always use VPX_BITS_8 as input here because the penalty is applied
  // to rate not distortion so we want a consistent penalty for all bit
  // depths. If the actual bit depth were passed in here then the value
  // returned by vp9_dc_quant() would scale with the bit depth and we would
  // then need to apply inverse scaling to correct back to a bit depth
  // independent rate penalty.
  return (20 * vp9_dc_quant(qindex, qdelta, VPX_BITS_8)) >> reduction_fac;
}