Coverage Report

Created: 2026-04-12 07:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavif/ext/aom/av1/encoder/encodemv.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <math.h>
13
14
#include "av1/common/common.h"
15
#include "av1/common/entropymode.h"
16
17
#include "av1/encoder/cost.h"
18
#include "av1/encoder/encodemv.h"
19
20
#include "aom_dsp/aom_dsp_common.h"
21
#include "aom_ports/bitops.h"
22
23
static void update_mv_component_stats(int comp, nmv_component *mvcomp,
24
473k
                                      MvSubpelPrecision precision) {
25
473k
  assert(comp != 0);
26
473k
  int offset;
27
473k
  const int sign = comp < 0;
28
473k
  const int mag = sign ? -comp : comp;
29
473k
  const int mv_class = av1_get_mv_class(mag - 1, &offset);
30
473k
  const int d = offset >> 3;         // int mv data
31
473k
  const int fr = (offset >> 1) & 3;  // fractional mv data
32
473k
  const int hp = offset & 1;         // high precision mv data
33
34
  // Sign
35
473k
  update_cdf(mvcomp->sign_cdf, sign, 2);
36
37
  // Class
38
473k
  update_cdf(mvcomp->classes_cdf, mv_class, MV_CLASSES);
39
40
  // Integer bits
41
473k
  if (mv_class == MV_CLASS_0) {
42
286k
    update_cdf(mvcomp->class0_cdf, d, CLASS0_SIZE);
43
286k
  } else {
44
187k
    const int n = mv_class + CLASS0_BITS - 1;  // number of bits
45
632k
    for (int i = 0; i < n; ++i)
46
444k
      update_cdf(mvcomp->bits_cdf[i], (d >> i) & 1, 2);
47
187k
  }
48
  // Fractional bits
49
473k
  if (precision > MV_SUBPEL_NONE) {
50
472k
    aom_cdf_prob *fp_cdf =
51
472k
        mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf;
52
472k
    update_cdf(fp_cdf, fr, MV_FP_SIZE);
53
472k
  }
54
55
  // High precision bit
56
473k
  if (precision > MV_SUBPEL_LOW_PRECISION) {
57
264k
    aom_cdf_prob *hp_cdf =
58
264k
        mv_class == MV_CLASS_0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf;
59
264k
    update_cdf(hp_cdf, hp, 2);
60
264k
  }
61
473k
}
62
63
void av1_update_mv_stats(const MV *mv, const MV *ref, nmv_context *mvctx,
64
288k
                         MvSubpelPrecision precision) {
65
288k
  const MV diff = { mv->row - ref->row, mv->col - ref->col };
66
288k
  const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
67
68
288k
  update_cdf(mvctx->joints_cdf, j, MV_JOINTS);
69
70
288k
  if (mv_joint_vertical(j))
71
239k
    update_mv_component_stats(diff.row, &mvctx->comps[0], precision);
72
73
288k
  if (mv_joint_horizontal(j))
74
234k
    update_mv_component_stats(diff.col, &mvctx->comps[1], precision);
75
288k
}
76
77
static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp,
78
471k
                                MvSubpelPrecision precision) {
79
471k
  assert(comp != 0);
80
471k
  int offset;
81
471k
  const int sign = comp < 0;
82
471k
  const int mag = sign ? -comp : comp;
83
471k
  const int mv_class = av1_get_mv_class(mag - 1, &offset);
84
471k
  const int d = offset >> 3;         // int mv data
85
471k
  const int fr = (offset >> 1) & 3;  // fractional mv data
86
471k
  const int hp = offset & 1;         // high precision mv data
87
88
  // Sign
89
471k
  aom_write_symbol(w, sign, mvcomp->sign_cdf, 2);
90
91
  // Class
92
471k
  aom_write_symbol(w, mv_class, mvcomp->classes_cdf, MV_CLASSES);
93
94
  // Integer bits
95
471k
  if (mv_class == MV_CLASS_0) {
96
285k
    aom_write_symbol(w, d, mvcomp->class0_cdf, CLASS0_SIZE);
97
285k
  } else {
98
186k
    int i;
99
186k
    const int n = mv_class + CLASS0_BITS - 1;  // number of bits
100
629k
    for (i = 0; i < n; ++i)
101
443k
      aom_write_symbol(w, (d >> i) & 1, mvcomp->bits_cdf[i], 2);
102
186k
  }
103
  // Fractional bits
104
471k
  if (precision > MV_SUBPEL_NONE) {
105
470k
    aom_write_symbol(
106
470k
        w, fr,
107
470k
        mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf,
108
470k
        MV_FP_SIZE);
109
470k
  }
110
111
  // High precision bit
112
471k
  if (precision > MV_SUBPEL_LOW_PRECISION)
113
263k
    aom_write_symbol(
114
263k
        w, hp, mv_class == MV_CLASS_0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf,
115
263k
        2);
116
471k
}
117
118
/* TODO(siekyleb@amazon.com): This function writes MV_VALS ints or 128 KiB. This
119
 *   is more than most L1D caches and is a significant chunk of L2. Write
120
 *   SIMD that uses streaming writes to avoid loading all of that into L1, or
121
 *   just don't update the larger component costs every time this is called
122
 *   (or both).
123
 */
124
void av1_build_nmv_component_cost_table(int *mvcost,
125
                                        const nmv_component *const mvcomp,
126
306k
                                        MvSubpelPrecision precision) {
127
306k
  int i, j, v, o, mantissa;
128
306k
  int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
129
306k
  int bits_cost[MV_OFFSET_BITS][2];
130
306k
  int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE] = { 0 },
131
306k
      fp_cost[MV_FP_SIZE] = { 0 };
132
306k
  int class0_hp_cost[2] = { 0 }, hp_cost[2] = { 0 };
133
134
306k
  av1_cost_tokens_from_cdf(sign_cost, mvcomp->sign_cdf, NULL);
135
306k
  av1_cost_tokens_from_cdf(class_cost, mvcomp->classes_cdf, NULL);
136
306k
  av1_cost_tokens_from_cdf(class0_cost, mvcomp->class0_cdf, NULL);
137
3.36M
  for (i = 0; i < MV_OFFSET_BITS; ++i) {
138
3.05M
    av1_cost_tokens_from_cdf(bits_cost[i], mvcomp->bits_cdf[i], NULL);
139
3.05M
  }
140
141
306k
  if (precision > MV_SUBPEL_NONE) {
142
893k
    for (i = 0; i < CLASS0_SIZE; ++i)
143
595k
      av1_cost_tokens_from_cdf(class0_fp_cost[i], mvcomp->class0_fp_cdf[i],
144
595k
                               NULL);
145
297k
    av1_cost_tokens_from_cdf(fp_cost, mvcomp->fp_cdf, NULL);
146
297k
  }
147
148
306k
  if (precision > MV_SUBPEL_LOW_PRECISION) {
149
222k
    av1_cost_tokens_from_cdf(class0_hp_cost, mvcomp->class0_hp_cdf, NULL);
150
222k
    av1_cost_tokens_from_cdf(hp_cost, mvcomp->hp_cdf, NULL);
151
222k
  }
152
153
  // Instead of accumulating the cost of each vector component's bits
154
  //   individually, compute the costs based on smaller vectors. Costs for
155
  //   [2^exp, 2 * 2^exp - 1] are calculated based on [0, 2^exp - 1]
156
  //   respectively. Offsets are maintained to swap both 1) class costs when
157
  //   treated as a complete vector component with the highest set bit when
158
  //   treated as a mantissa (significand) and 2) leading zeros to account for
159
  //   the current exponent.
160
161
  // Cost offsets
162
306k
  int cost_swap[MV_OFFSET_BITS] = { 0 };
163
  // Delta to convert positive vector to negative vector costs
164
306k
  int negate_sign = sign_cost[1] - sign_cost[0];
165
166
  // Initialize with offsets to swap the class costs with the costs of the
167
  //   highest set bit.
168
3.06M
  for (i = 1; i < MV_OFFSET_BITS; ++i) {
169
2.75M
    cost_swap[i] = bits_cost[i - 1][1];
170
2.75M
    if (i > CLASS0_BITS) cost_swap[i] -= class_cost[i - CLASS0_BITS];
171
2.75M
  }
172
173
  // Seed the fractional costs onto the output (overwritten later).
174
1.53M
  for (o = 0; o < MV_FP_SIZE; ++o) {
175
1.22M
    int hp;
176
3.67M
    for (hp = 0; hp < 2; ++hp) {
177
2.44M
      v = 2 * o + hp + 1;
178
2.44M
      mvcost[v] = fp_cost[o] + hp_cost[hp] + sign_cost[0];
179
2.44M
    }
180
1.22M
  }
181
182
306k
  mvcost[0] = 0;
183
  // Fill the costs for each exponent's vectors, using the costs set in the
184
  //   previous exponents.
185
3.36M
  for (i = 0; i < MV_OFFSET_BITS; ++i) {
186
3.06M
    const int exponent = (2 * MV_FP_SIZE) << i;
187
188
3.06M
    int class = 0;
189
3.06M
    if (i >= CLASS0_BITS) {
190
2.75M
      class = class_cost[i - CLASS0_BITS + 1];
191
2.75M
    }
192
193
    // Iterate through mantissas, keeping track of the location
194
    //   of the highest set bit for the mantissa.
195
    // To be clear: in the outer loop, the position of the highest set bit
196
    //   (exponent) is tracked and, in this loop, the highest set bit of the
197
    //   mantissa is tracked.
198
3.06M
    mantissa = 0;
199
19.8M
    for (j = 0; j <= i; ++j) {
200
2.42G
      for (; mantissa < (2 * MV_FP_SIZE) << j; ++mantissa) {
201
2.40G
        int cost = mvcost[mantissa + 1] + class + cost_swap[j];
202
2.40G
        v = exponent + mantissa + 1;
203
2.40G
        mvcost[v] = cost;
204
2.40G
        mvcost[-v] = cost + negate_sign;
205
2.40G
      }
206
16.7M
      cost_swap[j] += bits_cost[i][0];
207
16.7M
    }
208
3.06M
  }
209
210
  // Special case to avoid buffer overrun
211
306k
  {
212
306k
    int exponent = (2 * MV_FP_SIZE) << MV_OFFSET_BITS;
213
306k
    int class = class_cost[MV_CLASSES - 1];
214
306k
    mantissa = 0;
215
3.36M
    for (j = 0; j < MV_OFFSET_BITS; ++j) {
216
1.22G
      for (; mantissa < (2 * MV_FP_SIZE) << j; ++mantissa) {
217
1.22G
        int cost = mvcost[mantissa + 1] + class + cost_swap[j];
218
1.22G
        v = exponent + mantissa + 1;
219
1.22G
        mvcost[v] = cost;
220
1.22G
        mvcost[-v] = cost + negate_sign;
221
1.22G
      }
222
3.06M
    }
223
    // At this point: mantissa = exponent >> 1
224
225
    // Manually calculate the final cost offset
226
306k
    int cost_swap_hi =
227
306k
        bits_cost[MV_OFFSET_BITS - 1][1] - class_cost[MV_CLASSES - 2];
228
1.22G
    for (; mantissa < exponent - 1; ++mantissa) {
229
1.22G
      int cost = mvcost[mantissa + 1] + class + cost_swap_hi;
230
1.22G
      v = exponent + mantissa + 1;
231
1.22G
      mvcost[v] = cost;
232
1.22G
      mvcost[-v] = cost + negate_sign;
233
1.22G
    }
234
306k
  }
235
236
  // Fill costs for class0 vectors, overwriting previous placeholder values
237
  //   used for calculating the costs of the larger vectors.
238
919k
  for (i = 0; i < CLASS0_SIZE; ++i) {
239
613k
    const int top = i * 2 * MV_FP_SIZE;
240
3.06M
    for (o = 0; o < MV_FP_SIZE; ++o) {
241
2.45M
      int hp;
242
2.45M
      int cost = class0_fp_cost[i][o] + class_cost[0] + class0_cost[i];
243
7.35M
      for (hp = 0; hp < 2; ++hp) {
244
4.90M
        v = top + 2 * o + hp + 1;
245
4.90M
        mvcost[v] = cost + class0_hp_cost[hp] + sign_cost[0];
246
4.90M
        mvcost[-v] = cost + class0_hp_cost[hp] + sign_cost[1];
247
4.90M
      }
248
2.45M
    }
249
613k
  }
250
306k
}
251
252
void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, ThreadData *td, const MV *mv,
253
286k
                   const MV *ref, nmv_context *mvctx, int usehp) {
254
286k
  const MV diff = { mv->row - ref->row, mv->col - ref->col };
255
286k
  const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
256
  // If the mv_diff is zero, then we should have used near or nearest instead.
257
286k
  assert(j != MV_JOINT_ZERO);
258
286k
  if (cpi->common.features.cur_frame_force_integer_mv) {
259
0
    usehp = MV_SUBPEL_NONE;
260
0
  }
261
286k
  aom_write_symbol(w, j, mvctx->joints_cdf, MV_JOINTS);
262
286k
  if (mv_joint_vertical(j))
263
237k
    encode_mv_component(w, diff.row, &mvctx->comps[0], usehp);
264
265
286k
  if (mv_joint_horizontal(j))
266
233k
    encode_mv_component(w, diff.col, &mvctx->comps[1], usehp);
267
268
  // If auto_mv_step_size is enabled then keep track of the largest
269
  // motion vector component used.
270
286k
  if (cpi->sf.mv_sf.auto_mv_step_size) {
271
286k
    int maxv = AOMMAX(abs(mv->row), abs(mv->col)) >> 3;
272
286k
    td->max_mv_magnitude = AOMMAX(maxv, td->max_mv_magnitude);
273
286k
  }
274
286k
}
275
276
void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref,
277
1.08k
                   nmv_context *mvctx) {
278
  // DV and ref DV should not have sub-pel.
279
1.08k
  assert((mv->col & 7) == 0);
280
1.08k
  assert((mv->row & 7) == 0);
281
1.08k
  assert((ref->col & 7) == 0);
282
1.08k
  assert((ref->row & 7) == 0);
283
1.08k
  const MV diff = { mv->row - ref->row, mv->col - ref->col };
284
1.08k
  const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
285
286
1.08k
  aom_write_symbol(w, j, mvctx->joints_cdf, MV_JOINTS);
287
1.08k
  if (mv_joint_vertical(j))
288
709
    encode_mv_component(w, diff.row, &mvctx->comps[0], MV_SUBPEL_NONE);
289
290
1.08k
  if (mv_joint_horizontal(j))
291
820
    encode_mv_component(w, diff.col, &mvctx->comps[1], MV_SUBPEL_NONE);
292
1.08k
}
293
294
void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
295
                              const nmv_context *ctx,
296
153k
                              MvSubpelPrecision precision) {
297
153k
  av1_cost_tokens_from_cdf(mvjoint, ctx->joints_cdf, NULL);
298
153k
  av1_build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], precision);
299
153k
  av1_build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], precision);
300
153k
}
301
302
int_mv av1_get_ref_mv_from_stack(int ref_idx,
303
                                 const MV_REFERENCE_FRAME *ref_frame,
304
                                 int ref_mv_idx,
305
14.3M
                                 const MB_MODE_INFO_EXT *mbmi_ext) {
306
14.3M
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
307
14.3M
  const CANDIDATE_MV *curr_ref_mv_stack =
308
14.3M
      mbmi_ext->ref_mv_stack[ref_frame_type];
309
310
14.3M
  if (ref_frame[1] > INTRA_FRAME) {
311
0
    assert(ref_idx == 0 || ref_idx == 1);
312
0
    return ref_idx ? curr_ref_mv_stack[ref_mv_idx].comp_mv
313
0
                   : curr_ref_mv_stack[ref_mv_idx].this_mv;
314
0
  }
315
316
14.3M
  assert(ref_idx == 0);
317
14.3M
  return ref_mv_idx < mbmi_ext->ref_mv_count[ref_frame_type]
318
14.3M
             ? curr_ref_mv_stack[ref_mv_idx].this_mv
319
14.3M
             : mbmi_ext->global_mvs[ref_frame_type];
320
14.3M
}
321
322
6.04M
int_mv av1_get_ref_mv(const MACROBLOCK *x, int ref_idx) {
323
6.04M
  const MACROBLOCKD *xd = &x->e_mbd;
324
6.04M
  const MB_MODE_INFO *mbmi = xd->mi[0];
325
6.04M
  int ref_mv_idx = mbmi->ref_mv_idx;
326
6.04M
  if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV) {
327
0
    assert(has_second_ref(mbmi));
328
0
    ref_mv_idx += 1;
329
0
  }
330
6.04M
  return av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, ref_mv_idx,
331
6.04M
                                   &x->mbmi_ext);
332
6.04M
}
333
334
void av1_find_best_ref_mvs_from_stack(int allow_hp,
335
                                      const MB_MODE_INFO_EXT *mbmi_ext,
336
                                      MV_REFERENCE_FRAME ref_frame,
337
                                      int_mv *nearest_mv, int_mv *near_mv,
338
1.59M
                                      int is_integer) {
339
1.59M
  const int ref_idx = 0;
340
1.59M
  MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
341
1.59M
  *nearest_mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 0, mbmi_ext);
342
1.59M
  lower_mv_precision(&nearest_mv->as_mv, allow_hp, is_integer);
343
1.59M
  *near_mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 1, mbmi_ext);
344
1.59M
  lower_mv_precision(&near_mv->as_mv, allow_hp, is_integer);
345
1.59M
}