/src/libavif/ext/aom/av1/encoder/encodemv.c

Source (jump to first uncovered line)
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>

#include "av1/common/common.h"
#include "av1/common/entropymode.h"

#include "av1/encoder/cost.h"
#include "av1/encoder/encodemv.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/bitops.h"

// Adapts the CDFs in `mvcomp` for a single non-zero motion vector difference
// component `comp`. The symbols visited here are the same ones that
// encode_mv_component() writes: sign, class, integer bits, and then, depending
// on `precision`, the fractional bits and the high precision bit.
static void update_mv_component_stats(int comp, nmv_component *mvcomp,
                                      MvSubpelPrecision precision) {
  assert(comp != 0);
  const int is_negative = comp < 0;
  const int magnitude = is_negative ? -comp : comp;
  int class_offset;
  const int mv_class = av1_get_mv_class(magnitude - 1, &class_offset);
  const int is_class0 = (mv_class == MV_CLASS_0);

  // Split the in-class offset into its three fields.
  const int int_part = class_offset >> 3;         // int mv data
  const int frac_part = (class_offset >> 1) & 3;  // fractional mv data
  const int hp_bit = class_offset & 1;            // high precision mv data

  update_cdf(mvcomp->sign_cdf, is_negative, 2);
  update_cdf(mvcomp->classes_cdf, mv_class, MV_CLASSES);

  // Integer part: one symbol for class 0, otherwise one binary symbol per bit.
  if (is_class0) {
    update_cdf(mvcomp->class0_cdf, int_part, CLASS0_SIZE);
  } else {
    const int num_bits = mv_class + CLASS0_BITS - 1;
    int bit = 0;
    while (bit < num_bits) {
      update_cdf(mvcomp->bits_cdf[bit], (int_part >> bit) & 1, 2);
      ++bit;
    }
  }

  // Fractional part: class 0 uses a CDF selected by the integer part.
  if (precision > MV_SUBPEL_NONE) {
    aom_cdf_prob *fp_cdf = mvcomp->fp_cdf;
    if (is_class0) fp_cdf = mvcomp->class0_fp_cdf[int_part];
    update_cdf(fp_cdf, frac_part, MV_FP_SIZE);
  }

  // High precision bit: class 0 has its own CDF.
  if (precision > MV_SUBPEL_LOW_PRECISION) {
    aom_cdf_prob *hp_cdf = mvcomp->hp_cdf;
    if (is_class0) hp_cdf = mvcomp->class0_hp_cdf;
    update_cdf(hp_cdf, hp_bit, 2);
  }
}

// Updates the CDFs in `mvctx` for the difference between `mv` and `ref`:
// first the joint type, then each component the joint type reports as
// vertical (row, comps[0]) and/or horizontal (col, comps[1]).
void av1_update_mv_stats(const MV *mv, const MV *ref, nmv_context *mvctx,
                         MvSubpelPrecision precision) {
  const MV delta = { mv->row - ref->row, mv->col - ref->col };
  const MV_JOINT_TYPE joint = av1_get_mv_joint(&delta);

  update_cdf(mvctx->joints_cdf, joint, MV_JOINTS);

  if (mv_joint_vertical(joint)) {
    update_mv_component_stats(delta.row, &mvctx->comps[0], precision);
  }
  if (mv_joint_horizontal(joint)) {
    update_mv_component_stats(delta.col, &mvctx->comps[1], precision);
  }
}

// Writes a single non-zero motion vector difference component `comp` to `w`
// using the CDFs in `mvcomp`. Symbols are emitted in this order: sign, class,
// integer bits, then the fractional bits and the high precision bit when
// `precision` allows them.
static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp,
                                MvSubpelPrecision precision) {
  assert(comp != 0);
  const int negative = comp < 0;
  const int magnitude = negative ? -comp : comp;
  int class_offset;
  const int mv_class = av1_get_mv_class(magnitude - 1, &class_offset);
  const int in_class0 = (mv_class == MV_CLASS_0);

  // Split the in-class offset into its three fields.
  const int int_part = class_offset >> 3;         // int mv data
  const int frac_part = (class_offset >> 1) & 3;  // fractional mv data
  const int hp_bit = class_offset & 1;            // high precision mv data

  aom_write_symbol(w, negative, mvcomp->sign_cdf, 2);
  aom_write_symbol(w, mv_class, mvcomp->classes_cdf, MV_CLASSES);

  // Integer part: one symbol for class 0, otherwise one binary symbol per bit.
  if (in_class0) {
    aom_write_symbol(w, int_part, mvcomp->class0_cdf, CLASS0_SIZE);
  } else {
    const int num_bits = mv_class + CLASS0_BITS - 1;
    for (int bit = 0; bit < num_bits; ++bit) {
      aom_write_symbol(w, (int_part >> bit) & 1, mvcomp->bits_cdf[bit], 2);
    }
  }

  // Fractional part: class 0 uses a CDF selected by the integer part.
  if (precision > MV_SUBPEL_NONE) {
    if (in_class0) {
      aom_write_symbol(w, frac_part, mvcomp->class0_fp_cdf[int_part],
                       MV_FP_SIZE);
    } else {
      aom_write_symbol(w, frac_part, mvcomp->fp_cdf, MV_FP_SIZE);
    }
  }

  // High precision bit: class 0 has its own CDF.
  if (precision > MV_SUBPEL_LOW_PRECISION) {
    if (in_class0) {
      aom_write_symbol(w, hp_bit, mvcomp->class0_hp_cdf, 2);
    } else {
      aom_write_symbol(w, hp_bit, mvcomp->hp_cdf, 2);
    }
  }
}

/* TODO(siekyleb@amazon.com): This function writes MV_VALS ints or 128 KiB. This
 *   is more than most L1D caches and is a significant chunk of L2. Write
 *   SIMD that uses streaming writes to avoid loading all of that into L1, or
 *   just don't update the larger component costs every time this is called
 *   (or both).
 */
// Fills the cost table for one motion vector component from the CDFs in
// `mvcomp`.
//
//   mvcost    Output table. It is written at both positive and negative
//             indices (mvcost[v] and mvcost[-v]), so it must point into the
//             interior of a buffer with room on both sides. The largest
//             magnitude written is
//             2 * ((2 * MV_FP_SIZE) << MV_OFFSET_BITS) - 1. mvcost[0] is set
//             to 0.
//   mvcomp    Source CDFs (sign, class, class0, per-bit, fractional and high
//             precision) for this component. Not modified.
//   precision Fractional costs are only derived when this is greater than
//             MV_SUBPEL_NONE, and high precision costs only when it is
//             greater than MV_SUBPEL_LOW_PRECISION; otherwise those costs
//             stay at their zero initial values.
//
// The statements below are order dependent: each exponent's entries are
// computed from entries written by earlier steps, and the class0 entries are
// finalized last because their positions hold seed values until then.
void av1_build_nmv_component_cost_table(int *mvcost,
                                        const nmv_component *const mvcomp,
                                        MvSubpelPrecision precision) {
  int i, j, v, o, mantissa;
  int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
  int bits_cost[MV_OFFSET_BITS][2];
  // The fractional and high precision costs default to zero so that they
  // contribute nothing when `precision` excludes them.
  int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE] = { 0 },
      fp_cost[MV_FP_SIZE] = { 0 };
  int class0_hp_cost[2] = { 0 }, hp_cost[2] = { 0 };

  // Derive the per-symbol costs from each CDF.
  av1_cost_tokens_from_cdf(sign_cost, mvcomp->sign_cdf, NULL);
  av1_cost_tokens_from_cdf(class_cost, mvcomp->classes_cdf, NULL);
  av1_cost_tokens_from_cdf(class0_cost, mvcomp->class0_cdf, NULL);
  for (i = 0; i < MV_OFFSET_BITS; ++i) {
    av1_cost_tokens_from_cdf(bits_cost[i], mvcomp->bits_cdf[i], NULL);
  }

  if (precision > MV_SUBPEL_NONE) {
    for (i = 0; i < CLASS0_SIZE; ++i)
      av1_cost_tokens_from_cdf(class0_fp_cost[i], mvcomp->class0_fp_cdf[i],
                               NULL);
    av1_cost_tokens_from_cdf(fp_cost, mvcomp->fp_cdf, NULL);
  }

  if (precision > MV_SUBPEL_LOW_PRECISION) {
    av1_cost_tokens_from_cdf(class0_hp_cost, mvcomp->class0_hp_cdf, NULL);
    av1_cost_tokens_from_cdf(hp_cost, mvcomp->hp_cdf, NULL);
  }

  // Instead of accumulating the cost of each vector component's bits
  //   individually, compute the costs based on smaller vectors. Costs for
  //   [2^exp, 2 * 2^exp - 1] are calculated based on [0, 2^exp - 1]
  //   respectively. Offsets are maintained to swap both 1) class costs when
  //   treated as a complete vector component with the highest set bit when
  //   treated as a mantissa (significand) and 2) leading zeros to account for
  //   the current exponent.

  // Cost offsets
  int cost_swap[MV_OFFSET_BITS] = { 0 };
  // Delta to convert positive vector to negative vector costs
  int negate_sign = sign_cost[1] - sign_cost[0];

  // Initialize with offsets to swap the class costs with the costs of the
  //   highest set bit.
  for (i = 1; i < MV_OFFSET_BITS; ++i) {
    cost_swap[i] = bits_cost[i - 1][1];
    if (i > CLASS0_BITS) cost_swap[i] -= class_cost[i - CLASS0_BITS];
  }

  // Seed the fractional costs onto the output (overwritten later).
  //   Only the positive entries 1 .. 2 * MV_FP_SIZE are seeded here; they
  //   serve as the base case for the recurrence below.
  for (o = 0; o < MV_FP_SIZE; ++o) {
    int hp;
    for (hp = 0; hp < 2; ++hp) {
      v = 2 * o + hp + 1;
      mvcost[v] = fp_cost[o] + hp_cost[hp] + sign_cost[0];
    }
  }

  mvcost[0] = 0;
  // Fill the costs for each exponent's vectors, using the costs set in the
  //   previous exponents.
  //   Iteration i reads mvcost[1 .. exponent] and writes
  //   mvcost[exponent + 1 .. 2 * exponent] together with the mirrored
  //   negative entries, so reads and writes never overlap within one pass.
  for (i = 0; i < MV_OFFSET_BITS; ++i) {
    const int exponent = (2 * MV_FP_SIZE) << i;

    int class = 0;
    if (i >= CLASS0_BITS) {
      class = class_cost[i - CLASS0_BITS + 1];
    }

    // Iterate through mantissas, keeping track of the location
    //   of the highest set bit for the mantissa.
    // To be clear: in the outer loop, the position of the highest set bit
    //   (exponent) is tracked and, in this loop, the highest set bit of the
    //   mantissa is tracked.
    mantissa = 0;
    for (j = 0; j <= i; ++j) {
      // `mantissa` deliberately carries over between values of j, so each j
      //   handles the next, twice-as-large, bucket of mantissas.
      for (; mantissa < (2 * MV_FP_SIZE) << j; ++mantissa) {
        int cost = mvcost[mantissa + 1] + class + cost_swap[j];
        v = exponent + mantissa + 1;
        mvcost[v] = cost;
        mvcost[-v] = cost + negate_sign;
      }
      // Fold the cost of a zero in bit i into this bucket's offset; the
      //   updated offset is used by the passes for larger exponents.
      cost_swap[j] += bits_cost[i][0];
    }
  }

  // Special case to avoid buffer overrun
  //   Same recurrence as above for the top exponent, except that the final
  //   loop stops one entry short (mantissa < exponent - 1), so the largest
  //   index written is 2 * exponent - 1.
  {
    int exponent = (2 * MV_FP_SIZE) << MV_OFFSET_BITS;
    int class = class_cost[MV_CLASSES - 1];
    mantissa = 0;
    for (j = 0; j < MV_OFFSET_BITS; ++j) {
      for (; mantissa < (2 * MV_FP_SIZE) << j; ++mantissa) {
        int cost = mvcost[mantissa + 1] + class + cost_swap[j];
        v = exponent + mantissa + 1;
        mvcost[v] = cost;
        mvcost[-v] = cost + negate_sign;
      }
    }
    // At this point: mantissa = exponent >> 1

    // Manually calculate the final cost offset
    //   NOTE(review): presumably the value cost_swap[MV_OFFSET_BITS] would
    //   hold if the array had one more entry — confirm against the constant
    //   definitions.
    int cost_swap_hi =
        bits_cost[MV_OFFSET_BITS - 1][1] - class_cost[MV_CLASSES - 2];
    for (; mantissa < exponent - 1; ++mantissa) {
      int cost = mvcost[mantissa + 1] + class + cost_swap_hi;
      v = exponent + mantissa + 1;
      mvcost[v] = cost;
      mvcost[-v] = cost + negate_sign;
    }
  }

  // Fill costs for class0 vectors, overwriting previous placeholder values
  //   used for calculating the costs of the larger vectors.
  //   This must come last: the loops above read the seeded placeholders.
  for (i = 0; i < CLASS0_SIZE; ++i) {
    const int top = i * 2 * MV_FP_SIZE;
    for (o = 0; o < MV_FP_SIZE; ++o) {
      int hp;
      int cost = class0_fp_cost[i][o] + class_cost[0] + class0_cost[i];
      for (hp = 0; hp < 2; ++hp) {
        v = top + 2 * o + hp + 1;
        mvcost[v] = cost + class0_hp_cost[hp] + sign_cost[0];
        mvcost[-v] = cost + class0_hp_cost[hp] + sign_cost[1];
      }
    }
  }
}

// Writes the difference between `mv` and `ref` to `w`: the joint type first,
// then the row and/or column component as indicated by the joint type.
// `usehp` is passed on as the precision for the components, and is replaced
// by MV_SUBPEL_NONE when the current frame forces integer motion vectors.
// When auto_mv_step_size is enabled, td->max_mv_magnitude is raised to cover
// this motion vector.
void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, ThreadData *td, const MV *mv,
                   const MV *ref, nmv_context *mvctx, int usehp) {
  const MV delta = { mv->row - ref->row, mv->col - ref->col };
  const MV_JOINT_TYPE joint = av1_get_mv_joint(&delta);

  // If the mv_diff is zero, then we should have used near or nearest instead.
  assert(joint != MV_JOINT_ZERO);

  if (cpi->common.features.cur_frame_force_integer_mv) usehp = MV_SUBPEL_NONE;

  aom_write_symbol(w, joint, mvctx->joints_cdf, MV_JOINTS);
  if (mv_joint_vertical(joint)) {
    encode_mv_component(w, delta.row, &mvctx->comps[0], usehp);
  }
  if (mv_joint_horizontal(joint)) {
    encode_mv_component(w, delta.col, &mvctx->comps[1], usehp);
  }

  // If auto_mv_step_size is enabled then keep track of the largest
  // motion vector component used.
  if (cpi->sf.mv_sf.auto_mv_step_size) {
    const int row_mag = abs(mv->row);
    const int col_mag = abs(mv->col);
    const int largest = AOMMAX(row_mag, col_mag) >> 3;
    td->max_mv_magnitude = AOMMAX(largest, td->max_mv_magnitude);
  }
}

// Writes the difference between `mv` and `ref` to `w` with every component
// coded at MV_SUBPEL_NONE precision. Both inputs are asserted to have their
// low three bits clear in each component.
void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref,
                   nmv_context *mvctx) {
  // DV and ref DV should not have sub-pel.
  assert((mv->col & 7) == 0);
  assert((mv->row & 7) == 0);
  assert((ref->col & 7) == 0);
  assert((ref->row & 7) == 0);

  const MV delta = { mv->row - ref->row, mv->col - ref->col };
  const MV_JOINT_TYPE joint = av1_get_mv_joint(&delta);
  aom_write_symbol(w, joint, mvctx->joints_cdf, MV_JOINTS);

  if (mv_joint_vertical(joint)) {
    encode_mv_component(w, delta.row, &mvctx->comps[0], MV_SUBPEL_NONE);
  }
  if (mv_joint_horizontal(joint)) {
    encode_mv_component(w, delta.col, &mvctx->comps[1], MV_SUBPEL_NONE);
  }
}

// Builds the joint cost table `mvjoint` from ctx->joints_cdf, then builds
// the per-component cost tables mvcost[0] and mvcost[1] from ctx->comps[0]
// and ctx->comps[1] via av1_build_nmv_component_cost_table().
void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
                              const nmv_context *ctx,
                              MvSubpelPrecision precision) {
  av1_cost_tokens_from_cdf(mvjoint, ctx->joints_cdf, NULL);
  for (int comp = 0; comp < 2; ++comp) {
    av1_build_nmv_component_cost_table(mvcost[comp], &ctx->comps[comp],
                                       precision);
  }
}

// Looks up a reference motion vector in the stack that `mbmi_ext` holds for
// the reference frame type derived from `ref_frame`.
//
// When ref_frame[1] > INTRA_FRAME, entry `ref_mv_idx` is returned without a
// count check: its comp_mv if `ref_idx` is non-zero, otherwise its this_mv.
// Otherwise `ref_idx` must be 0, and the entry's this_mv is returned when
// `ref_mv_idx` is below the stored count, else the global MV for that
// reference frame type.
int_mv av1_get_ref_mv_from_stack(int ref_idx,
                                 const MV_REFERENCE_FRAME *ref_frame,
                                 int ref_mv_idx,
                                 const MB_MODE_INFO_EXT *mbmi_ext) {
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const CANDIDATE_MV *stack = mbmi_ext->ref_mv_stack[ref_frame_type];

  if (!(ref_frame[1] > INTRA_FRAME)) {
    assert(ref_idx == 0);
    if (ref_mv_idx < mbmi_ext->ref_mv_count[ref_frame_type]) {
      return stack[ref_mv_idx].this_mv;
    }
    return mbmi_ext->global_mvs[ref_frame_type];
  }

  assert(ref_idx == 0 || ref_idx == 1);
  if (ref_idx) return stack[ref_mv_idx].comp_mv;
  return stack[ref_mv_idx].this_mv;
}

// Returns the reference motion vector for `ref_idx` of the block at
// x->e_mbd.mi[0]. The stack index is the block's ref_mv_idx, advanced by one
// when the mode is NEAR_NEWMV or NEW_NEARMV.
int_mv av1_get_ref_mv(const MACROBLOCK *x, int ref_idx) {
  const MB_MODE_INFO *mbmi = x->e_mbd.mi[0];
  const int advance_index =
      mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV;

  if (advance_index) {
    assert(has_second_ref(mbmi));
  }

  return av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
                                   mbmi->ref_mv_idx + advance_index,
                                   &x->mbmi_ext);
}

// Fetches stack entries 0 and 1 for `ref_frame` paired with NONE_FRAME,
// stores them in `*nearest_mv` and `*near_mv` respectively, and passes each
// through lower_mv_precision() with `allow_hp` and `is_integer`.
void av1_find_best_ref_mvs_from_stack(int allow_hp,
                                      const MB_MODE_INFO_EXT *mbmi_ext,
                                      MV_REFERENCE_FRAME ref_frame,
                                      int_mv *nearest_mv, int_mv *near_mv,
                                      int is_integer) {
  MV_REFERENCE_FRAME frame_pair[2] = { ref_frame, NONE_FRAME };
  int_mv *const outputs[2] = { nearest_mv, near_mv };

  // Entry 0 goes to nearest_mv, entry 1 to near_mv, in that order.
  for (int stack_idx = 0; stack_idx < 2; ++stack_idx) {
    int_mv *const out = outputs[stack_idx];
    *out = av1_get_ref_mv_from_stack(0, frame_pair, stack_idx, mbmi_ext);
    lower_mv_precision(&out->as_mv, allow_hp, is_integer);
  }
}

Coverage Report

Created: 2025-07-12 06:45

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3		*
4		* This source code is subject to the terms of the BSD 2 Clause License and
5		* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6		* was not distributed with this source code in the LICENSE file, you can
7		* obtain it at www.aomedia.org/license/software. If the Alliance for Open
8		* Media Patent License 1.0 was not distributed with this source code in the
9		* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10		*/
11
12		#include <math.h>
13
14		#include "av1/common/common.h"
15		#include "av1/common/entropymode.h"
16
17		#include "av1/encoder/cost.h"
18		#include "av1/encoder/encodemv.h"
19
20		#include "aom_dsp/aom_dsp_common.h"
21		#include "aom_ports/bitops.h"
22
23		static void update_mv_component_stats(int comp, nmv_component *mvcomp,
24	484k	MvSubpelPrecision precision) {
25	484k	assert(comp != 0);
26	484k	int offset;
27	484k	const int sign = comp < 0;
28	484k	const int mag = sign ? -comp : comp;
29	484k	const int mv_class = av1_get_mv_class(mag - 1, &offset);
30	484k	const int d = offset >> 3; // int mv data
31	484k	const int fr = (offset >> 1) & 3; // fractional mv data
32	484k	const int hp = offset & 1; // high precision mv data
33
34		// Sign
35	484k	update_cdf(mvcomp->sign_cdf, sign, 2);
36
37		// Class
38	484k	update_cdf(mvcomp->classes_cdf, mv_class, MV_CLASSES);
39
40		// Integer bits
41	484k	if (mv_class == MV_CLASS_0) {
42	293k	update_cdf(mvcomp->class0_cdf, d, CLASS0_SIZE);
43	293k	} else {
44	191k	const int n = mv_class + CLASS0_BITS - 1; // number of bits
45	643k	for (int i = 0; i < n; ++i)
46	451k	update_cdf(mvcomp->bits_cdf[i], (d >> i) & 1, 2);
47	191k	}
48		// Fractional bits
49	484k	if (precision > MV_SUBPEL_NONE) {
50	484k	aom_cdf_prob *fp_cdf =
51	484k	mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf;
52	484k	update_cdf(fp_cdf, fr, MV_FP_SIZE);
53	484k	}
54
55		// High precision bit
56	484k	if (precision > MV_SUBPEL_LOW_PRECISION) {
57	269k	aom_cdf_prob *hp_cdf =
58	269k	mv_class == MV_CLASS_0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf;
59	269k	update_cdf(hp_cdf, hp, 2);
60	269k	}
61	484k	}
62
63		void av1_update_mv_stats(const MV *mv, const MV *ref, nmv_context *mvctx,
64	298k	MvSubpelPrecision precision) {
65	298k	const MV diff = { mv->row - ref->row, mv->col - ref->col };
66	298k	const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
67
68	298k	update_cdf(mvctx->joints_cdf, j, MV_JOINTS);
69
70	298k	if (mv_joint_vertical(j))
71	244k	update_mv_component_stats(diff.row, &mvctx->comps[0], precision);
72
73	298k	if (mv_joint_horizontal(j))
74	240k	update_mv_component_stats(diff.col, &mvctx->comps[1], precision);
75	298k	}
76
77		static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp,
78	484k	MvSubpelPrecision precision) {
79	484k	assert(comp != 0);
80	484k	int offset;
81	484k	const int sign = comp < 0;
82	484k	const int mag = sign ? -comp : comp;
83	484k	const int mv_class = av1_get_mv_class(mag - 1, &offset);
84	484k	const int d = offset >> 3; // int mv data
85	484k	const int fr = (offset >> 1) & 3; // fractional mv data
86	484k	const int hp = offset & 1; // high precision mv data
87
88		// Sign
89	484k	aom_write_symbol(w, sign, mvcomp->sign_cdf, 2);
90
91		// Class
92	484k	aom_write_symbol(w, mv_class, mvcomp->classes_cdf, MV_CLASSES);
93
94		// Integer bits
95	484k	if (mv_class == MV_CLASS_0) {
96	293k	aom_write_symbol(w, d, mvcomp->class0_cdf, CLASS0_SIZE);
97	293k	} else {
98	191k	int i;
99	191k	const int n = mv_class + CLASS0_BITS - 1; // number of bits
100	642k	for (i = 0; i < n; ++i)
101	451k	aom_write_symbol(w, (d >> i) & 1, mvcomp->bits_cdf[i], 2);
102	191k	}
103		// Fractional bits
104	484k	if (precision > MV_SUBPEL_NONE) {
105	484k	aom_write_symbol(
106	484k	w, fr,
107	484k	mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf,
108	484k	MV_FP_SIZE);
109	484k	}
110
111		// High precision bit
112	484k	if (precision > MV_SUBPEL_LOW_PRECISION)
113	269k	aom_write_symbol(
114	269k	w, hp, mv_class == MV_CLASS_0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf,
115	269k	2);
116	484k	}
117
118		/* TODO(siekyleb@amazon.com): This function writes MV_VALS ints or 128 KiB. This
119		* is more than most L1D caches and is a significant chunk of L2. Write
120		* SIMD that uses streaming writes to avoid loading all of that into L1, or
121		* just don't update the larger component costs every time this called
122		* (or both).
123		*/
124		void av1_build_nmv_component_cost_table(int *mvcost,
125		const nmv_component *const mvcomp,
126	326k	MvSubpelPrecision precision) {
127	326k	int i, j, v, o, mantissa;
128	326k	int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
129	326k	int bits_cost[MV_OFFSET_BITS][2];
130	326k	int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE] = { 0 },
131	326k	fp_cost[MV_FP_SIZE] = { 0 };
132	326k	int class0_hp_cost[2] = { 0 }, hp_cost[2] = { 0 };
133
134	326k	av1_cost_tokens_from_cdf(sign_cost, mvcomp->sign_cdf, NULL);
135	326k	av1_cost_tokens_from_cdf(class_cost, mvcomp->classes_cdf, NULL);
136	326k	av1_cost_tokens_from_cdf(class0_cost, mvcomp->class0_cdf, NULL);
137	3.58M	for (i = 0; i < MV_OFFSET_BITS; ++i) {
138	3.26M	av1_cost_tokens_from_cdf(bits_cost[i], mvcomp->bits_cdf[i], NULL);
139	3.26M	}
140
141	326k	if (precision > MV_SUBPEL_NONE) {
142	979k	for (i = 0; i < CLASS0_SIZE; ++i)
143	652k	av1_cost_tokens_from_cdf(class0_fp_cost[i], mvcomp->class0_fp_cdf[i],
144	652k	NULL);
145	326k	av1_cost_tokens_from_cdf(fp_cost, mvcomp->fp_cdf, NULL);
146	326k	}
147
148	326k	if (precision > MV_SUBPEL_LOW_PRECISION) {
149	244k	av1_cost_tokens_from_cdf(class0_hp_cost, mvcomp->class0_hp_cdf, NULL);
150	244k	av1_cost_tokens_from_cdf(hp_cost, mvcomp->hp_cdf, NULL);
151	244k	}
152
153		// Instead of accumulating the cost of each vector component's bits
154		// individually, compute the costs based on smaller vectors. Costs for
155		// [2^exp, 2 * 2^exp - 1] are calculated based on [0, 2^exp - 1]
156		// respectively. Offsets are maintained to swap both 1) class costs when
157		// treated as a complete vector component with the highest set bit when
158		// treated as a mantissa (significand) and 2) leading zeros to account for
159		// the current exponent.
160
161		// Cost offsets
162	326k	int cost_swap[MV_OFFSET_BITS] = { 0 };
163		// Delta to convert positive vector to negative vector costs
164	326k	int negate_sign = sign_cost[1] - sign_cost[0];
165
166		// Initialize with offsets to swap the class costs with the costs of the
167		// highest set bit.
168	3.26M	for (i = 1; i < MV_OFFSET_BITS; ++i) {
169	2.93M	cost_swap[i] = bits_cost[i - 1][1];
170	2.93M	if (i > CLASS0_BITS) cost_swap[i] -= class_cost[i - CLASS0_BITS];
171	2.93M	}
172
173		// Seed the fractional costs onto the output (overwritten latter).
174	1.63M	for (o = 0; o < MV_FP_SIZE; ++o) {
175	1.30M	int hp;
176	3.91M	for (hp = 0; hp < 2; ++hp) {
177	2.60M	v = 2 * o + hp + 1;
178	2.60M	mvcost[v] = fp_cost[o] + hp_cost[hp] + sign_cost[0];
179	2.60M	}
180	1.30M	}
181
182	326k	mvcost[0] = 0;
183		// Fill the costs for each exponent's vectors, using the costs set in the
184		// previous exponents.
185	3.58M	for (i = 0; i < MV_OFFSET_BITS; ++i) {
186	3.26M	const int exponent = (2 * MV_FP_SIZE) << i;
187
188	3.26M	int class = 0;
189	3.26M	if (i >= CLASS0_BITS) {
190	2.93M	class = class_cost[i - CLASS0_BITS + 1];
191	2.93M	}
192
193		// Iterate through mantissas, keeping track of the location
194		// of the highest set bit for the mantissa.
195		// To be clear: in the outer loop, the position of the highest set bit
196		// (exponent) is tracked and, in this loop, the highest set bit of the
197		// mantissa is tracked.
198	3.26M	mantissa = 0;
199	21.1M	for (j = 0; j <= i; ++j) {
200	2.59G	for (; mantissa < (2 * MV_FP_SIZE) << j; ++mantissa) {
201	2.57G	int cost = mvcost[mantissa + 1] + class + cost_swap[j];
202	2.57G	v = exponent + mantissa + 1;
203	2.57G	mvcost[v] = cost;
204	2.57G	mvcost[-v] = cost + negate_sign;
205	2.57G	}
206	17.8M	cost_swap[j] += bits_cost[i][0];
207	17.8M	}
208	3.26M	}
209
210		// Special case to avoid buffer overrun
211	326k	{
212	326k	int exponent = (2 * MV_FP_SIZE) << MV_OFFSET_BITS;
213	326k	int class = class_cost[MV_CLASSES - 1];
214	326k	mantissa = 0;
215	3.58M	for (j = 0; j < MV_OFFSET_BITS; ++j) {
216	1.31G	for (; mantissa < (2 * MV_FP_SIZE) << j; ++mantissa) {
217	1.31G	int cost = mvcost[mantissa + 1] + class + cost_swap[j];
218	1.31G	v = exponent + mantissa + 1;
219	1.31G	mvcost[v] = cost;
220	1.31G	mvcost[-v] = cost + negate_sign;
221	1.31G	}
222	3.26M	}
223		// At this point: mantissa = exponent >> 1
224
225		// Manually calculate the final cost offset
226	326k	int cost_swap_hi =
227	326k	bits_cost[MV_OFFSET_BITS - 1][1] - class_cost[MV_CLASSES - 2];
228	1.31G	for (; mantissa < exponent - 1; ++mantissa) {
229	1.31G	int cost = mvcost[mantissa + 1] + class + cost_swap_hi;
230	1.31G	v = exponent + mantissa + 1;
231	1.31G	mvcost[v] = cost;
232	1.31G	mvcost[-v] = cost + negate_sign;
233	1.31G	}
234	326k	}
235
236		// Fill costs for class0 vectors, overwriting previous placeholder values
237		// used for calculating the costs of the larger vectors.
238	979k	for (i = 0; i < CLASS0_SIZE; ++i) {
239	652k	const int top = i * 2 * MV_FP_SIZE;
240	3.26M	for (o = 0; o < MV_FP_SIZE; ++o) {
241	2.61M	int hp;
242	2.61M	int cost = class0_fp_cost[i][o] + class_cost[0] + class0_cost[i];
243	7.83M	for (hp = 0; hp < 2; ++hp) {
244	5.22M	v = top + 2 * o + hp + 1;
245	5.22M	mvcost[v] = cost + class0_hp_cost[hp] + sign_cost[0];
246	5.22M	mvcost[-v] = cost + class0_hp_cost[hp] + sign_cost[1];
247	5.22M	}
248	2.61M	}
249	652k	}
250	326k	}
251
252		void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, ThreadData *td, const MV *mv,
253	297k	const MV *ref, nmv_context *mvctx, int usehp) {
254	297k	const MV diff = { mv->row - ref->row, mv->col - ref->col };
255	297k	const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
256		// If the mv_diff is zero, then we should have used near or nearest instead.
257	297k	assert(j != MV_JOINT_ZERO);
258	297k	if (cpi->common.features.cur_frame_force_integer_mv) {
259	0	usehp = MV_SUBPEL_NONE;
260	0	}
261	297k	aom_write_symbol(w, j, mvctx->joints_cdf, MV_JOINTS);
262	297k	if (mv_joint_vertical(j))
263	244k	encode_mv_component(w, diff.row, &mvctx->comps[0], usehp);
264
265	297k	if (mv_joint_horizontal(j))
266	240k	encode_mv_component(w, diff.col, &mvctx->comps[1], usehp);
267
268		// If auto_mv_step_size is enabled then keep track of the largest
269		// motion vector component used.
270	298k	if (cpi->sf.mv_sf.auto_mv_step_size) {
271	298k	int maxv = AOMMAX(abs(mv->row), abs(mv->col)) >> 3;
272	298k	td->max_mv_magnitude = AOMMAX(maxv, td->max_mv_magnitude);
273	298k	}
274	297k	}
275
276		void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref,
277	0	nmv_context *mvctx) {
278		// DV and ref DV should not have sub-pel.
279	0	assert((mv->col & 7) == 0);
280	0	assert((mv->row & 7) == 0);
281	0	assert((ref->col & 7) == 0);
282	0	assert((ref->row & 7) == 0);
283	0	const MV diff = { mv->row - ref->row, mv->col - ref->col };
284	0	const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
285
286	0	aom_write_symbol(w, j, mvctx->joints_cdf, MV_JOINTS);
287	0	if (mv_joint_vertical(j))
288	0	encode_mv_component(w, diff.row, &mvctx->comps[0], MV_SUBPEL_NONE);
289
290	0	if (mv_joint_horizontal(j))
291	0	encode_mv_component(w, diff.col, &mvctx->comps[1], MV_SUBPEL_NONE);
292	0	}
293
294		void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
295		const nmv_context *ctx,
296	163k	MvSubpelPrecision precision) {
297	163k	av1_cost_tokens_from_cdf(mvjoint, ctx->joints_cdf, NULL);
298	163k	av1_build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], precision);
299	163k	av1_build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], precision);
300	163k	}
301
302		int_mv av1_get_ref_mv_from_stack(int ref_idx,
303		const MV_REFERENCE_FRAME *ref_frame,
304		int ref_mv_idx,
305	12.7M	const MB_MODE_INFO_EXT *mbmi_ext) {
306	12.7M	const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
307	12.7M	const CANDIDATE_MV *curr_ref_mv_stack =
308	12.7M	mbmi_ext->ref_mv_stack[ref_frame_type];
309
310	12.7M	if (ref_frame[1] > INTRA_FRAME) {
311	0	assert(ref_idx == 0 || ref_idx == 1);
312	0	return ref_idx ? curr_ref_mv_stack[ref_mv_idx].comp_mv
313	0	: curr_ref_mv_stack[ref_mv_idx].this_mv;
314	0	}
315
316	12.7M	assert(ref_idx == 0);
317	12.7M	return ref_mv_idx < mbmi_ext->ref_mv_count[ref_frame_type]
318	12.7M	? curr_ref_mv_stack[ref_mv_idx].this_mv
319	12.7M	: mbmi_ext->global_mvs[ref_frame_type];
320	12.7M	}
321
322	6.03M	int_mv av1_get_ref_mv(const MACROBLOCK *x, int ref_idx) {
323	6.03M	const MACROBLOCKD *xd = &x->e_mbd;
324	6.03M	const MB_MODE_INFO *mbmi = xd->mi[0];
325	6.03M	int ref_mv_idx = mbmi->ref_mv_idx;
326	6.03M	if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV) {
327	0	assert(has_second_ref(mbmi));
328	0	ref_mv_idx += 1;
329	0	}
330	6.03M	return av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, ref_mv_idx,
331	6.03M	&x->mbmi_ext);
332	6.03M	}
333
334		void av1_find_best_ref_mvs_from_stack(int allow_hp,
335		const MB_MODE_INFO_EXT *mbmi_ext,
336		MV_REFERENCE_FRAME ref_frame,
337		int_mv *nearest_mv, int_mv *near_mv,
338	773k	int is_integer) {
339	773k	const int ref_idx = 0;
340	773k	MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
341	773k	*nearest_mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 0, mbmi_ext);
342	773k	lower_mv_precision(&nearest_mv->as_mv, allow_hp, is_integer);
343	773k	*near_mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 1, mbmi_ext);
344	773k	lower_mv_precision(&near_mv->as_mv, allow_hp, is_integer);
345	773k	}