/src/libavif/ext/aom/av1/encoder/encodemv.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include <math.h> |
13 | | |
14 | | #include "av1/common/common.h" |
15 | | #include "av1/common/entropymode.h" |
16 | | |
17 | | #include "av1/encoder/cost.h" |
18 | | #include "av1/encoder/encodemv.h" |
19 | | |
20 | | #include "aom_dsp/aom_dsp_common.h" |
21 | | #include "aom_ports/bitops.h" |
22 | | |
23 | | static void update_mv_component_stats(int comp, nmv_component *mvcomp, |
24 | 486k | MvSubpelPrecision precision) { |
25 | 486k | assert(comp != 0); |
26 | 486k | int offset; |
27 | 486k | const int sign = comp < 0; |
28 | 486k | const int mag = sign ? -comp : comp; |
29 | 486k | const int mv_class = av1_get_mv_class(mag - 1, &offset); |
30 | 486k | const int d = offset >> 3; // int mv data |
31 | 486k | const int fr = (offset >> 1) & 3; // fractional mv data |
32 | 486k | const int hp = offset & 1; // high precision mv data |
33 | | |
34 | | // Sign |
35 | 486k | update_cdf(mvcomp->sign_cdf, sign, 2); |
36 | | |
37 | | // Class |
38 | 486k | update_cdf(mvcomp->classes_cdf, mv_class, MV_CLASSES); |
39 | | |
40 | | // Integer bits |
41 | 486k | if (mv_class == MV_CLASS_0) { |
42 | 293k | update_cdf(mvcomp->class0_cdf, d, CLASS0_SIZE); |
43 | 293k | } else { |
44 | 192k | const int n = mv_class + CLASS0_BITS - 1; // number of bits |
45 | 649k | for (int i = 0; i < n; ++i) |
46 | 456k | update_cdf(mvcomp->bits_cdf[i], (d >> i) & 1, 2); |
47 | 192k | } |
48 | | // Fractional bits |
49 | 486k | if (precision > MV_SUBPEL_NONE) { |
50 | 486k | aom_cdf_prob *fp_cdf = |
51 | 486k | mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf; |
52 | 486k | update_cdf(fp_cdf, fr, MV_FP_SIZE); |
53 | 486k | } |
54 | | |
55 | | // High precision bit |
56 | 486k | if (precision > MV_SUBPEL_LOW_PRECISION) { |
57 | 271k | aom_cdf_prob *hp_cdf = |
58 | 271k | mv_class == MV_CLASS_0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf; |
59 | 271k | update_cdf(hp_cdf, hp, 2); |
60 | 271k | } |
61 | 486k | } |
62 | | |
63 | | void av1_update_mv_stats(const MV *mv, const MV *ref, nmv_context *mvctx, |
64 | 298k | MvSubpelPrecision precision) { |
65 | 298k | const MV diff = { mv->row - ref->row, mv->col - ref->col }; |
66 | 298k | const MV_JOINT_TYPE j = av1_get_mv_joint(&diff); |
67 | | |
68 | 298k | update_cdf(mvctx->joints_cdf, j, MV_JOINTS); |
69 | | |
70 | 298k | if (mv_joint_vertical(j)) |
71 | 245k | update_mv_component_stats(diff.row, &mvctx->comps[0], precision); |
72 | | |
73 | 298k | if (mv_joint_horizontal(j)) |
74 | 241k | update_mv_component_stats(diff.col, &mvctx->comps[1], precision); |
75 | 298k | } |
76 | | |
77 | | static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp, |
78 | 486k | MvSubpelPrecision precision) { |
79 | 486k | assert(comp != 0); |
80 | 486k | int offset; |
81 | 486k | const int sign = comp < 0; |
82 | 486k | const int mag = sign ? -comp : comp; |
83 | 486k | const int mv_class = av1_get_mv_class(mag - 1, &offset); |
84 | 486k | const int d = offset >> 3; // int mv data |
85 | 486k | const int fr = (offset >> 1) & 3; // fractional mv data |
86 | 486k | const int hp = offset & 1; // high precision mv data |
87 | | |
88 | | // Sign |
89 | 486k | aom_write_symbol(w, sign, mvcomp->sign_cdf, 2); |
90 | | |
91 | | // Class |
92 | 486k | aom_write_symbol(w, mv_class, mvcomp->classes_cdf, MV_CLASSES); |
93 | | |
94 | | // Integer bits |
95 | 486k | if (mv_class == MV_CLASS_0) { |
96 | 293k | aom_write_symbol(w, d, mvcomp->class0_cdf, CLASS0_SIZE); |
97 | 293k | } else { |
98 | 192k | int i; |
99 | 192k | const int n = mv_class + CLASS0_BITS - 1; // number of bits |
100 | 649k | for (i = 0; i < n; ++i) |
101 | 456k | aom_write_symbol(w, (d >> i) & 1, mvcomp->bits_cdf[i], 2); |
102 | 192k | } |
103 | | // Fractional bits |
104 | 486k | if (precision > MV_SUBPEL_NONE) { |
105 | 486k | aom_write_symbol( |
106 | 486k | w, fr, |
107 | 486k | mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf, |
108 | 486k | MV_FP_SIZE); |
109 | 486k | } |
110 | | |
111 | | // High precision bit |
112 | 486k | if (precision > MV_SUBPEL_LOW_PRECISION) |
113 | 271k | aom_write_symbol( |
114 | 271k | w, hp, mv_class == MV_CLASS_0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf, |
115 | 271k | 2); |
116 | 486k | } |
117 | | |
118 | | /* TODO(siekyleb@amazon.com): This function writes MV_VALS ints or 128 KiB. This |
119 | | * is more than most L1D caches and is a significant chunk of L2. Write |
120 | | * SIMD that uses streaming writes to avoid loading all of that into L1, or |
121 | | * just don't update the larger component costs every time this is called |
122 | | * (or both). |
123 | | */ |
124 | | void av1_build_nmv_component_cost_table(int *mvcost, |
125 | | const nmv_component *const mvcomp, |
126 | 324k | MvSubpelPrecision precision) { |
127 | 324k | int i, j, v, o, mantissa; |
128 | 324k | int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE]; |
129 | 324k | int bits_cost[MV_OFFSET_BITS][2]; |
130 | 324k | int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE] = { 0 }, |
131 | 324k | fp_cost[MV_FP_SIZE] = { 0 }; |
132 | 324k | int class0_hp_cost[2] = { 0 }, hp_cost[2] = { 0 }; |
133 | | |
134 | 324k | av1_cost_tokens_from_cdf(sign_cost, mvcomp->sign_cdf, NULL); |
135 | 324k | av1_cost_tokens_from_cdf(class_cost, mvcomp->classes_cdf, NULL); |
136 | 324k | av1_cost_tokens_from_cdf(class0_cost, mvcomp->class0_cdf, NULL); |
137 | 3.56M | for (i = 0; i < MV_OFFSET_BITS; ++i) { |
138 | 3.24M | av1_cost_tokens_from_cdf(bits_cost[i], mvcomp->bits_cdf[i], NULL); |
139 | 3.24M | } |
140 | | |
141 | 324k | if (precision > MV_SUBPEL_NONE) { |
142 | 972k | for (i = 0; i < CLASS0_SIZE; ++i) |
143 | 648k | av1_cost_tokens_from_cdf(class0_fp_cost[i], mvcomp->class0_fp_cdf[i], |
144 | 648k | NULL); |
145 | 324k | av1_cost_tokens_from_cdf(fp_cost, mvcomp->fp_cdf, NULL); |
146 | 324k | } |
147 | | |
148 | 324k | if (precision > MV_SUBPEL_LOW_PRECISION) { |
149 | 243k | av1_cost_tokens_from_cdf(class0_hp_cost, mvcomp->class0_hp_cdf, NULL); |
150 | 243k | av1_cost_tokens_from_cdf(hp_cost, mvcomp->hp_cdf, NULL); |
151 | 243k | } |
152 | | |
153 | | // Instead of accumulating the cost of each vector component's bits |
154 | | // individually, compute the costs based on smaller vectors. Costs for |
155 | | // [2^exp, 2 * 2^exp - 1] are calculated based on [0, 2^exp - 1] |
156 | | // respectively. Offsets are maintained to swap both 1) class costs when |
157 | | // treated as a complete vector component with the highest set bit when |
158 | | // treated as a mantissa (significand) and 2) leading zeros to account for |
159 | | // the current exponent. |
160 | | |
161 | | // Cost offsets |
162 | 324k | int cost_swap[MV_OFFSET_BITS] = { 0 }; |
163 | | // Delta to convert positive vector to negative vector costs |
164 | 324k | int negate_sign = sign_cost[1] - sign_cost[0]; |
165 | | |
166 | | // Initialize with offsets to swap the class costs with the costs of the |
167 | | // highest set bit. |
168 | 3.24M | for (i = 1; i < MV_OFFSET_BITS; ++i) { |
169 | 2.91M | cost_swap[i] = bits_cost[i - 1][1]; |
170 | 2.91M | if (i > CLASS0_BITS) cost_swap[i] -= class_cost[i - CLASS0_BITS]; |
171 | 2.91M | } |
172 | | |
173 | | // Seed the fractional costs onto the output (overwritten later). |
174 | 1.62M | for (o = 0; o < MV_FP_SIZE; ++o) { |
175 | 1.29M | int hp; |
176 | 3.88M | for (hp = 0; hp < 2; ++hp) { |
177 | 2.59M | v = 2 * o + hp + 1; |
178 | 2.59M | mvcost[v] = fp_cost[o] + hp_cost[hp] + sign_cost[0]; |
179 | 2.59M | } |
180 | 1.29M | } |
181 | | |
182 | 324k | mvcost[0] = 0; |
183 | | // Fill the costs for each exponent's vectors, using the costs set in the |
184 | | // previous exponents. |
185 | 3.56M | for (i = 0; i < MV_OFFSET_BITS; ++i) { |
186 | 3.24M | const int exponent = (2 * MV_FP_SIZE) << i; |
187 | | |
188 | 3.24M | int class = 0; |
189 | 3.24M | if (i >= CLASS0_BITS) { |
190 | 2.91M | class = class_cost[i - CLASS0_BITS + 1]; |
191 | 2.91M | } |
192 | | |
193 | | // Iterate through mantissas, keeping track of the location |
194 | | // of the highest set bit for the mantissa. |
195 | | // To be clear: in the outer loop, the position of the highest set bit |
196 | | // (exponent) is tracked and, in this loop, the highest set bit of the |
197 | | // mantissa is tracked. |
198 | 3.24M | mantissa = 0; |
199 | 21.0M | for (j = 0; j <= i; ++j) { |
200 | 2.57G | for (; mantissa < (2 * MV_FP_SIZE) << j; ++mantissa) { |
201 | 2.55G | int cost = mvcost[mantissa + 1] + class + cost_swap[j]; |
202 | 2.55G | v = exponent + mantissa + 1; |
203 | 2.55G | mvcost[v] = cost; |
204 | 2.55G | mvcost[-v] = cost + negate_sign; |
205 | 2.55G | } |
206 | 17.7M | cost_swap[j] += bits_cost[i][0]; |
207 | 17.7M | } |
208 | 3.24M | } |
209 | | |
210 | | // Special case to avoid buffer overrun |
211 | 324k | { |
212 | 324k | int exponent = (2 * MV_FP_SIZE) << MV_OFFSET_BITS; |
213 | 324k | int class = class_cost[MV_CLASSES - 1]; |
214 | 324k | mantissa = 0; |
215 | 3.56M | for (j = 0; j < MV_OFFSET_BITS; ++j) { |
216 | 1.30G | for (; mantissa < (2 * MV_FP_SIZE) << j; ++mantissa) { |
217 | 1.30G | int cost = mvcost[mantissa + 1] + class + cost_swap[j]; |
218 | 1.30G | v = exponent + mantissa + 1; |
219 | 1.30G | mvcost[v] = cost; |
220 | 1.30G | mvcost[-v] = cost + negate_sign; |
221 | 1.30G | } |
222 | 3.24M | } |
223 | | // At this point: mantissa = exponent >> 1 |
224 | | |
225 | | // Manually calculate the final cost offset |
226 | 324k | int cost_swap_hi = |
227 | 324k | bits_cost[MV_OFFSET_BITS - 1][1] - class_cost[MV_CLASSES - 2]; |
228 | 1.30G | for (; mantissa < exponent - 1; ++mantissa) { |
229 | 1.30G | int cost = mvcost[mantissa + 1] + class + cost_swap_hi; |
230 | 1.30G | v = exponent + mantissa + 1; |
231 | 1.30G | mvcost[v] = cost; |
232 | 1.30G | mvcost[-v] = cost + negate_sign; |
233 | 1.30G | } |
234 | 324k | } |
235 | | |
236 | | // Fill costs for class0 vectors, overwriting previous placeholder values |
237 | | // used for calculating the costs of the larger vectors. |
238 | 973k | for (i = 0; i < CLASS0_SIZE; ++i) { |
239 | 648k | const int top = i * 2 * MV_FP_SIZE; |
240 | 3.24M | for (o = 0; o < MV_FP_SIZE; ++o) { |
241 | 2.59M | int hp; |
242 | 2.59M | int cost = class0_fp_cost[i][o] + class_cost[0] + class0_cost[i]; |
243 | 7.78M | for (hp = 0; hp < 2; ++hp) { |
244 | 5.18M | v = top + 2 * o + hp + 1; |
245 | 5.18M | mvcost[v] = cost + class0_hp_cost[hp] + sign_cost[0]; |
246 | 5.18M | mvcost[-v] = cost + class0_hp_cost[hp] + sign_cost[1]; |
247 | 5.18M | } |
248 | 2.59M | } |
249 | 648k | } |
250 | 324k | } |
251 | | |
252 | | void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, ThreadData *td, const MV *mv, |
253 | 298k | const MV *ref, nmv_context *mvctx, int usehp) { |
254 | 298k | const MV diff = { mv->row - ref->row, mv->col - ref->col }; |
255 | 298k | const MV_JOINT_TYPE j = av1_get_mv_joint(&diff); |
256 | | // If the mv_diff is zero, then we should have used near or nearest instead. |
257 | 298k | assert(j != MV_JOINT_ZERO); |
258 | 298k | if (cpi->common.features.cur_frame_force_integer_mv) { |
259 | 0 | usehp = MV_SUBPEL_NONE; |
260 | 0 | } |
261 | 298k | aom_write_symbol(w, j, mvctx->joints_cdf, MV_JOINTS); |
262 | 298k | if (mv_joint_vertical(j)) |
263 | 245k | encode_mv_component(w, diff.row, &mvctx->comps[0], usehp); |
264 | | |
265 | 298k | if (mv_joint_horizontal(j)) |
266 | 241k | encode_mv_component(w, diff.col, &mvctx->comps[1], usehp); |
267 | | |
268 | | // If auto_mv_step_size is enabled then keep track of the largest |
269 | | // motion vector component used. |
270 | 298k | if (cpi->sf.mv_sf.auto_mv_step_size) { |
271 | 298k | int maxv = AOMMAX(abs(mv->row), abs(mv->col)) >> 3; |
272 | 298k | td->max_mv_magnitude = AOMMAX(maxv, td->max_mv_magnitude); |
273 | 298k | } |
274 | 298k | } |
275 | | |
276 | | void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref, |
277 | 0 | nmv_context *mvctx) { |
278 | | // DV and ref DV should not have sub-pel. |
279 | 0 | assert((mv->col & 7) == 0); |
280 | 0 | assert((mv->row & 7) == 0); |
281 | 0 | assert((ref->col & 7) == 0); |
282 | 0 | assert((ref->row & 7) == 0); |
283 | 0 | const MV diff = { mv->row - ref->row, mv->col - ref->col }; |
284 | 0 | const MV_JOINT_TYPE j = av1_get_mv_joint(&diff); |
285 | | |
286 | 0 | aom_write_symbol(w, j, mvctx->joints_cdf, MV_JOINTS); |
287 | 0 | if (mv_joint_vertical(j)) |
288 | 0 | encode_mv_component(w, diff.row, &mvctx->comps[0], MV_SUBPEL_NONE); |
289 | | |
290 | 0 | if (mv_joint_horizontal(j)) |
291 | 0 | encode_mv_component(w, diff.col, &mvctx->comps[1], MV_SUBPEL_NONE); |
292 | 0 | } |
293 | | |
294 | | void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2], |
295 | | const nmv_context *ctx, |
296 | 161k | MvSubpelPrecision precision) { |
297 | 161k | av1_cost_tokens_from_cdf(mvjoint, ctx->joints_cdf, NULL); |
298 | 161k | av1_build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], precision); |
299 | 161k | av1_build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], precision); |
300 | 161k | } |
301 | | |
302 | | int_mv av1_get_ref_mv_from_stack(int ref_idx, |
303 | | const MV_REFERENCE_FRAME *ref_frame, |
304 | | int ref_mv_idx, |
305 | 12.8M | const MB_MODE_INFO_EXT *mbmi_ext) { |
306 | 12.8M | const int8_t ref_frame_type = av1_ref_frame_type(ref_frame); |
307 | 12.8M | const CANDIDATE_MV *curr_ref_mv_stack = |
308 | 12.8M | mbmi_ext->ref_mv_stack[ref_frame_type]; |
309 | | |
310 | 12.8M | if (ref_frame[1] > INTRA_FRAME) { |
311 | 0 | assert(ref_idx == 0 || ref_idx == 1); |
312 | 0 | return ref_idx ? curr_ref_mv_stack[ref_mv_idx].comp_mv |
313 | 0 | : curr_ref_mv_stack[ref_mv_idx].this_mv; |
314 | 0 | } |
315 | | |
316 | 12.8M | assert(ref_idx == 0); |
317 | 12.8M | return ref_mv_idx < mbmi_ext->ref_mv_count[ref_frame_type] |
318 | 12.8M | ? curr_ref_mv_stack[ref_mv_idx].this_mv |
319 | 12.8M | : mbmi_ext->global_mvs[ref_frame_type]; |
320 | 12.8M | } |
321 | | |
322 | 6.10M | int_mv av1_get_ref_mv(const MACROBLOCK *x, int ref_idx) { |
323 | 6.10M | const MACROBLOCKD *xd = &x->e_mbd; |
324 | 6.10M | const MB_MODE_INFO *mbmi = xd->mi[0]; |
325 | 6.10M | int ref_mv_idx = mbmi->ref_mv_idx; |
326 | 6.10M | if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV) { |
327 | 0 | assert(has_second_ref(mbmi)); |
328 | 0 | ref_mv_idx += 1; |
329 | 0 | } |
330 | 6.10M | return av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, ref_mv_idx, |
331 | 6.10M | &x->mbmi_ext); |
332 | 6.10M | } |
333 | | |
334 | | void av1_find_best_ref_mvs_from_stack(int allow_hp, |
335 | | const MB_MODE_INFO_EXT *mbmi_ext, |
336 | | MV_REFERENCE_FRAME ref_frame, |
337 | | int_mv *nearest_mv, int_mv *near_mv, |
338 | 771k | int is_integer) { |
339 | 771k | const int ref_idx = 0; |
340 | 771k | MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME }; |
341 | 771k | *nearest_mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 0, mbmi_ext); |
342 | 771k | lower_mv_precision(&nearest_mv->as_mv, allow_hp, is_integer); |
343 | 771k | *near_mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 1, mbmi_ext); |
344 | 771k | lower_mv_precision(&near_mv->as_mv, allow_hp, is_integer); |
345 | 771k | } |