Coverage Report

Created: 2025-07-16 07:53

/src/aom/aom_dsp/flow_estimation/ransac.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <memory.h>
13
#include <math.h>
14
#include <time.h>
15
#include <stdio.h>
16
#include <stdbool.h>
17
#include <string.h>
18
#include <assert.h>
19
20
#include "aom_dsp/flow_estimation/ransac.h"
21
#include "aom_dsp/mathutils.h"
22
#include "aom_mem/aom_mem.h"
23
24
// TODO(rachelbarker): Remove dependence on code in av1/encoder/
25
#include "av1/encoder/random.h"
26
27
#define MAX_MINPTS 4
28
0
#define MINPTS_MULTIPLIER 5
29
30
0
#define INLIER_THRESHOLD 1.25
31
0
#define INLIER_THRESHOLD_SQUARED (INLIER_THRESHOLD * INLIER_THRESHOLD)
32
33
// Number of initial models to generate
34
0
#define NUM_TRIALS 20
35
36
// Number of times to refine the best model found
37
0
#define NUM_REFINES 5
38
39
// Flag to enable functions for finding TRANSLATION type models.
40
//
41
// These modes are not considered currently due to a spec bug (see comments
42
// in gm_get_motion_vector() in av1/common/mv.h). Thus we don't need to compile
43
// the corresponding search functions, but it is nice to keep the source around
44
// but disabled, for completeness.
45
#define ALLOW_TRANSLATION_MODELS 0
46
47
typedef struct {
48
  int num_inliers;
49
  double sse;  // Sum of squared errors of inliers
50
  int *inlier_indices;
51
} RANSAC_MOTION;
52
53
////////////////////////////////////////////////////////////////////////////////
54
// ransac
55
typedef bool (*FindTransformationFunc)(const Correspondence *points,
56
                                       const int *indices, int num_indices,
57
                                       double *params);
58
typedef void (*ScoreModelFunc)(const double *mat, const Correspondence *points,
59
                               int num_points, RANSAC_MOTION *model);
60
61
// vtable-like structure which stores all of the information needed by RANSAC
62
// for a particular model type
63
typedef struct {
64
  FindTransformationFunc find_transformation;
65
  ScoreModelFunc score_model;
66
67
  // The minimum number of points which can be passed to find_transformation
68
  // to generate a model.
69
  //
70
  // This should be set as small as possible. This is due to an observation
71
  // from section 4 of "Optimal Ransac" by A. Hast, J. Nysjö and
72
  // A. Marchetti (https://dspace5.zcu.cz/bitstream/11025/6869/1/Hast.pdf):
73
  // using the minimum possible number of points in the initial model maximizes
74
  // the chance that all of the selected points are inliers.
75
  //
76
  // That paper proposes a method which can deal with models which are
77
  // contaminated by outliers, which helps in cases where the inlier fraction
78
  // is low. However, for our purposes, global motion only gives significant
79
  // gains when the inlier fraction is high.
80
  //
81
  // So we do not use the method from this paper, but we do find that
82
  // minimizing the number of points used for initial model fitting helps
83
  // make the best use of the limited number of models we consider.
84
  int minpts;
85
} RansacModelInfo;
86
87
#if ALLOW_TRANSLATION_MODELS
88
static void score_translation(const double *mat, const Correspondence *points,
89
                              int num_points, RANSAC_MOTION *model) {
90
  model->num_inliers = 0;
91
  model->sse = 0.0;
92
93
  for (int i = 0; i < num_points; ++i) {
94
    const double x1 = points[i].x;
95
    const double y1 = points[i].y;
96
    const double x2 = points[i].rx;
97
    const double y2 = points[i].ry;
98
99
    const double proj_x = x1 + mat[0];
100
    const double proj_y = y1 + mat[1];
101
102
    const double dx = proj_x - x2;
103
    const double dy = proj_y - y2;
104
    const double sse = dx * dx + dy * dy;
105
106
    if (sse < INLIER_THRESHOLD_SQUARED) {
107
      model->inlier_indices[model->num_inliers++] = i;
108
      model->sse += sse;
109
    }
110
  }
111
}
112
#endif  // ALLOW_TRANSLATION_MODELS
113
114
static void score_affine(const double *mat, const Correspondence *points,
115
0
                         int num_points, RANSAC_MOTION *model) {
116
0
  model->num_inliers = 0;
117
0
  model->sse = 0.0;
118
119
0
  for (int i = 0; i < num_points; ++i) {
120
0
    const double x1 = points[i].x;
121
0
    const double y1 = points[i].y;
122
0
    const double x2 = points[i].rx;
123
0
    const double y2 = points[i].ry;
124
125
0
    const double proj_x = mat[2] * x1 + mat[3] * y1 + mat[0];
126
0
    const double proj_y = mat[4] * x1 + mat[5] * y1 + mat[1];
127
128
0
    const double dx = proj_x - x2;
129
0
    const double dy = proj_y - y2;
130
0
    const double sse = dx * dx + dy * dy;
131
132
0
    if (sse < INLIER_THRESHOLD_SQUARED) {
133
0
      model->inlier_indices[model->num_inliers++] = i;
134
0
      model->sse += sse;
135
0
    }
136
0
  }
137
0
}
138
139
#if ALLOW_TRANSLATION_MODELS
140
static bool find_translation(const Correspondence *points, const int *indices,
141
                             int num_indices, double *params) {
142
  double sumx = 0;
143
  double sumy = 0;
144
145
  for (int i = 0; i < num_indices; ++i) {
146
    int index = indices[i];
147
    const double sx = points[index].x;
148
    const double sy = points[index].y;
149
    const double dx = points[index].rx;
150
    const double dy = points[index].ry;
151
152
    sumx += dx - sx;
153
    sumy += dy - sy;
154
  }
155
156
  params[0] = sumx / np;
157
  params[1] = sumy / np;
158
  params[2] = 1;
159
  params[3] = 0;
160
  params[4] = 0;
161
  params[5] = 1;
162
  return true;
163
}
164
#endif  // ALLOW_TRANSLATION_MODELS
165
166
static bool find_rotzoom(const Correspondence *points, const int *indices,
167
0
                         int num_indices, double *params) {
168
0
  const int n = 4;    // Size of least-squares problem
169
0
  double mat[4 * 4];  // Accumulator for A'A
170
0
  double y[4];        // Accumulator for A'b
171
0
  double a[4];        // Single row of A
172
0
  double b;           // Single element of b
173
174
0
  least_squares_init(mat, y, n);
175
0
  for (int i = 0; i < num_indices; ++i) {
176
0
    int index = indices[i];
177
0
    const double sx = points[index].x;
178
0
    const double sy = points[index].y;
179
0
    const double dx = points[index].rx;
180
0
    const double dy = points[index].ry;
181
182
0
    a[0] = 1;
183
0
    a[1] = 0;
184
0
    a[2] = sx;
185
0
    a[3] = sy;
186
0
    b = dx;
187
0
    least_squares_accumulate(mat, y, a, b, n);
188
189
0
    a[0] = 0;
190
0
    a[1] = 1;
191
0
    a[2] = sy;
192
0
    a[3] = -sx;
193
0
    b = dy;
194
0
    least_squares_accumulate(mat, y, a, b, n);
195
0
  }
196
197
  // Fill in params[0] .. params[3] with output model
198
0
  if (!least_squares_solve(mat, y, params, n)) {
199
0
    return false;
200
0
  }
201
202
  // Fill in remaining parameters
203
0
  params[4] = -params[3];
204
0
  params[5] = params[2];
205
206
0
  return true;
207
0
}
208
209
static bool find_affine(const Correspondence *points, const int *indices,
210
0
                        int num_indices, double *params) {
211
  // Note: The least squares problem for affine models is 6-dimensional,
212
  // but it splits into two independent 3-dimensional subproblems.
213
  // Solving these two subproblems separately and recombining at the end
214
  // results in less total computation than solving the 6-dimensional
215
  // problem directly.
216
  //
217
  // The two subproblems correspond to all the parameters which contribute
218
  // to the x output of the model, and all the parameters which contribute
219
  // to the y output, respectively.
220
221
0
  const int n = 3;       // Size of each least-squares problem
222
0
  double mat[2][3 * 3];  // Accumulator for A'A
223
0
  double y[2][3];        // Accumulator for A'b
224
0
  double x[2][3];        // Output vector
225
0
  double a[2][3];        // Single row of A
226
0
  double b[2];           // Single element of b
227
228
0
  least_squares_init(mat[0], y[0], n);
229
0
  least_squares_init(mat[1], y[1], n);
230
0
  for (int i = 0; i < num_indices; ++i) {
231
0
    int index = indices[i];
232
0
    const double sx = points[index].x;
233
0
    const double sy = points[index].y;
234
0
    const double dx = points[index].rx;
235
0
    const double dy = points[index].ry;
236
237
0
    a[0][0] = 1;
238
0
    a[0][1] = sx;
239
0
    a[0][2] = sy;
240
0
    b[0] = dx;
241
0
    least_squares_accumulate(mat[0], y[0], a[0], b[0], n);
242
243
0
    a[1][0] = 1;
244
0
    a[1][1] = sx;
245
0
    a[1][2] = sy;
246
0
    b[1] = dy;
247
0
    least_squares_accumulate(mat[1], y[1], a[1], b[1], n);
248
0
  }
249
250
0
  if (!least_squares_solve(mat[0], y[0], x[0], n)) {
251
0
    return false;
252
0
  }
253
0
  if (!least_squares_solve(mat[1], y[1], x[1], n)) {
254
0
    return false;
255
0
  }
256
257
  // Rearrange least squares result to form output model
258
0
  params[0] = x[0][0];
259
0
  params[1] = x[1][0];
260
0
  params[2] = x[0][1];
261
0
  params[3] = x[0][2];
262
0
  params[4] = x[1][1];
263
0
  params[5] = x[1][2];
264
265
0
  return true;
266
0
}
267
268
// Return -1 if 'a' is a better motion, 1 if 'b' is better, 0 otherwise.
269
0
static int compare_motions(const void *arg_a, const void *arg_b) {
270
0
  const RANSAC_MOTION *motion_a = (RANSAC_MOTION *)arg_a;
271
0
  const RANSAC_MOTION *motion_b = (RANSAC_MOTION *)arg_b;
272
273
0
  if (motion_a->num_inliers > motion_b->num_inliers) return -1;
274
0
  if (motion_a->num_inliers < motion_b->num_inliers) return 1;
275
0
  if (motion_a->sse < motion_b->sse) return -1;
276
0
  if (motion_a->sse > motion_b->sse) return 1;
277
0
  return 0;
278
0
}
279
280
static bool is_better_motion(const RANSAC_MOTION *motion_a,
281
0
                             const RANSAC_MOTION *motion_b) {
282
0
  return compare_motions(motion_a, motion_b) < 0;
283
0
}
284
285
// Returns true on success, false on error
286
static bool ransac_internal(const Correspondence *matched_points, int npoints,
287
                            MotionModel *motion_models, int num_desired_motions,
288
                            const RansacModelInfo *model_info,
289
0
                            bool *mem_alloc_failed) {
290
0
  assert(npoints >= 0);
291
0
  int i = 0;
292
0
  int minpts = model_info->minpts;
293
0
  bool ret_val = true;
294
295
0
  unsigned int seed = (unsigned int)npoints;
296
297
0
  int indices[MAX_MINPTS] = { 0 };
298
299
  // Store information for the num_desired_motions best transformations found
300
  // and the worst motion among them, as well as the motion currently under
301
  // consideration.
302
0
  RANSAC_MOTION *motions, *worst_kept_motion = NULL;
303
0
  RANSAC_MOTION current_motion;
304
305
  // Store the parameters and the indices of the inlier points for the motion
306
  // currently under consideration.
307
0
  double params_this_motion[MAX_PARAMDIM];
308
309
  // Initialize output models, as a fallback in case we can't find a model
310
0
  for (i = 0; i < num_desired_motions; i++) {
311
0
    memcpy(motion_models[i].params, kIdentityParams,
312
0
           MAX_PARAMDIM * sizeof(*(motion_models[i].params)));
313
0
    motion_models[i].num_inliers = 0;
314
0
  }
315
316
0
  if (npoints < minpts * MINPTS_MULTIPLIER || npoints == 0) {
317
0
    return false;
318
0
  }
319
320
0
  int min_inliers = AOMMAX((int)(MIN_INLIER_PROB * npoints), minpts);
321
322
0
  motions =
323
0
      (RANSAC_MOTION *)aom_calloc(num_desired_motions, sizeof(RANSAC_MOTION));
324
325
  // Allocate one large buffer which will be carved up to store the inlier
326
  // indices for the current motion plus the num_desired_motions many
327
  // output models
328
  // This allows us to keep the allocation/deallocation logic simple, without
329
  // having to (for example) check that `motions` is non-null before allocating
330
  // the inlier arrays
331
0
  int *inlier_buffer = (int *)aom_malloc(sizeof(*inlier_buffer) * npoints *
332
0
                                         (num_desired_motions + 1));
333
334
0
  if (!(motions && inlier_buffer)) {
335
0
    ret_val = false;
336
0
    *mem_alloc_failed = true;
337
0
    goto finish_ransac;
338
0
  }
339
340
  // Once all our allocations are known-good, we can fill in our structures
341
0
  worst_kept_motion = motions;
342
343
0
  for (i = 0; i < num_desired_motions; ++i) {
344
0
    motions[i].inlier_indices = inlier_buffer + i * npoints;
345
0
  }
346
0
  memset(&current_motion, 0, sizeof(current_motion));
347
0
  current_motion.inlier_indices = inlier_buffer + num_desired_motions * npoints;
348
349
0
  for (int trial_count = 0; trial_count < NUM_TRIALS; trial_count++) {
350
0
    lcg_pick(npoints, minpts, indices, &seed);
351
352
0
    if (!model_info->find_transformation(matched_points, indices, minpts,
353
0
                                         params_this_motion)) {
354
0
      continue;
355
0
    }
356
357
0
    model_info->score_model(params_this_motion, matched_points, npoints,
358
0
                            &current_motion);
359
360
0
    if (current_motion.num_inliers < min_inliers) {
361
      // Reject models with too few inliers
362
0
      continue;
363
0
    }
364
365
0
    if (is_better_motion(&current_motion, worst_kept_motion)) {
366
      // This motion is better than the worst currently kept motion. Remember
367
      // the inlier points and sse. The parameters for each kept motion
368
      // will be recomputed later using only the inliers.
369
0
      worst_kept_motion->num_inliers = current_motion.num_inliers;
370
0
      worst_kept_motion->sse = current_motion.sse;
371
372
      // Rather than copying the (potentially many) inlier indices from
373
      // current_motion.inlier_indices to worst_kept_motion->inlier_indices,
374
      // we can swap the underlying pointers.
375
      //
376
      // This is okay because the next time current_motion.inlier_indices
377
      // is used will be in the next trial, where we ignore its previous
378
      // contents anyway. And both arrays will be deallocated together at the
379
      // end of this function, so there are no lifetime issues.
380
0
      int *tmp = worst_kept_motion->inlier_indices;
381
0
      worst_kept_motion->inlier_indices = current_motion.inlier_indices;
382
0
      current_motion.inlier_indices = tmp;
383
384
      // Determine the new worst kept motion and its num_inliers and sse.
385
0
      for (i = 0; i < num_desired_motions; ++i) {
386
0
        if (is_better_motion(worst_kept_motion, &motions[i])) {
387
0
          worst_kept_motion = &motions[i];
388
0
        }
389
0
      }
390
0
    }
391
0
  }
392
393
  // Sort the motions, best first.
394
0
  qsort(motions, num_desired_motions, sizeof(RANSAC_MOTION), compare_motions);
395
396
  // Refine each of the best N models using iterative estimation.
397
  //
398
  // The idea here is loosely based on the iterative method from
399
  // "Locally Optimized RANSAC" by O. Chum, J. Matas and Josef Kittler:
400
  // https://cmp.felk.cvut.cz/ftp/articles/matas/chum-dagm03.pdf
401
  //
402
  // However, we implement a simpler version than their proposal, and simply
403
  // refit the model repeatedly until the number of inliers stops increasing,
404
  // with a cap on the number of iterations to defend against edge cases which
405
  // only improve very slowly.
406
0
  for (i = 0; i < num_desired_motions; ++i) {
407
0
    if (motions[i].num_inliers <= 0) {
408
      // Output model has already been initialized to the identity model,
409
      // so just skip setup
410
0
      continue;
411
0
    }
412
413
0
    bool bad_model = false;
414
0
    for (int refine_count = 0; refine_count < NUM_REFINES; refine_count++) {
415
0
      int num_inliers = motions[i].num_inliers;
416
0
      assert(num_inliers >= min_inliers);
417
418
0
      if (!model_info->find_transformation(matched_points,
419
0
                                           motions[i].inlier_indices,
420
0
                                           num_inliers, params_this_motion)) {
421
        // In the unlikely event that this model fitting fails, we don't have a
422
        // good fallback. So leave this model set to the identity model
423
0
        bad_model = true;
424
0
        break;
425
0
      }
426
427
      // Score the newly generated model
428
0
      model_info->score_model(params_this_motion, matched_points, npoints,
429
0
                              &current_motion);
430
431
      // At this point, there are three possibilities:
432
      // 1) If we found more inliers, keep refining.
433
      // 2) If we found the same number of inliers but a lower SSE, we want to
434
      //    keep the new model, but further refinement is unlikely to gain much.
435
      //    So commit to this new model
436
      // 3) It is possible, but very unlikely, that the new model will have
437
      //    fewer inliers. If it does happen, we probably just lost a few
438
      //    borderline inliers. So treat the same as case (2).
439
0
      if (current_motion.num_inliers > motions[i].num_inliers) {
440
0
        motions[i].num_inliers = current_motion.num_inliers;
441
0
        motions[i].sse = current_motion.sse;
442
0
        int *tmp = motions[i].inlier_indices;
443
0
        motions[i].inlier_indices = current_motion.inlier_indices;
444
0
        current_motion.inlier_indices = tmp;
445
0
      } else {
446
        // Refined model is no better, so stop
447
        // This shouldn't be significantly worse than the previous model,
448
        // so it's fine to use the parameters in params_this_motion.
449
        // This saves us from having to cache the previous iteration's params.
450
0
        break;
451
0
      }
452
0
    }
453
454
0
    if (bad_model) continue;
455
456
    // Fill in output struct
457
0
    memcpy(motion_models[i].params, params_this_motion,
458
0
           MAX_PARAMDIM * sizeof(*motion_models[i].params));
459
0
    for (int j = 0; j < motions[i].num_inliers; j++) {
460
0
      int index = motions[i].inlier_indices[j];
461
0
      const Correspondence *corr = &matched_points[index];
462
0
      motion_models[i].inliers[2 * j + 0] = (int)rint(corr->x);
463
0
      motion_models[i].inliers[2 * j + 1] = (int)rint(corr->y);
464
0
    }
465
0
    motion_models[i].num_inliers = motions[i].num_inliers;
466
0
  }
467
468
0
finish_ransac:
469
0
  aom_free(inlier_buffer);
470
0
  aom_free(motions);
471
472
0
  return ret_val;
473
0
}
474
475
static const RansacModelInfo ransac_model_info[TRANS_TYPES] = {
476
  // IDENTITY
477
  { NULL, NULL, 0 },
478
// TRANSLATION
479
#if ALLOW_TRANSLATION_MODELS
480
  { find_translation, score_translation, 1 },
481
#else
482
  { NULL, NULL, 0 },
483
#endif
484
  // ROTZOOM
485
  { find_rotzoom, score_affine, 2 },
486
  // AFFINE
487
  { find_affine, score_affine, 3 },
488
};
489
490
// Returns true on success, false on error
491
bool ransac(const Correspondence *matched_points, int npoints,
492
            TransformationType type, MotionModel *motion_models,
493
0
            int num_desired_motions, bool *mem_alloc_failed) {
494
#if ALLOW_TRANSLATION_MODELS
495
  assert(type > IDENTITY && type < TRANS_TYPES);
496
#else
497
0
  assert(type > TRANSLATION && type < TRANS_TYPES);
498
0
#endif  // ALLOW_TRANSLATION_MODELS
499
500
0
  return ransac_internal(matched_points, npoints, motion_models,
501
0
                         num_desired_motions, &ransac_model_info[type],
502
0
                         mem_alloc_failed);
503
0
}