aom: Improve RANSAC algorithm

SDLPushPostBot · January 25, 2024, 10:32pm

Improve RANSAC algorithm

committed 10:27PM - 25 Jan 24 UTC

Implement a few ideas from the literature about RANSAC. Many ideas were tested, …but the ones which proved to be useful are: * Remove degeneracy checks. The model fitting functions implicitly check for situations where the model is underdetermined, and inlier counts provide us with a better way to exclude bad models * Fit initial models using the minimum number of points possible. Counterintuitively, this is better than using more points. The reason is that we generate a good model if and only if *all* of the points in the initial fit are inliers. Selecting fewer points at first raises the probability of this happening, and so is more likely to generate good models. * Refine the best detected model multiple times. This helps us find a large inlier set, and therefore leads to a better final model. Speed | BDRATE-PSNR | BDRATE-SSIM | Enc time -------+-------------+-------------+------------- 1 | -0.029% | -0.030% | +0.053% 2 | -0.044% | -0.042% | +0.021% 3 | -0.066% | -0.050% | +0.046% 4 | -0.076% | -0.084% | +0.160% STATS_CHANGED Change-Id: I46f04071f6927f50a53ebc60a23503a6e4cf8f64

From 8b41631760aa3637cd005bd8664aceb566283f84 Mon Sep 17 00:00:00 2001
From: Rachel Barker <[EMAIL REDACTED]>
Date: Fri, 19 Jan 2024 17:01:42 +0000
Subject: [PATCH] Improve RANSAC algorithm

Implement a few ideas from the literature about RANSAC. Many ideas
were tested, but the ones which proved to be useful are:

* Remove degeneracy checks. The model fitting functions implicitly
  check for situations where the model is underdetermined, and
  inlier counts provide us with a better way to exclude bad models

* Fit initial models using the minimum number of points possible.
  Counterintuitively, this is better than using more points. The reason
  is that we generate a good model if and only if *all* of the points
  in the initial fit are inliers. Selecting fewer points at first raises
  the probability of this happening, and so is more likely to generate
  good models.

* Refine the best detected model multiple times. This helps us find
  a large inlier set, and therefore leads to a better final model.

 Speed | BDRATE-PSNR | BDRATE-SSIM |   Enc time
-------+-------------+-------------+-------------
   1   |   -0.029%   |   -0.030%   |   +0.053%
   2   |   -0.044%   |   -0.042%   |   +0.021%
   3   |   -0.066%   |   -0.050%   |   +0.046%
   4   |   -0.076%   |   -0.084%   |   +0.160%

STATS_CHANGED

Change-Id: I46f04071f6927f50a53ebc60a23503a6e4cf8f64
---
 aom_dsp/flow_estimation/ransac.c | 162 +++++++++++++++++++++----------
 1 file changed, 110 insertions(+), 52 deletions(-)

diff --git a/aom_dsp/flow_estimation/ransac.c b/aom_dsp/flow_estimation/ransac.c
index b88a07b023..b622ff2254 100644
--- a/aom_dsp/flow_estimation/ransac.c
+++ b/aom_dsp/flow_estimation/ransac.c
@@ -29,8 +29,13 @@
 
 #define INLIER_THRESHOLD 1.25
 #define INLIER_THRESHOLD_SQUARED (INLIER_THRESHOLD * INLIER_THRESHOLD)
+
+// Number of initial models to generate
 #define NUM_TRIALS 20
 
+// Number of times to refine the best model found
+#define NUM_REFINES 5
+
 // Flag to enable functions for finding TRANSLATION type models.
 //
 // These modes are not considered currently due to a spec bug (see comments
@@ -41,7 +46,6 @@
 
 ////////////////////////////////////////////////////////////////////////////////
 // ransac
-typedef bool (*IsDegenerateFunc)(double *p);
 typedef bool (*FindTransformationFunc)(int points, const double *points1,
                                        const double *points2, double *params);
 typedef void (*ProjectPointsFunc)(const double *mat, const double *points,
@@ -51,9 +55,26 @@ typedef void (*ProjectPointsFunc)(const double *mat, const double *points,
 // vtable-like structure which stores all of the information needed by RANSAC
 // for a particular model type
 typedef struct {
-  IsDegenerateFunc is_degenerate;
   FindTransformationFunc find_transformation;
   ProjectPointsFunc project_points;
+
+  // The minimum number of points which can be passed to find_transformation
+  // to generate a model.
+  //
+  // This should be set as small as possible. This is due to an observation
+  // from section 4 of "Optimal Ransac" by A. Hast, J. Nysjö and
+  // A. Marchetti (https://dspace5.zcu.cz/bitstream/11025/6869/1/Hast.pdf):
+  // using the minimum possible number of points in the initial model maximizes
+  // the chance that all of the selected points are inliers.
+  //
+  // That paper proposes a method which can deal with models which are
+  // contaminated by outliers, which helps in cases where the inlier fraction
+  // is low. However, for our purposes, global motion only gives significant
+  // gains when the inlier fraction is high.
+  //
+  // So we do not use the method from this paper, but we do find that
+  // minimizing the number of points used for initial model fitting helps
+  // make the best use of the limited number of models we consider.
   int minpts;
 } RansacModelInfo;
 
@@ -271,6 +292,13 @@ static bool ransac_internal(const Correspondence *matched_points, int npoints,
   // currently under consideration.
   double params_this_motion[MAX_PARAMDIM];
 
+  // Initialize output models, as a fallback in case we can't find a model
+  for (i = 0; i < num_desired_motions; i++) {
+    memcpy(motion_models[i].params, kIdentityParams,
+           MAX_PARAMDIM * sizeof(*(motion_models[i].params)));
+    motion_models[i].num_inliers = 0;
+  }
+
   if (npoints < minpts * MINPTS_MULTIPLIER || npoints == 0) {
     return false;
   }
@@ -324,10 +352,6 @@ static bool ransac_internal(const Correspondence *matched_points, int npoints,
     copy_points_at_indices(points1, corners1, indices, minpts);
     copy_points_at_indices(points2, corners2, indices, minpts);
 
-    if (model_info->is_degenerate(points1)) {
-      continue;
-    }
-
     if (!model_info->find_transformation(minpts, points1, points2,
                                          params_this_motion)) {
       continue;
@@ -386,11 +410,27 @@ static bool ransac_internal(const Correspondence *matched_points, int npoints,
   // Sort the motions, best first.
   qsort(motions, num_desired_motions, sizeof(RANSAC_MOTION), compare_motions);
 
-  // Recompute the motions using only the inliers.
+  // Refine each of the best N models using iterative estimation.
+  //
+  // The idea here is loosely based on the iterative method from
+  // "Locally Optimized RANSAC" by O. Chum, J. Matas and Josef Kittler:
+  // https://cmp.felk.cvut.cz/ftp/articles/matas/chum-dagm03.pdf
+  //
+  // However, we implement a simpler version than their proposal, and simply
+  // refit the model repeatedly until the number of inliers stops increasing,
+  // with a cap on the number of iterations to defend against edge cases which
+  // only improve very slowly.
   for (i = 0; i < num_desired_motions; ++i) {
-    int num_inliers = motions[i].num_inliers;
-    if (num_inliers > 0) {
-      assert(num_inliers >= minpts);
+    if (motions[i].num_inliers <= 0) {
+      // Output model has already been initialized to the identity model,
+      // so just skip setup
+      continue;
+    }
+
+    bool bad_model = false;
+    for (int refine_count = 0; refine_count < NUM_REFINES; refine_count++) {
+      int num_inliers = motions[i].num_inliers;
+      assert(num_inliers >= min_inliers);
 
       copy_points_at_indices(points1, corners1, motions[i].inlier_indices,
                              num_inliers);
@@ -398,29 +438,65 @@ static bool ransac_internal(const Correspondence *matched_points, int npoints,
                              num_inliers);
 
       if (!model_info->find_transformation(num_inliers, points1, points2,
-                                           motion_models[i].params)) {
-        // In the unlikely event that this model fitting fails,
-        // we don't have a good fallback. So just clear the output
-        // model and move on
-        memcpy(motion_models[i].params, kIdentityParams,
-               MAX_PARAMDIM * sizeof(*(motion_models[i].params)));
-        motion_models[i].num_inliers = 0;
-        continue;
+                                           params_this_motion)) {
+        // In the unlikely event that this model fitting fails, we don't have a
+        // good fallback. So leave this model set to the identity model
+        bad_model = true;
+        break;
       }
 
-      // Populate inliers array
-      for (int j = 0; j < num_inliers; j++) {
-        int index = motions[i].inlier_indices[j];
-        const Correspondence *corr = &matched_points[index];
-        motion_models[i].inliers[2 * j + 0] = (int)rint(corr->x);
-        motion_models[i].inliers[2 * j + 1] = (int)rint(corr->y);
+      // Score the newly generated model
+      model_info->project_points(params_this_motion, corners1,
+                                 projected_corners, npoints, 2, 2);
+      current_motion.num_inliers = 0;
+      double sse = 0.0;
+      for (int k = 0; k < npoints; ++k) {
+        double dx = projected_corners[k * 2] - corners2[k * 2];
+        double dy = projected_corners[k * 2 + 1] - corners2[k * 2 + 1];
+        double squared_error = dx * dx + dy * dy;
+
+        if (squared_error < INLIER_THRESHOLD_SQUARED) {
+          current_motion.inlier_indices[current_motion.num_inliers++] = k;
+          sse += squared_error;
+        }
+      }
+      current_motion.sse = sse;
+
+      // At this point, there are three possibilities:
+      // 1) If we found more inliers, keep refining.
+      // 2) If we found the same number of inliers but a lower SSE, we want to
+      //    keep the new model, but further refinement is unlikely to gain much.
+      //    So commit to this new model
+      // 3) It is possible, but very unlikely, that the new model will have
+      //    fewer inliers. If it does happen, we probably just lost a few
+      //    borderline inliers. So treat the same as case (2).
+      if (current_motion.num_inliers > motions[i].num_inliers) {
+        motions[i].num_inliers = current_motion.num_inliers;
+        motions[i].sse = current_motion.sse;
+        int *tmp = motions[i].inlier_indices;
+        motions[i].inlier_indices = current_motion.inlier_indices;
+        current_motion.inlier_indices = tmp;
+      } else {
+        // Refined model is no better, so stop
+        // This shouldn't be significantly worse than the previous model,
+        // so it's fine to use the parameters in params_this_motion.
+        // This saves us from having to cache the previous iteration's params.
+        break;
       }
-      motion_models[i].num_inliers = num_inliers;
-    } else {
-      memcpy(motion_models[i].params, kIdentityParams,
-             MAX_PARAMDIM * sizeof(*(motion_models[i].params)));
-      motion_models[i].num_inliers = 0;
     }
+
+    if (bad_model) continue;
+
+    // Fill in output struct
+    memcpy(motion_models[i].params, params_this_motion,
+           MAX_PARAMDIM * sizeof(*motion_models[i].params));
+    for (int j = 0; j < motions[i].num_inliers; j++) {
+      int index = motions[i].inlier_indices[j];
+      const Correspondence *corr = &matched_points[index];
+      motion_models[i].inliers[2 * j + 0] = (int)rint(corr->x);
+      motion_models[i].inliers[2 * j + 1] = (int)rint(corr->y);
+    }
+    motion_models[i].num_inliers = motions[i].num_inliers;
   }
 
 finish_ransac:
@@ -435,37 +511,19 @@ static bool ransac_internal(const Correspondence *matched_points, int npoints,
   return ret_val;
 }
 
-static bool is_collinear3(double *p1, double *p2, double *p3) {
-  static const double collinear_eps = 1e-3;
-  const double v =
-      (p2[0] - p1[0]) * (p3[1] - p1[1]) - (p2[1] - p1[1]) * (p3[0] - p1[0]);
-  return fabs(v) < collinear_eps;
-}
-
-#if ALLOW_TRANSLATION_MODELS
-static bool is_degenerate_translation(double *p) {
-  return (p[0] - p[2]) * (p[0] - p[2]) + (p[1] - p[3]) * (p[1] - p[3]) <= 2;
-}
-#endif  // ALLOW_TRANSLATION_MODELS
-
-static bool is_degenerate_affine(double *p) {
-  return is_collinear3(p, p + 2, p + 4);
-}
-
 static const RansacModelInfo ransac_model_info[TRANS_TYPES] = {
   // IDENTITY
-  { NULL, NULL, NULL, 0 },
+  { NULL, NULL, 0 },
 // TRANSLATION
 #if ALLOW_TRANSLATION_MODELS
-  { is_degenerate_translation, find_translation, project_points_translation,
-    3 },
+  { find_translation, project_points_translation, 1 },
 #else
-  { NULL, NULL, NULL, 0 },
+  { NULL, NULL, 0 },
 #endif
   // ROTZOOM
-  { is_degenerate_affine, find_rotzoom, project_points_affine, 3 },
+  { find_rotzoom, project_points_affine, 2 },
   // AFFINE
-  { is_degenerate_affine, find_affine, project_points_affine, 3 },
+  { find_affine, project_points_affine, 3 },
 };
 
 // Returns true on success, false on error