aom: Add new global motion speed feature

From a55ceb124226fd25650ddc84b8462e4dc2f7e1c5 Mon Sep 17 00:00:00 2001
From: Rachel Barker <[EMAIL REDACTED]>
Date: Tue, 13 Feb 2024 14:56:53 +0000
Subject: [PATCH] Add new global motion speed feature

Add a speed feature which causes the feature list used in
global motion estimation to be generated from a downsampled
version of the input frame.

Tuning:
* downsample_level = 0 for speed <= 3
* downsample_level = 1 for speed 4,5
* downsample_level = 2 for speed 6

Relative to the previous patch *with global motion forced on
for speed 5 and 6*, the impact of this change is:

 Speed | BDRATE-PSNR | BDRATE-SSIM |   Enc time
-------+-------------+-------------+-------------
   4   |   +0.032%   |   +0.016%   |   -0.762%
   5   |   +0.053%   |   +0.051%   |   -1.569%
   6   |   +0.272%   |   +0.286%   |   -3.242%

IMPORTANT: Global motion is not enabled yet on speed 5 or 6,
so only speed 4 is impacted right now. However, this patch is
a step toward enabling global motion for higher speeds.

STATS_CHANGED

Change-Id: If0a1640c6f24b9bf91c7350b9a905c9d7f673695
---
 aom_dsp/flow_estimation/corner_detect.c   | 43 ++++++++++++++++-------
 aom_dsp/flow_estimation/corner_detect.h   |  3 +-
 aom_dsp/flow_estimation/corner_match.c    |  8 ++---
 aom_dsp/flow_estimation/corner_match.h    |  4 +--
 aom_dsp/flow_estimation/disflow.c         | 12 +++----
 aom_dsp/flow_estimation/disflow.h         | 10 +++---
 aom_dsp/flow_estimation/flow_estimation.c | 12 +++----
 aom_dsp/flow_estimation/flow_estimation.h |  2 +-
 av1/encoder/global_motion_facade.c        |  8 +++--
 av1/encoder/speed_features.c              |  4 +++
 av1/encoder/speed_features.h              |  3 ++
 11 files changed, 67 insertions(+), 42 deletions(-)

diff --git a/aom_dsp/flow_estimation/corner_detect.c b/aom_dsp/flow_estimation/corner_detect.c
index c3c0939e32..44d423dcdf 100644
--- a/aom_dsp/flow_estimation/corner_detect.c
+++ b/aom_dsp/flow_estimation/corner_detect.c
@@ -40,11 +40,24 @@ CornerList *av1_alloc_corner_list(void) {
   return corners;
 }
 
-static bool compute_corner_list(const ImagePyramid *pyr, CornerList *corners) {
-  const uint8_t *buf = pyr->layers[0].buffer;
-  int width = pyr->layers[0].width;
-  int height = pyr->layers[0].height;
-  int stride = pyr->layers[0].stride;
+static bool compute_corner_list(const YV12_BUFFER_CONFIG *frame, int bit_depth,
+                                int downsample_level, CornerList *corners) {
+  ImagePyramid *pyr = frame->y_pyramid;
+  const int layers =
+      aom_compute_pyramid(frame, bit_depth, downsample_level + 1, pyr);
+
+  if (layers < 0) {
+    return false;
+  }
+
+  // Clamp downsampling ratio based on max number of layers allowed
+  // for this frame size
+  downsample_level = layers - 1;
+
+  const uint8_t *buf = pyr->layers[downsample_level].buffer;
+  int width = pyr->layers[downsample_level].width;
+  int height = pyr->layers[downsample_level].height;
+  int stride = pyr->layers[downsample_level].stride;
 
   int *scores = NULL;
   int num_corners;
@@ -54,9 +67,11 @@ static bool compute_corner_list(const ImagePyramid *pyr, CornerList *corners) {
 
   if (num_corners <= MAX_CORNERS) {
     // Use all detected corners
-    if (num_corners != 0) {
-      memcpy(corners->corners, frame_corners_xy,
-             sizeof(*frame_corners_xy) * num_corners);
+    for (int i = 0; i < num_corners; i++) {
+      corners->corners[2 * i + 0] =
+          frame_corners_xy[i].x * (1 << downsample_level);
+      corners->corners[2 * i + 1] =
+          frame_corners_xy[i].y * (1 << downsample_level);
     }
     corners->num_corners = num_corners;
   } else {
@@ -86,8 +101,10 @@ static bool compute_corner_list(const ImagePyramid *pyr, CornerList *corners) {
     for (int i = 0; i < num_corners; i++) {
       if (scores[i] > threshold) {
         assert(copied_corners < MAX_CORNERS);
-        corners->corners[2 * copied_corners + 0] = frame_corners_xy[i].x;
-        corners->corners[2 * copied_corners + 1] = frame_corners_xy[i].y;
+        corners->corners[2 * copied_corners + 0] =
+            frame_corners_xy[i].x * (1 << downsample_level);
+        corners->corners[2 * copied_corners + 1] =
+            frame_corners_xy[i].y * (1 << downsample_level);
         copied_corners += 1;
       }
     }
@@ -100,7 +117,8 @@ static bool compute_corner_list(const ImagePyramid *pyr, CornerList *corners) {
   return true;
 }
 
-bool av1_compute_corner_list(const ImagePyramid *pyr, CornerList *corners) {
+bool av1_compute_corner_list(const YV12_BUFFER_CONFIG *frame, int bit_depth,
+                             int downsample_level, CornerList *corners) {
   assert(corners);
 
 #if CONFIG_MULTITHREAD
@@ -108,7 +126,8 @@ bool av1_compute_corner_list(const ImagePyramid *pyr, CornerList *corners) {
 #endif  // CONFIG_MULTITHREAD
 
   if (!corners->valid) {
-    corners->valid = compute_corner_list(pyr, corners);
+    corners->valid =
+        compute_corner_list(frame, bit_depth, downsample_level, corners);
   }
   bool valid = corners->valid;
 
diff --git a/aom_dsp/flow_estimation/corner_detect.h b/aom_dsp/flow_estimation/corner_detect.h
index 4c6add2bf9..54d94309ed 100644
--- a/aom_dsp/flow_estimation/corner_detect.h
+++ b/aom_dsp/flow_estimation/corner_detect.h
@@ -57,7 +57,8 @@ size_t av1_get_corner_list_size(void);
 
 CornerList *av1_alloc_corner_list(void);
 
-bool av1_compute_corner_list(const ImagePyramid *pyr, CornerList *corners);
+bool av1_compute_corner_list(const YV12_BUFFER_CONFIG *frame, int bit_depth,
+                             int downsample_level, CornerList *corners);
 
 #ifndef NDEBUG
 // Check if a corner list has already been computed.
diff --git a/aom_dsp/flow_estimation/corner_match.c b/aom_dsp/flow_estimation/corner_match.c
index 5f995aeb6b..c78edb8910 100644
--- a/aom_dsp/flow_estimation/corner_match.c
+++ b/aom_dsp/flow_estimation/corner_match.c
@@ -253,8 +253,8 @@ static int determine_correspondence(const unsigned char *src,
 
 bool av1_compute_global_motion_feature_match(
     TransformationType type, YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref,
-    int bit_depth, MotionModel *motion_models, int num_motion_models,
-    bool *mem_alloc_failed) {
+    int bit_depth, int downsample_level, MotionModel *motion_models,
+    int num_motion_models, bool *mem_alloc_failed) {
   int num_correspondences;
   Correspondence *correspondences;
   ImagePyramid *src_pyramid = src->y_pyramid;
@@ -267,7 +267,7 @@ bool av1_compute_global_motion_feature_match(
     *mem_alloc_failed = true;
     return false;
   }
-  if (!av1_compute_corner_list(src_pyramid, src_corners)) {
+  if (!av1_compute_corner_list(src, bit_depth, downsample_level, src_corners)) {
     *mem_alloc_failed = true;
     return false;
   }
@@ -275,7 +275,7 @@ bool av1_compute_global_motion_feature_match(
     *mem_alloc_failed = true;
     return false;
   }
-  if (!av1_compute_corner_list(ref_pyramid, ref_corners)) {
+  if (!av1_compute_corner_list(ref, bit_depth, downsample_level, ref_corners)) {
     *mem_alloc_failed = true;
     return false;
   }
diff --git a/aom_dsp/flow_estimation/corner_match.h b/aom_dsp/flow_estimation/corner_match.h
index 99507dcab7..77ebee2ea3 100644
--- a/aom_dsp/flow_estimation/corner_match.h
+++ b/aom_dsp/flow_estimation/corner_match.h
@@ -37,8 +37,8 @@ extern "C" {
 
 bool av1_compute_global_motion_feature_match(
     TransformationType type, YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref,
-    int bit_depth, MotionModel *motion_models, int num_motion_models,
-    bool *mem_alloc_failed);
+    int bit_depth, int downsample_level, MotionModel *motion_models,
+    int num_motion_models, bool *mem_alloc_failed);
 
 #ifdef __cplusplus
 }
diff --git a/aom_dsp/flow_estimation/disflow.c b/aom_dsp/flow_estimation/disflow.c
index eb2bb47f38..f511a6eb49 100644
--- a/aom_dsp/flow_estimation/disflow.c
+++ b/aom_dsp/flow_estimation/disflow.c
@@ -762,12 +762,10 @@ static void free_flow_field(FlowField *flow) {
 // Following the convention in flow_estimation.h, the flow vectors are computed
 // at fixed points in `src` and point to the corresponding locations in `ref`,
 // regardless of the temporal ordering of the frames.
-bool av1_compute_global_motion_disflow(TransformationType type,
-                                       YV12_BUFFER_CONFIG *src,
-                                       YV12_BUFFER_CONFIG *ref, int bit_depth,
-                                       MotionModel *motion_models,
-                                       int num_motion_models,
-                                       bool *mem_alloc_failed) {
+bool av1_compute_global_motion_disflow(
+    TransformationType type, YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref,
+    int bit_depth, int downsample_level, MotionModel *motion_models,
+    int num_motion_models, bool *mem_alloc_failed) {
   // Precompute information we will need about each frame
   ImagePyramid *src_pyramid = src->y_pyramid;
   CornerList *src_corners = src->corners;
@@ -782,7 +780,7 @@ bool av1_compute_global_motion_disflow(TransformationType type,
     *mem_alloc_failed = true;
     return false;
   }
-  if (!av1_compute_corner_list(src_pyramid, src_corners)) {
+  if (!av1_compute_corner_list(src, bit_depth, downsample_level, src_corners)) {
     *mem_alloc_failed = true;
     return false;
   }
diff --git a/aom_dsp/flow_estimation/disflow.h b/aom_dsp/flow_estimation/disflow.h
index ef877b638c..1ba5e230b8 100644
--- a/aom_dsp/flow_estimation/disflow.h
+++ b/aom_dsp/flow_estimation/disflow.h
@@ -92,12 +92,10 @@ typedef struct {
   int stride;
 } FlowField;
 
-bool av1_compute_global_motion_disflow(TransformationType type,
-                                       YV12_BUFFER_CONFIG *src,
-                                       YV12_BUFFER_CONFIG *ref, int bit_depth,
-                                       MotionModel *motion_models,
-                                       int num_motion_models,
-                                       bool *mem_alloc_failed);
+bool av1_compute_global_motion_disflow(
+    TransformationType type, YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref,
+    int bit_depth, int downsample_level, MotionModel *motion_models,
+    int num_motion_models, bool *mem_alloc_failed);
 
 #ifdef __cplusplus
 }
diff --git a/aom_dsp/flow_estimation/flow_estimation.c b/aom_dsp/flow_estimation/flow_estimation.c
index 5eb36a2341..96624eb863 100644
--- a/aom_dsp/flow_estimation/flow_estimation.c
+++ b/aom_dsp/flow_estimation/flow_estimation.c
@@ -35,17 +35,17 @@ const double kIdentityParams[MAX_PARAMDIM] = {
 bool aom_compute_global_motion(TransformationType type, YV12_BUFFER_CONFIG *src,
                                YV12_BUFFER_CONFIG *ref, int bit_depth,
                                GlobalMotionMethod gm_method,
-                               MotionModel *motion_models,
+                               int downsample_level, MotionModel *motion_models,
                                int num_motion_models, bool *mem_alloc_failed) {
   switch (gm_method) {
     case GLOBAL_MOTION_METHOD_FEATURE_MATCH:
       return av1_compute_global_motion_feature_match(
-          type, src, ref, bit_depth, motion_models, num_motion_models,
-          mem_alloc_failed);
+          type, src, ref, bit_depth, downsample_level, motion_models,
+          num_motion_models, mem_alloc_failed);
     case GLOBAL_MOTION_METHOD_DISFLOW:
-      return av1_compute_global_motion_disflow(type, src, ref, bit_depth,
-                                               motion_models, num_motion_models,
-                                               mem_alloc_failed);
+      return av1_compute_global_motion_disflow(
+          type, src, ref, bit_depth, downsample_level, motion_models,
+          num_motion_models, mem_alloc_failed);
     default: assert(0 && "Unknown global motion estimation type");
   }
   return false;
diff --git a/aom_dsp/flow_estimation/flow_estimation.h b/aom_dsp/flow_estimation/flow_estimation.h
index ec5e217e08..a38b03fc4e 100644
--- a/aom_dsp/flow_estimation/flow_estimation.h
+++ b/aom_dsp/flow_estimation/flow_estimation.h
@@ -80,7 +80,7 @@ extern const double kIdentityParams[MAX_PARAMDIM];
 bool aom_compute_global_motion(TransformationType type, YV12_BUFFER_CONFIG *src,
                                YV12_BUFFER_CONFIG *ref, int bit_depth,
                                GlobalMotionMethod gm_method,
-                               MotionModel *motion_models,
+                               int downsample_level, MotionModel *motion_models,
                                int num_motion_models, bool *mem_alloc_failed);
 
 #ifdef __cplusplus
diff --git a/av1/encoder/global_motion_facade.c b/av1/encoder/global_motion_facade.c
index 4679a2cb83..687eeee18a 100644
--- a/av1/encoder/global_motion_facade.c
+++ b/av1/encoder/global_motion_facade.c
@@ -89,6 +89,7 @@ static AOM_INLINE void compute_global_motion_for_ref_frame(
   assert(ref_buf[frame] != NULL);
   int bit_depth = cpi->common.seq_params->bit_depth;
   GlobalMotionMethod global_motion_method = default_global_motion_method;
+  int downsample_level = cpi->sf.gm_sf.downsample_level;
   int num_refinements = cpi->sf.gm_sf.num_refinement_steps;
   bool mem_alloc_failed = false;
 
@@ -99,9 +100,10 @@ static AOM_INLINE void compute_global_motion_for_ref_frame(
   double best_erroradv = erroradv_tr;
   for (TransformationType model = FIRST_GLOBAL_TRANS_TYPE;
        model <= LAST_GLOBAL_TRANS_TYPE; ++model) {
-    if (!aom_compute_global_motion(
-            model, cpi->source, ref_buf[frame], bit_depth, global_motion_method,
-            motion_models, RANSAC_NUM_MOTIONS, &mem_alloc_failed)) {
+    if (!aom_compute_global_motion(model, cpi->source, ref_buf[frame],
+                                   bit_depth, global_motion_method,
+                                   downsample_level, motion_models,
+                                   RANSAC_NUM_MOTIONS, &mem_alloc_failed)) {
       if (mem_alloc_failed) {
         aom_internal_error(error_info, AOM_CODEC_MEM_ERROR,
                            "Failed to allocate global motion buffers");
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 7dc84ee5ed..0c1447b7b0 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1177,6 +1177,7 @@ static void set_good_speed_features_framesize_independent(
     sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
 
     sf->gm_sf.prune_zero_mv_with_sse = 2;
+    sf->gm_sf.downsample_level = 1;
 
     sf->part_sf.simple_motion_search_prune_agg =
         allow_screen_content_tools ? SIMPLE_AGG_LVL0 : SIMPLE_AGG_LVL2;
@@ -1282,6 +1283,8 @@ static void set_good_speed_features_framesize_independent(
     sf->hl_sf.disable_extra_sc_testing = 1;
     sf->hl_sf.second_alt_ref_filtering = 0;
 
+    sf->gm_sf.downsample_level = 2;
+
     sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 0 : 3;
     sf->inter_sf.selective_ref_frame = 6;
     sf->inter_sf.prune_single_ref = is_boosted_arf2_bwd_type ? 0 : 2;
@@ -1975,6 +1978,7 @@ static AOM_INLINE void init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES *gm_sf) {
   gm_sf->prune_ref_frame_for_gm_search = 0;
   gm_sf->prune_zero_mv_with_sse = 0;
   gm_sf->disable_gm_search_based_on_stats = 0;
+  gm_sf->downsample_level = 0;
   gm_sf->num_refinement_steps = GM_MAX_REFINEMENT_STEPS;
 }
 
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 60c000e4f4..ef93e1d7d5 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -587,6 +587,9 @@ typedef struct GLOBAL_MOTION_SPEED_FEATURES {
   // GF group
   int disable_gm_search_based_on_stats;
 
+  // Downsampling pyramid level to use for global motion estimation
+  int downsample_level;
+
   // Number of refinement steps to apply after initial model generation
   int num_refinement_steps;
 } GLOBAL_MOTION_SPEED_FEATURES;