aom: Add encoder control for setting column/row tiling

From 1e9c7a2416003d79c92e90aacf19a087c2ef4a38 Mon Sep 17 00:00:00 2001
From: Marco Paniconi <[EMAIL REDACTED]>
Date: Thu, 13 Jun 2024 11:53:21 -0700
Subject: [PATCH] Add encoder control for setting column/row tiling

When the control is enabled (set to 1), the encoder chooses the
number of tile columns and tile rows itself, based on the number
of threads and the frame resolution.

Add SVC unittest with 4 threads with new control.

Bug:b/349189136
Change-Id: I8e39fe1938d2e7470a1327254c3db02998ce75b7
---
 aom/aomcx.h                 | 11 +++++
 apps/aomenc.c               |  1 +
 av1/arg_defs.c              |  2 +
 av1/arg_defs.h              |  1 +
 av1/av1_cx_iface.c          | 90 ++++++++++++++++++++++++++++++++++---
 av1/encoder/encoder.h       |  1 +
 examples/svc_encoder_rtc.cc |  5 +--
 test/datarate_test.h        | 10 ++++-
 test/svc_datarate_test.cc   | 43 +++++++++++++++++-
 9 files changed, 150 insertions(+), 14 deletions(-)

diff --git a/aom/aomcx.h b/aom/aomcx.h
index 002b5d37d..835eab84d 100644
--- a/aom/aomcx.h
+++ b/aom/aomcx.h
@@ -1539,6 +1539,14 @@ enum aome_enc_control_id {
    */
   AV1E_SET_SVC_FRAME_DROP_MODE = 165,
 
+  /*!\brief Codec control to set auto tiling, unsigned int parameter.
+   * Value of 1 means the encoder sets tile_columns and tile_rows itself, based
+   * on the number of threads and the resolution. This overrides any settings
+   * made via AV1E_SET_TILE_COLUMNS/ROWS, which are rejected while it is 1.
+   * If 0 (default), the previous tile_columns/rows setting (if any) is kept.
+   */
+  AV1E_SET_AUTO_TILES = 166,
+
   // Any new encoder control IDs should be added above.
   // Maximum allowed encoder control ID is 229.
   // No encoder control ID should be added below.
@@ -2196,6 +2204,9 @@ AOM_CTRL_USE_TYPE(AV1E_SET_SVC_FRAME_DROP_MODE, unsigned int)
 AOM_CTRL_USE_TYPE(AV1E_SET_MAX_CONSEC_FRAME_DROP_CBR, int)
 #define AOM_CTRL_AV1E_SET_MAX_CONSEC_FRAME_DROP_CBR
 
+AOM_CTRL_USE_TYPE(AV1E_SET_AUTO_TILES, unsigned int)
+#define AOM_CTRL_AV1E_SET_AUTO_TILES
+
 /*!\endcond */
 /*! @} - end defgroup aom_encoder */
 #ifdef __cplusplus
diff --git a/apps/aomenc.c b/apps/aomenc.c
index 00461e09a..0bfce0ae1 100644
--- a/apps/aomenc.c
+++ b/apps/aomenc.c
@@ -466,6 +466,7 @@ static const arg_def_t *const av1_key_val_args[] = {
   &g_av1_codec_arg_defs.sb_qp_sweep,
   &g_av1_codec_arg_defs.dist_metric,
   &g_av1_codec_arg_defs.kf_max_pyr_height,
+  &g_av1_codec_arg_defs.auto_tiles,
   NULL,
 };
 
diff --git a/av1/arg_defs.c b/av1/arg_defs.c
index d0f6814e5..7aef17c86 100644
--- a/av1/arg_defs.c
+++ b/av1/arg_defs.c
@@ -315,6 +315,8 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
       ARG_DEF(NULL, "tile-columns", 1, "Number of tile columns to use, log2"),
   .tile_rows =
       ARG_DEF(NULL, "tile-rows", 1, "Number of tile rows to use, log2"),
+  .auto_tiles = ARG_DEF(NULL, "auto-tiles", 1,
+                        "Enable auto tiles (0: false (default), 1: true)"),
   .enable_tpl_model = ARG_DEF(NULL, "enable-tpl-model", 1,
                               "RDO based on frame temporal dependency "
                               "(0: off, 1: backward source based); "
diff --git a/av1/arg_defs.h b/av1/arg_defs.h
index b7e8440f1..a206036f4 100644
--- a/av1/arg_defs.h
+++ b/av1/arg_defs.h
@@ -124,6 +124,7 @@ typedef struct av1_codec_arg_definitions {
   arg_def_t fpmtarg;
   arg_def_t tile_cols;
   arg_def_t tile_rows;
+  arg_def_t auto_tiles;
   arg_def_t enable_tpl_model;
   arg_def_t enable_keyframe_filtering;
   arg_def_t tile_width;
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index f6ef0ca4e..a03fed9fc 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -9,6 +9,7 @@
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 #include <limits.h>
+#include <math.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
@@ -57,6 +58,7 @@ struct av1_extracfg {
   unsigned int fp_mt;
   unsigned int tile_columns;  // log2 number of tile columns
   unsigned int tile_rows;     // log2 number of tile rows
+  unsigned int auto_tiles;
   unsigned int enable_tpl_model;
   unsigned int enable_keyframe_filtering;
   unsigned int arnr_max_frames;
@@ -237,6 +239,7 @@ static const struct av1_extracfg default_extra_cfg = {
   0,              // fp_mt
   0,              // tile_columns
   0,              // tile_rows
+  0,              // auto_tiles
   0,              // enable_tpl_model
   1,              // enable_keyframe_filtering
   7,              // arnr_max_frames
@@ -388,6 +391,7 @@ static const struct av1_extracfg default_extra_cfg = {
   0,              // fp_mt
   0,              // tile_columns
   0,              // tile_rows
+  0,              // auto_tiles
   1,              // enable_tpl_model
   1,              // enable_keyframe_filtering
   7,              // arnr_max_frames
@@ -751,6 +755,7 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
 
   RANGE_CHECK_HI(extra_cfg, tile_columns, 6);
   RANGE_CHECK_HI(extra_cfg, tile_rows, 6);
+  RANGE_CHECK_HI(extra_cfg, auto_tiles, 1);
 
   RANGE_CHECK_HI(cfg, monochrome, 1);
 
@@ -986,6 +991,40 @@ static void disable_superres(SuperResCfg *const superres_cfg) {
   superres_cfg->superres_kf_qthresh = 255;
 }
 
+// Derives the log2 tile column/row counts from the frame dimensions and the
+// thread count and stores them in tile_cfg->tile_columns and
+// tile_cfg->tile_rows. With fewer than two threads tile_cfg is left
+// unchanged.
+static void set_auto_tiles(TileConfig *const tile_cfg, unsigned int width,
+                           unsigned int height, unsigned int threads) {
+  if (threads < 2) return;
+  // Avoid small tiles because they are particularly bad for coding.
+  // Use no more tiles than the number of threads. Aim for one tile per
+  // thread. Using more than one thread inside one tile could be less
+  // efficient. Using more tiles than the number of threads would result
+  // in a compression penalty without much benefit.
+  const uint64_t kMinTileArea = 128 * 128;
+  const uint64_t kMaxTiles = 32;
+  // Compute the area in 64 bits: width * height can overflow a 32-bit
+  // unsigned int (e.g. 65536x65536 wraps to 0) and corrupt the tile count.
+  const uint64_t frame_area = (uint64_t)width * height;
+  uint64_t tiles = (frame_area + kMinTileArea - 1) / kMinTileArea;
+  if (tiles > kMaxTiles) tiles = kMaxTiles;
+  if (tiles > threads) tiles = threads;
+  // Always use at least one tile so that log2() below is well defined, then
+  // round the tile count down to a power of two.
+  if (tiles < 1) tiles = 1;
+  const int tiles_log2 = (int)log2((double)tiles);
+  // If the frame width is equal or greater than the height, use more tile
+  // columns than tile rows. The larger share gets the extra factor of two
+  // when tiles_log2 is odd.
+  const int major_log2 = (tiles_log2 + 1) / 2;
+  const int minor_log2 = tiles_log2 - major_log2;
+  const int is_wide = width >= height;
+  tile_cfg->tile_columns = is_wide ? major_log2 : minor_log2;
+  tile_cfg->tile_rows = is_wide ? minor_log2 : major_log2;
+}
+
 static void update_default_encoder_config(const cfg_options_t *cfg,
                                           struct av1_extracfg *extra_cfg) {
   extra_cfg->enable_cdef = (cfg->disable_cdef == 0) ? 1 : 0;
@@ -1326,8 +1365,14 @@ static void set_encoder_config(AV1EncoderConfig *oxcf,
   tile_cfg->enable_large_scale_tile = cfg->large_scale_tile;
   tile_cfg->enable_single_tile_decoding =
       (tile_cfg->enable_large_scale_tile) ? extra_cfg->single_tile_decoding : 0;
-  tile_cfg->tile_columns = extra_cfg->tile_columns;
-  tile_cfg->tile_rows = extra_cfg->tile_rows;
+  if (extra_cfg->auto_tiles) {
+    set_auto_tiles(tile_cfg, cfg->g_w, cfg->g_h, cfg->g_threads);
+    extra_cfg->tile_columns = tile_cfg->tile_columns;
+    extra_cfg->tile_rows = tile_cfg->tile_rows;
+  } else {
+    tile_cfg->tile_columns = extra_cfg->tile_columns;
+    tile_cfg->tile_rows = extra_cfg->tile_rows;
+  }
   tile_cfg->tile_width_count = AOMMIN(cfg->tile_width_count, MAX_TILE_COLS);
   tile_cfg->tile_height_count = AOMMIN(cfg->tile_height_count, MAX_TILE_ROWS);
   for (int i = 0; i < tile_cfg->tile_width_count; i++) {
@@ -1688,18 +1733,28 @@ static aom_codec_err_t ctrl_set_row_mt(aom_codec_alg_priv_t *ctx,
 
 static aom_codec_err_t ctrl_set_tile_columns(aom_codec_alg_priv_t *ctx,
                                              va_list args) {
-  unsigned int tile_columns = CAST(AV1E_SET_TILE_COLUMNS, args);
-  if (tile_columns == ctx->extra_cfg.tile_columns) return AOM_CODEC_OK;
+  // If the control AUTO_TILES is used (set to 1), report an error instead of
+  // overriding the tile_columns chosen by the AUTO_TILES control.
+  if (ctx->extra_cfg.auto_tiles) {
+    ERROR("AUTO_TILES is set so AV1E_SET_TILE_COLUMNS should not be called.");
+  }
   struct av1_extracfg extra_cfg = ctx->extra_cfg;
+  unsigned int tile_columns = CAST(AV1E_SET_TILE_COLUMNS, args);
+  if (tile_columns == extra_cfg.tile_columns) return AOM_CODEC_OK;
   extra_cfg.tile_columns = tile_columns;
   return update_extra_cfg(ctx, &extra_cfg);
 }
 
 static aom_codec_err_t ctrl_set_tile_rows(aom_codec_alg_priv_t *ctx,
                                           va_list args) {
-  unsigned int tile_rows = CAST(AV1E_SET_TILE_ROWS, args);
-  if (tile_rows == ctx->extra_cfg.tile_rows) return AOM_CODEC_OK;
+  // If the control AUTO_TILES is used (set to 1), report an error instead of
+  // overriding the tile_rows chosen by the AUTO_TILES control.
+  if (ctx->extra_cfg.auto_tiles) {
+    ERROR("AUTO_TILES is set so AV1E_SET_TILE_ROWS should not be called.");
+  }
   struct av1_extracfg extra_cfg = ctx->extra_cfg;
+  unsigned int tile_rows = CAST(AV1E_SET_TILE_ROWS, args);
+  if (tile_rows == extra_cfg.tile_rows) return AOM_CODEC_OK;
   extra_cfg.tile_rows = tile_rows;
   return update_extra_cfg(ctx, &extra_cfg);
 }
@@ -2637,6 +2692,15 @@ static aom_codec_err_t ctrl_set_svc_frame_drop_mode(aom_codec_alg_priv_t *ctx,
     return AOM_CODEC_OK;
 }
 
+static aom_codec_err_t ctrl_set_auto_tiles(aom_codec_alg_priv_t *ctx,
+                                           va_list args) {
+  struct av1_extracfg extra_cfg = ctx->extra_cfg;
+  extra_cfg.auto_tiles = CAST(AV1E_SET_AUTO_TILES, args);
+  // Nothing to update when the requested value is already in effect.
+  if (extra_cfg.auto_tiles == ctx->extra_cfg.auto_tiles) return AOM_CODEC_OK;
+  return update_extra_cfg(ctx, &extra_cfg);
+}
+
 #if !CONFIG_REALTIME_ONLY
 static aom_codec_err_t create_stats_buffer(FIRSTPASS_STATS **frame_stats_buffer,
                                            STATS_BUFFER_CTX *stats_buf_context,
@@ -3916,9 +3980,22 @@ static aom_codec_err_t encoder_set_option(aom_codec_alg_priv_t *ctx,
   } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.tile_cols, argv,
                               err_string)) {
     extra_cfg.tile_columns = arg_parse_uint_helper(&arg, err_string);
+    if (extra_cfg.auto_tiles) {
+      snprintf(err_string, ARG_ERR_MSG_MAX_LEN,
+               "Cannot set tile-cols because auto-tiles is already set.");
+      err = AOM_CODEC_INVALID_PARAM;
+    }
   } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.tile_rows, argv,
                               err_string)) {
     extra_cfg.tile_rows = arg_parse_uint_helper(&arg, err_string);
+    if (extra_cfg.auto_tiles) {
+      snprintf(err_string, ARG_ERR_MSG_MAX_LEN,
+               "Cannot set tile-rows because auto-tiles is already set.");
+      err = AOM_CODEC_INVALID_PARAM;
+    }
+  } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.auto_tiles, argv,
+                              err_string)) {
+    extra_cfg.auto_tiles = arg_parse_uint_helper(&arg, err_string);
   } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.enable_tpl_model,
                               argv, err_string)) {
     extra_cfg.enable_tpl_model = arg_parse_uint_helper(&arg, err_string);
@@ -4504,6 +4581,7 @@ static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
   { AV1E_SET_BITRATE_ONE_PASS_CBR, ctrl_set_bitrate_one_pass_cbr },
   { AV1E_SET_MAX_CONSEC_FRAME_DROP_CBR, ctrl_set_max_consec_frame_drop_cbr },
   { AV1E_SET_SVC_FRAME_DROP_MODE, ctrl_set_svc_frame_drop_mode },
+  { AV1E_SET_AUTO_TILES, ctrl_set_auto_tiles },
 
   // Getters
   { AOME_GET_LAST_QUANTIZER, ctrl_get_quantizer },
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 5966da738..38fe8a6fa 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -3790,6 +3790,7 @@ AV1_COMP *av1_get_parallel_frame_enc_data(AV1_PRIMARY *const ppi,
 int av1_init_parallel_frame_context(const AV1_COMP_DATA *const first_cpi_data,
                                     AV1_PRIMARY *const ppi,
                                     int *ref_buffers_used_map);
+
 /*!\endcond */
 
 /*!\brief Obtain the raw frame data
diff --git a/examples/svc_encoder_rtc.cc b/examples/svc_encoder_rtc.cc
index 58f103f11..c9ff20cdf 100644
--- a/examples/svc_encoder_rtc.cc
+++ b/examples/svc_encoder_rtc.cc
@@ -1689,10 +1689,7 @@ int main(int argc, const char **argv) {
   aom_codec_control(&codec, AV1E_SET_ENABLE_FILTER_INTRA, 0);
   aom_codec_control(&codec, AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1);
 
-  if (cfg.g_threads > 1) {
-    aom_codec_control(&codec, AV1E_SET_TILE_COLUMNS,
-                      (unsigned int)log2(cfg.g_threads));
-  }
+  aom_codec_control(&codec, AV1E_SET_AUTO_TILES, 1);
 
   aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
   if (app_input.tune_content == AOM_CONTENT_SCREEN) {
diff --git a/test/datarate_test.h b/test/datarate_test.h
index 24ce832a6..af35dff8d 100644
--- a/test/datarate_test.h
+++ b/test/datarate_test.h
@@ -44,6 +44,7 @@ class DatarateTest : public ::libaom_test::EncoderTest {
     denoiser_offon_period_ = -1;
     tile_columns_ = 0;
     tile_rows_ = 0;
+    auto_tiles_ = false;
     screen_mode_ = false;
     max_perc_spike_ = 1.0;
     max_perc_spike_high_ = 1.0;
@@ -63,8 +64,12 @@ class DatarateTest : public ::libaom_test::EncoderTest {
     if (video->frame() == 0) {
       encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
       encoder->Control(AV1E_SET_AQ_MODE, aq_mode_);
-      encoder->Control(AV1E_SET_TILE_COLUMNS, tile_columns_);
-      encoder->Control(AV1E_SET_TILE_ROWS, tile_rows_);
+      if (auto_tiles_) {
+        encoder->Control(AV1E_SET_AUTO_TILES, 1);
+      } else {
+        encoder->Control(AV1E_SET_TILE_COLUMNS, tile_columns_);
+        encoder->Control(AV1E_SET_TILE_ROWS, tile_rows_);
+      }
       encoder->Control(AV1E_SET_ROW_MT, 1);
       if (cfg_.g_usage == AOM_USAGE_REALTIME) {
         encoder->Control(AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
@@ -207,6 +212,7 @@ class DatarateTest : public ::libaom_test::EncoderTest {
   bool speed_change_test_;
   int tile_columns_;
   int tile_rows_;
+  bool auto_tiles_;
   bool screen_mode_;
   double max_perc_spike_;
   double max_perc_spike_high_;
diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc
index 2284ce524..2a540e1e2 100644
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -118,8 +118,12 @@ class DatarateTestSVC
       encoder->Control(AV1E_SET_ENABLE_TPL_MODEL, 0);
       encoder->Control(AV1E_SET_DELTAQ_MODE, 0);
       if (cfg_.g_threads > 1) {
-        encoder->Control(AV1E_SET_TILE_COLUMNS, tile_columns_);
-        encoder->Control(AV1E_SET_TILE_ROWS, tile_rows_);
+        if (auto_tiles_) {
+          encoder->Control(AV1E_SET_AUTO_TILES, 1);
+        } else {
+          encoder->Control(AV1E_SET_TILE_COLUMNS, tile_columns_);
+          encoder->Control(AV1E_SET_TILE_ROWS, tile_rows_);
+        }
         encoder->Control(AV1E_SET_ROW_MT, 1);
       }
       if (screen_mode_) {
@@ -1672,6 +1676,36 @@ class DatarateTestSVC
     }
   }
 
+  virtual void BasicRateTargetingSVC2TL1SLHDMultiThread4AutoTilesTest() {
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 500;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_min_quantizer = 0;
+    cfg_.rc_max_quantizer = 63;
+    cfg_.rc_end_usage = AOM_CBR;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.g_error_resilient = 0;
+    cfg_.g_threads = 4;  // The tiling controls are only sent for > 1 thread.
+
+    ::libaom_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+    const int bitrate_array[2] = { 600, 1200 };
+    cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
+    ResetModel();
+    auto_tiles_ = true;  // Use AV1E_SET_AUTO_TILES instead of explicit tiles.
+    number_temporal_layers_ = 2;
+    number_spatial_layers_ = 1;
+    target_layer_bitrate_[0] = 60 * cfg_.rc_target_bitrate / 100;
+    target_layer_bitrate_[1] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.70)
+          << " The datarate for the file is lower than target by too much!";
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.45)
+          << " The datarate for the file is greater than target by too much!";
+    }
+  }
+
   virtual void BasicRateTargetingSVC3TL3SLHDMultiThread4Test() {
     cfg_.rc_buf_initial_sz = 500;
     cfg_.rc_buf_optimal_sz = 500;
@@ -2541,6 +2575,11 @@ TEST_P(DatarateTestSVC, BasicRateTargetingSVC2TL1SLHDMultiThread4) {
   BasicRateTargetingSVC2TL1SLHDMultiThread4Test();
 }
 
+// Check basic rate targeting for CBR, for 1 spatial, 2 temporal layers,
+// for 4 threads, row-mt enabled, and auto tiling (AV1E_SET_AUTO_TILES) on.
+TEST_P(DatarateTestSVC, BasicRateTargetingSVC2TL1SLHDMultiThread4AutoTiles) {
+  BasicRateTargetingSVC2TL1SLHDMultiThread4AutoTilesTest();
+}
 // Check basic rate targeting for CBR, for 3 spatial, 3 temporal layers,
 // for 4 threads, 2 tile_columns, 2 tiles_rows, row-mt enabled.
 TEST_P(DatarateTestSVC, BasicRateTargetingSVC3TL3SLHDMultiThread4) {