aom: rtc: Speedup for speed 11 video mode

From 0414f4e9abe9e859a1a26c92e9c79af2da63bb0b Mon Sep 17 00:00:00 2001
From: Marco Paniconi <[EMAIL REDACTED]>
Date: Thu, 22 Feb 2024 22:49:24 +0000
Subject: [PATCH] rtc: Speedup for speed 11 video mode

Base the condition for selecting a fixed partition
explicitly on source SAD metrics. This only affects
360p/480p video. Needed to get more speedup for the
aggressive speed 11.

Also adjust some source_sad thresholds; this also
affects low resolutions.

rtc_derf: small/neutral bdrate change, ~1-2% IC speedup
rtc_set(vga only): ~7% bdrate loss, ~12% IC speedup

Change-Id: I1f26f6839b96e46a381897f9a9ab59238e20cef9
---
 av1/encoder/block.h             |  3 +++
 av1/encoder/encodeframe.c       | 13 +++++++++----
 av1/encoder/encodeframe_utils.c |  4 ++++
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 33d2d8c2a0..a4aa870370 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -1348,6 +1348,9 @@ typedef struct macroblock {
   //! Motion vector from superblock MV derived from int_pro_motion() in
   // the variance_partitioning.
   int_mv sb_me_mv;
+  //! Flag to indicate if a fixed partition should be used, only if the
+  // speed feature rt_sf->use_fast_fixed_part is enabled.
+  int sb_force_fixed_part;
   //! SSE of the current predictor.
   unsigned int pred_sse[REF_FRAMES];
   //! Prediction for ML based partition.
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 63b194b04c..19e59d5d0c 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -536,8 +536,7 @@ static AOM_INLINE void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
 #endif
   // Set the partition
   if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
-      (sf->rt_sf.use_fast_fixed_part &&
-       x->content_state_sb.source_sad_nonrd < kMedSad)) {
+      (sf->rt_sf.use_fast_fixed_part && x->sb_force_fixed_part == 1)) {
     // set a fixed-size partition
     av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
     BLOCK_SIZE bsize_select = sf->part_sf.fixed_partition_size;
@@ -1054,8 +1053,13 @@ static AOM_INLINE bool is_calc_src_content_needed(AV1_COMP *cpi,
 
     // The threshold is determined based on kLowSad and kHighSad threshold and
     // test results.
-    const uint64_t thresh_low = 15000;
-    const uint64_t thresh_high = 40000;
+    uint64_t thresh_low = 15000;
+    uint64_t thresh_high = 40000;
+
+    if (cpi->sf.rt_sf.increase_source_sad_thresh) {
+      thresh_low = thresh_low << 1;
+      thresh_high = thresh_high << 1;
+    }
 
     if (avg_64x64_blk_sad > thresh_low && avg_64x64_blk_sad < thresh_high) {
       do_calc_src_content = false;
@@ -1203,6 +1207,7 @@ static AOM_INLINE void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
     x->sb_me_block = 0;
     x->sb_me_partition = 0;
     x->sb_me_mv.as_int = 0;
+    x->sb_force_fixed_part = 1;
 
     if (cpi->oxcf.mode == ALLINTRA) {
       x->intra_sb_rdmult_modifier = 128;
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
index 949837184a..947434c7e7 100644
--- a/av1/encoder/encodeframe_utils.c
+++ b/av1/encoder/encodeframe_utils.c
@@ -1431,6 +1431,10 @@ void av1_source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
   if ((tmp_sse - tmp_variance) < (sum_sq_thresh >> 1))
     x->content_state_sb.low_sumdiff = 1;
 
+  if (tmp_sse > ((avg_source_sse_threshold_high * 7) >> 3) &&
+      !x->content_state_sb.lighting_change && !x->content_state_sb.low_sumdiff)
+    x->sb_force_fixed_part = 0;
+
   if (!cpi->sf.rt_sf.use_rtc_tf || cpi->rc.high_source_sad ||
       cpi->rc.frame_source_sad > 20000 || cpi->svc.number_spatial_layers > 1)
     return;