aom: rtc: Speedup for active_maps with screen (fb613)

From fb61366f849ae9259e39a9bc8caf33a5aac7d3c9 Mon Sep 17 00:00:00 2001
From: Marco Paniconi <[EMAIL REDACTED]>
Date: Wed, 20 Mar 2024 14:52:44 -0700
Subject: [PATCH] rtc: Speedup for active_maps with screen

-Disable loopfilter and cdef at frame-level
 based on percent of inactive blocks.
-Remove source_variance calculation for some blocks
 labelled as seg_skip (inactive via active_maps); this
 is mainly for blocks on the active/inactive boundary.
-Force cdef to always skip for blocks labelled as
 inactive (seg_skip=1).
-Allow skip_over4x4 to be enabled when active_maps is enabled

This change is not bitexact, but has only a small quality
difference. Speedup is ~3% in an offline test.

Change-Id: Ia0ae3788f54c436dced5cbba3aa98fcda0287ff1
(cherry picked from commit 23d4875b813d4d1e7fb1e1a94a129a9c606a481c)
---
 av1/encoder/aq_cyclicrefresh.c | 10 +++++++++-
 av1/encoder/encoder.c          |  5 ++++-
 av1/encoder/partition_search.c | 30 ++++++++++++++++++------------
 av1/encoder/speed_features.c   |  2 ++
 av1/encoder/speed_features.h   |  4 ++++
 5 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/av1/encoder/aq_cyclicrefresh.c b/av1/encoder/aq_cyclicrefresh.c
index 11b6ea629..73357eb07 100644
--- a/av1/encoder/aq_cyclicrefresh.c
+++ b/av1/encoder/aq_cyclicrefresh.c
@@ -179,6 +179,10 @@ void av1_cyclic_reset_segment_skip(const AV1_COMP *cpi, MACROBLOCK *const x,
         memset(&cm->cur_frame->seg_map[map_offset], segment_id, xmis);
       }
     }
+  } else if (prev_segment_id == AM_SEGMENT_ID_INACTIVE) {
+    // TODO(marpan): Look into why this condition is needed
+    // (when skip_over4x4 = 1) to prevent decoder failure.
+    mbmi->segment_id = 0;
   }
   if (!dry_run) {
     if (cyclic_refresh_segment_id(prev_segment_id) == CR_SEGMENT_ID_BOOST1)
@@ -434,7 +438,7 @@ void av1_cyclic_refresh_update_parameters(AV1_COMP *const cpi) {
   // function av1_cyclic_reset_segment_skip(). Skipping over
   // 4x4 will therefore have small bdrate loss (~0.2%), so
   // we use it only for speed > 9 for now.
-  cr->skip_over4x4 = (cpi->oxcf.speed > 9 && !cpi->active_map.enabled) ? 1 : 0;
+  cr->skip_over4x4 = (cpi->oxcf.speed > 9) ? 1 : 0;
 
   // should we enable cyclic refresh on this frame.
   cr->apply_cyclic_refresh = 1;
@@ -668,6 +672,10 @@ void av1_cyclic_refresh_reset_resize(AV1_COMP *const cpi) {
 int av1_cyclic_refresh_disable_lf_cdef(AV1_COMP *const cpi) {
   CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
   const int qindex = cpi->common.quant_params.base_qindex;
+  if (cpi->active_map.enabled &&
+      cpi->rc.percent_blocks_inactive >
+          cpi->sf.rt_sf.thresh_active_maps_skip_lf_cdef)
+    return 1;
   if (cpi->rc.frames_since_key > 30 && cr->percent_refresh > 0 &&
       cr->counter_encode_maxq_scene_change > 300 / cr->percent_refresh &&
       cpi->rc.frame_source_sad < 1000 &&
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index c9605dca6..d550bb10d 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2394,7 +2394,10 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
 
   const int use_loopfilter =
       is_loopfilter_used(cm) && !cpi->mt_info.pipeline_lpf_mt_with_enc;
-  const int use_cdef = is_cdef_used(cm);
+  const int use_cdef =
+      is_cdef_used(cm) && (!cpi->active_map.enabled ||
+                           cpi->rc.percent_blocks_inactive <=
+                               cpi->sf.rt_sf.thresh_active_maps_skip_lf_cdef);
   const int use_superres = av1_superres_scaled(cm);
   const int use_restoration = is_restoration_used(cm);
 
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index cef339757..61d49a23f 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -2255,6 +2255,8 @@ static void pick_sb_modes_nonrd(AV1_COMP *const cpi, TileDataEnc *tile_data,
   const AQ_MODE aq_mode = cpi->oxcf.q_cfg.aq_mode;
   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
   int i;
+  const int seg_skip =
+      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
 
   // This is only needed for real time/allintra row-mt enabled multi-threaded
   // encoding with cost update frequency set to COST_UPD_TILE/COST_UPD_OFF.
@@ -2277,15 +2279,17 @@ static void pick_sb_modes_nonrd(AV1_COMP *const cpi, TileDataEnc *tile_data,
   }
   for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
 
-  x->force_zeromv_skip_for_blk =
-      get_force_zeromv_skip_flag_for_blk(cpi, x, bsize);
+  if (!seg_skip) {
+    x->force_zeromv_skip_for_blk =
+        get_force_zeromv_skip_flag_for_blk(cpi, x, bsize);
 
-  // Source variance may be already compute at superblock level, so no need
-  // to recompute, unless bsize < sb_size or source_variance is not yet set.
-  if (!x->force_zeromv_skip_for_blk &&
-      (x->source_variance == UINT_MAX || bsize < cm->seq_params->sb_size))
-    x->source_variance = av1_get_perpixel_variance_facade(
-        cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y);
+    // Source variance may be already compute at superblock level, so no need
+    // to recompute, unless bsize < sb_size or source_variance is not yet set.
+    if (!x->force_zeromv_skip_for_blk &&
+        (x->source_variance == UINT_MAX || bsize < cm->seq_params->sb_size))
+      x->source_variance = av1_get_perpixel_variance_facade(
+          cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y);
+  }
 
   // Save rdmult before it might be changed, so it can be restored later.
   const int orig_rdmult = x->rdmult;
@@ -2306,7 +2310,7 @@ static void pick_sb_modes_nonrd(AV1_COMP *const cpi, TileDataEnc *tile_data,
 #if CONFIG_COLLECT_COMPONENT_TIMING
     start_timing(cpi, nonrd_pick_inter_mode_sb_time);
 #endif
-    if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+    if (seg_skip) {
       x->force_zeromv_skip_for_blk = 1;
       // TODO(marpan): Consider adding a function for nonrd:
       // av1_nonrd_pick_inter_mode_sb_seg_skip(), instead of setting
@@ -2320,10 +2324,12 @@ static void pick_sb_modes_nonrd(AV1_COMP *const cpi, TileDataEnc *tile_data,
   if (cpi->sf.rt_sf.skip_cdef_sb) {
     // cdef_strength is initialized to 1 which means skip_cdef, and is updated
     // here. Check to see is skipping cdef is allowed.
+    // Always allow cdef_skip for seg_skip = 1.
     const int allow_cdef_skipping =
-        cpi->rc.frames_since_key > 10 && !cpi->rc.high_source_sad &&
-        !(x->color_sensitivity[COLOR_SENS_IDX(AOM_PLANE_U)] ||
-          x->color_sensitivity[COLOR_SENS_IDX(AOM_PLANE_V)]);
+        seg_skip ||
+        (cpi->rc.frames_since_key > 10 && !cpi->rc.high_source_sad &&
+         !(x->color_sensitivity[COLOR_SENS_IDX(AOM_PLANE_U)] ||
+           x->color_sensitivity[COLOR_SENS_IDX(AOM_PLANE_V)]));
 
     // Find the corresponding 64x64 block. It'll be the 128x128 block if that's
     // the block size.
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index f7242f8f9..256b6fc9e 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1577,6 +1577,7 @@ static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
       sf->rt_sf.screen_content_cdef_filter_qindex_thresh = 80;
       sf->rt_sf.part_early_exit_zeromv = 1;
       sf->rt_sf.nonrd_aggressive_skip = 1;
+      sf->rt_sf.thresh_active_maps_skip_lf_cdef = 90;
     }
     if (speed >= 11) {
       sf->rt_sf.skip_lf_screen = 2;
@@ -2275,6 +2276,7 @@ static AOM_INLINE void init_rt_sf(REAL_TIME_SPEED_FEATURES *rt_sf) {
   rt_sf->part_early_exit_zeromv = 0;
   rt_sf->sse_early_term_inter_search = EARLY_TERM_DISABLED;
   rt_sf->skip_lf_screen = 0;
+  rt_sf->thresh_active_maps_skip_lf_cdef = 100;
   rt_sf->sad_based_adp_altref_lag = 0;
   rt_sf->partition_direct_merging = 0;
   rt_sf->var_part_based_on_qidx = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index ef93e1d7d..d59cb38a7 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1774,6 +1774,10 @@ typedef struct REAL_TIME_SPEED_FEATURES {
   // where rc->high_source_sad = 0 (no slide-changes).
   int skip_lf_screen;
 
+  // Threshold on the active/inactive region percent to disable
+  // the loopfilter and cdef. Setting to 100 disables this feature.
+  int thresh_active_maps_skip_lf_cdef;
+
   // For nonrd: early exit out of variance partition that sets the
   // block size to superblock size, and sets mode to zeromv-last skip.
   // 0: disabled