aom: rtc: Palette mode for nonrd intra pickmode

From d97381f8656a8a16d9be29f89378bc7590bb7d02 Mon Sep 17 00:00:00 2001
From: Marco Paniconi <[EMAIL REDACTED]>
Date: Thu, 2 May 2024 11:35:58 -0700
Subject: [PATCH] rtc: Palette mode for nonrd intra pickmode

Add the palette mode to the nonrd intra pickmode,
and disable hybrid_intra_pickmode for screen mode
with speed >= 10. Increase the thresh_dist to test
palette for speed 11.

This makes key frame encoding much faster for screen:
~2x faster with little quality loss.

Bug: b/337757868

Change-Id: Iffc5f6a83615d7901e90917f6adb586d89a24879
---
 av1/encoder/nonrd_pickmode.c | 36 ++++++++++++++++++++++++++++++++++++
 av1/encoder/speed_features.c |  1 +
 test/svc_datarate_test.cc    |  6 +++---
 3 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index dcaf13f1d9..6a734cbd1c 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -1648,6 +1648,42 @@ void av1_nonrd_pick_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost,
     }
   }
 
+  const int64_t thresh_dist = cpi->sf.rt_sf.prune_palette_nonrd ? 80000 : 20000;
+  const int64_t best_dist_norm = best_rdc.dist >> (b_width_log2_lookup[bsize] +
+                                                   b_height_log2_lookup[bsize]);
+
+  // Try palette if it's enabled.
+  bool try_palette =
+      best_dist_norm > thresh_dist && cpi->oxcf.tool_cfg.enable_palette &&
+      bsize <= BLOCK_16X16 && x->source_variance > 200 &&
+      av1_allow_palette(cpi->common.features.allow_screen_content_tools,
+                        mi->bsize);
+  if (try_palette) {
+    const TxfmSearchInfo *txfm_info = &x->txfm_search_info;
+    const unsigned int intra_ref_frame_cost = 0;
+    // Search palette mode for Luma plane in intra frame.
+    av1_search_palette_mode_luma(cpi, x, bsize, intra_ref_frame_cost, ctx,
+                                 &this_rdc, best_rdc.rdcost);
+    // Update best mode data.
+    if (this_rdc.rdcost < best_rdc.rdcost &&
+        this_rdc.rate < (3 * (best_rdc.rate >> 1))) {
+      best_mode = DC_PRED;
+      mi->mv[0].as_int = INVALID_MV;
+      mi->mv[1].as_int = INVALID_MV;
+      best_rdc.rate = this_rdc.rate;
+      best_rdc.dist = this_rdc.dist;
+      best_rdc.rdcost = this_rdc.rdcost;
+      if (!this_rdc.skip_txfm) {
+        memcpy(ctx->blk_skip, txfm_info->blk_skip,
+               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
+      }
+      if (xd->tx_type_map[0] != DCT_DCT)
+        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
+    } else {
+      av1_zero(mi->palette_mode_info);
+    }
+  }
+
   mi->mode = best_mode;
   // Keep DC for UV since mode test is based on Y channel only.
   mi->uv_mode = UV_DC_PRED;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index b0ab7feb3b..25c0b3af41 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1578,6 +1578,7 @@ static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
       sf->rt_sf.part_early_exit_zeromv = 1;
       sf->rt_sf.nonrd_aggressive_skip = 1;
       sf->rt_sf.thresh_active_maps_skip_lf_cdef = 90;
+      sf->rt_sf.hybrid_intra_pickmode = 0;
     }
     if (speed >= 11) {
       sf->rt_sf.skip_lf_screen = 2;
diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc
index cc3fb674b3..28f795cf2a 100644
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -986,7 +986,7 @@ class DatarateTestSVC
 
     ::libaom_test::Y4mVideoSource video("screendata.y4m", 0, 60);
 
-    const int bitrate_array[2] = { 800, 1200 };
+    const int bitrate_array[2] = { 1000, 1500 };
     cfg_.rc_target_bitrate = bitrate_array[GET_PARAM(4)];
     ResetModel();
     screen_mode_ = 1;
@@ -997,9 +997,9 @@ class DatarateTestSVC
     target_layer_bitrate_[2] = cfg_.rc_target_bitrate;
     ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
     for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
-      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.50)
+      ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.40)
           << " The datarate for the file is lower than target by too much!";
-      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.7)
+      ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 2.0)
           << " The datarate for the file is greater than target by too much!";
     }
     // Top temporal layers are non_reference, so exlcude them from