aom: Improvements to VBR rate accuracy trade offs.

From 59c592bb8b5c69bc29746c78d7399e15cf272683 Mon Sep 17 00:00:00 2001
From: Paul Wilkins <[EMAIL REDACTED]>
Date: Mon, 21 Aug 2023 16:06:16 +0100
Subject: [PATCH] Improvements to VBR rate accuracy trade offs.

The VBR rate control mechanism relies on an estimate of
the Q required to generate a target number of bits.

Better metrics are achieved for a clip if the Q is constrained
within a limited range so the encoder tries to estimate
a suitable min / max q range. However, if the initial estimate
is poor the clip may substantially overshoot or undershoot
the target range.

There are two main adjustment mechanisms to deal with
such cases.

The first allows for some limited adjustment of the Q range
and the heuristics used to estimate Q after each ARF group.

The second allows for adjustments to the min and max Q
each frame but is only activated if the accumulated error
exceeds a given threshold.  If this overshoot / undershoot
percentage is set to 100% on the command line, this second
stage is disabled.

This patch seeks to improve (on average) the accuracy of the
initial Q to rate estimate and to allow more adjustment via
the first (non thresholded) mechanism.

The secondary mechanism is left in place but acts more
slowly and is more limited in terms of the maximum amount
by which the min or max Q can be adjusted.

The net effect of these changes is a huge improvement in
the baseline rate accuracy and the rate accuracy / metrics trade off
when the overshoot and undershoot threshold is set to 100% or to
other high values such as 75% or 50%.

Results below show the bdrate change (-ve better) for psnr and vmaf
followed by the change in average absolute rate error. This last
number is an absolute change in the % error, not a % change of % error.
More detailed results and explanation are given in a separate
document.

Old code threshold 100% vs 75% (rate error reduced, but metrics
much worse)

Low Res opsnr 1.282%  Vmaf 1.949%  (abs rate error -4.088%)
Ugc 360 opsnr 2.308%  Vmaf 2.474%  (abs rate error -4.817%)
Mid Res opsnr 1.362%  Vmaf 1.314%  (abs rate error -4.210%)
Hd Res  opsnr 2.637%  Vmaf 3.984%  (abs rate error -7.369%)

New code threshold 100% vs old code 100%

Low Res opsnr 0.113%  Vmaf -0.324%  (abs rate error -6.888%)
Ugc 360 opsnr 0.084%  Vmaf -0.492%  (abs rate error -4.121%)
Mid Res opsnr -0.832%  Vmaf -0.937%  (abs rate error -2.772%)
Hd Res  opsnr 0.521%  Vmaf -0.732%  (abs rate error -8.547%)

In summary the worst case (threshold 100%) rate accuracy
with the new patch is as good as the 75% threshold case
previously but instead of a metrics hit of circa 1-4% the metrics
are broadly comparable for opsnr and improved for Vmaf.

Change the level monitoring unit tests to run over one second
(30 frames) instead of 40 frames, as this is the key frame interval
for the tests. Otherwise the level assessment is just done on the
average rate over the last 10 frames. Also change the level
compliance test to <= rather than ==.

STATS_CHANGED

Change-Id: I4dacd7c62bb55289fe0cd3d46c455dc90f48e9d9
---
 av1/encoder/pass2_strategy.c | 97 +++++++++++++-----------------------
 av1/encoder/ratectrl.c       |  8 +--
 test/level_test.cc           | 14 +++---
 3 files changed, 45 insertions(+), 74 deletions(-)

diff --git a/av1/encoder/pass2_strategy.c b/av1/encoder/pass2_strategy.c
index 40d9b7843b..bd8620c2be 100644
--- a/av1/encoder/pass2_strategy.c
+++ b/av1/encoder/pass2_strategy.c
@@ -158,28 +158,12 @@ static int frame_max_bits(const RATE_CONTROL *rc,
   return (int)max_bits;
 }
 
-static const double q_pow_term[(QINDEX_RANGE >> 5) + 1] = { 0.65, 0.70, 0.75,
-                                                            0.80, 0.85, 0.90,
-                                                            0.95, 0.95, 0.95 };
-#define ERR_DIVISOR 96.0
-static double calc_correction_factor(double err_per_mb, int q) {
-  const double error_term = err_per_mb / ERR_DIVISOR;
-  const int index = q >> 5;
-  // Adjustment to power term based on qindex
-  const double power_term =
-      q_pow_term[index] +
-      (((q_pow_term[index + 1] - q_pow_term[index]) * (q % 32)) / 32.0);
-  assert(error_term >= 0.0);
-  return fclamp(pow(error_term, power_term), 0.05, 5.0);
-}
-
 // Based on history adjust expectations of bits per macroblock.
 static void twopass_update_bpm_factor(AV1_COMP *cpi, int rate_err_tol) {
   TWO_PASS *const twopass = &cpi->ppi->twopass;
   const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
 
   // Based on recent history adjust expectations of bits per macroblock.
-  double damp_fac = AOMMAX(5.0, rate_err_tol / 10.0);
   double rate_err_factor = 1.0;
   const double adj_limit = AOMMAX(0.2, (double)(100 - rate_err_tol) / 200.0);
   const double min_fac = 1.0 - adj_limit;
@@ -214,9 +198,7 @@ static void twopass_update_bpm_factor(AV1_COMP *cpi, int rate_err_tol) {
   }
 
   int err_estimate = p_rc->rate_error_estimate;
-  int64_t bits_left = twopass->bits_left;
   int64_t total_actual_bits = p_rc->total_actual_bits;
-  int64_t bits_off_target = p_rc->vbr_bits_off_target;
   double rolling_arf_group_actual_bits =
       (double)twopass->rolling_arf_group_actual_bits;
   double rolling_arf_group_target_bits =
@@ -231,10 +213,6 @@ static void twopass_update_bpm_factor(AV1_COMP *cpi, int rate_err_tol) {
           : 0;
   total_actual_bits = simulate_parallel_frame ? p_rc->temp_total_actual_bits
                                               : p_rc->total_actual_bits;
-  bits_off_target = simulate_parallel_frame ? p_rc->temp_vbr_bits_off_target
-                                            : p_rc->vbr_bits_off_target;
-  bits_left =
-      simulate_parallel_frame ? p_rc->temp_bits_left : twopass->bits_left;
   rolling_arf_group_target_bits =
       (double)(simulate_parallel_frame
                    ? p_rc->temp_rolling_arf_group_target_bits
@@ -247,21 +225,21 @@ static void twopass_update_bpm_factor(AV1_COMP *cpi, int rate_err_tol) {
                                          : p_rc->rate_error_estimate;
 #endif
 
-  if (p_rc->bits_off_target && total_actual_bits > 0) {
-    if (cpi->ppi->lap_enabled) {
-      rate_err_factor = rolling_arf_group_actual_bits /
-                        DOUBLE_DIVIDE_CHECK(rolling_arf_group_target_bits);
+  if ((p_rc->bits_off_target && total_actual_bits > 0) &&
+      (rolling_arf_group_target_bits >= 1.0)) {
+    if (rolling_arf_group_actual_bits > rolling_arf_group_target_bits) {
+      double error_fraction =
+          (rolling_arf_group_actual_bits - rolling_arf_group_target_bits) /
+          rolling_arf_group_target_bits;
+      error_fraction = (error_fraction > 1.0) ? 1.0 : error_fraction;
+      rate_err_factor = 1.0 + error_fraction;
     } else {
-      rate_err_factor = 1.0 - ((double)(bits_off_target) /
-                               AOMMAX(total_actual_bits, bits_left));
+      double error_fraction =
+          (rolling_arf_group_target_bits - rolling_arf_group_actual_bits) /
+          rolling_arf_group_target_bits;
+      rate_err_factor = 1.0 - error_fraction;
     }
 
-    // Adjustment is damped if this is 1 pass with look ahead processing
-    // (as there are only ever a few frames of data) and for all but the first
-    // GOP in normal two pass.
-    if ((twopass->bpm_factor != 1.0) || cpi->ppi->lap_enabled) {
-      rate_err_factor = 1.0 + ((rate_err_factor - 1.0) / damp_fac);
-    }
     rate_err_factor = AOMMAX(min_fac, AOMMIN(max_fac, rate_err_factor));
   }
 
@@ -270,36 +248,38 @@ static void twopass_update_bpm_factor(AV1_COMP *cpi, int rate_err_tol) {
   if ((rate_err_factor < 1.0 && err_estimate >= 0) ||
       (rate_err_factor > 1.0 && err_estimate <= 0)) {
     twopass->bpm_factor *= rate_err_factor;
-    if (rate_err_tol >= 100) {
-      twopass->bpm_factor =
-          AOMMAX(min_fac, AOMMIN(max_fac, twopass->bpm_factor));
-    } else {
-      twopass->bpm_factor = AOMMAX(0.1, AOMMIN(10.0, twopass->bpm_factor));
-    }
+    twopass->bpm_factor = AOMMAX(min_fac, AOMMIN(max_fac, twopass->bpm_factor));
   }
 }
 
-static int qbpm_enumerator(int rate_err_tol) {
-  return 1200000 + ((300000 * AOMMIN(75, AOMMAX(rate_err_tol - 25, 0))) / 75);
+static const double q_div_term[(QINDEX_RANGE >> 5) + 1] = { 32.0, 40.0, 46.0,
+                                                            52.0, 56.0, 60.0,
+                                                            64.0, 68.0, 72.0 };
+#define EPMB_SCALER 1250000
+static double calc_correction_factor(double err_per_mb, int q) {
+  double power_term = 0.90;
+  const int index = q >> 5;
+  const double divisor =
+      q_div_term[index] +
+      (((q_div_term[index + 1] - q_div_term[index]) * (q % 32)) / 32.0);
+  double error_term = EPMB_SCALER * pow(err_per_mb, power_term);
+  return error_term / divisor;
 }
 
 // Similar to find_qindex_by_rate() function in ratectrl.c, but includes
 // calculation of a correction_factor.
 static int find_qindex_by_rate_with_correction(
     int desired_bits_per_mb, aom_bit_depth_t bit_depth, double error_per_mb,
-    double group_weight_factor, int rate_err_tol, int best_qindex,
-    int worst_qindex) {
+    double group_weight_factor, int best_qindex, int worst_qindex) {
   assert(best_qindex <= worst_qindex);
   int low = best_qindex;
   int high = worst_qindex;
 
   while (low < high) {
     const int mid = (low + high) >> 1;
-    const double mid_factor = calc_correction_factor(error_per_mb, mid);
+    const double q_factor = calc_correction_factor(error_per_mb, mid);
     const double q = av1_convert_qindex_to_q(mid, bit_depth);
-    const int enumerator = qbpm_enumerator(rate_err_tol);
-    const int mid_bits_per_mb =
-        (int)((enumerator * mid_factor * group_weight_factor) / q);
+    const int mid_bits_per_mb = (int)((q_factor * group_weight_factor) / q);
 
     if (mid_bits_per_mb > desired_bits_per_mb) {
       low = mid + 1;
@@ -359,8 +339,8 @@ static int get_twopass_worst_quality(AV1_COMP *cpi, const double av_frame_err,
     // content at the given rate.
     int q = find_qindex_by_rate_with_correction(
         target_norm_bits_per_mb, cpi->common.seq_params->bit_depth,
-        av_err_per_mb, cpi->ppi->twopass.bpm_factor, rate_err_tol,
-        rc->best_quality, rc->worst_quality);
+        av_err_per_mb, cpi->ppi->twopass.bpm_factor, rc->best_quality,
+        rc->worst_quality);
 
     // Restriction on active max q for constrained quality mode.
     if (rc_cfg->mode == AOM_CQ) q = AOMMAX(q, rc_cfg->cq_level);
@@ -4241,7 +4221,7 @@ void av1_twopass_postencode_update(AV1_COMP *cpi) {
     int maxq_adj_limit;
     minq_adj_limit =
         (rc_cfg->mode == AOM_CQ ? MINQ_ADJ_LIMIT_CQ : MINQ_ADJ_LIMIT);
-    maxq_adj_limit = rc->worst_quality - rc->active_worst_quality;
+    maxq_adj_limit = (rc->worst_quality - rc->active_worst_quality);
 
     // Undershoot
     if ((rc_cfg->under_shoot_pct < 100) &&
@@ -4253,8 +4233,9 @@ void av1_twopass_postencode_update(AV1_COMP *cpi) {
       if ((pct_error >= rc_cfg->under_shoot_pct) &&
           (p_rc->rate_error_estimate > 0)) {
         twopass->extend_minq += 1;
+        twopass->extend_maxq -= 1;
       }
-      twopass->extend_maxq -= 1;
+
       // Overshoot
     } else if ((rc_cfg->over_shoot_pct < 100) &&
                (p_rc->rolling_actual_bits > p_rc->rolling_target_bits)) {
@@ -4266,18 +4247,8 @@ void av1_twopass_postencode_update(AV1_COMP *cpi) {
       if ((pct_error >= rc_cfg->over_shoot_pct) &&
           (p_rc->rate_error_estimate < 0)) {
         twopass->extend_maxq += 1;
+        twopass->extend_minq -= 1;
       }
-      twopass->extend_minq -= 1;
-    } else {
-      // Adjustment for extreme local overshoot.
-      // Only applies when normal adjustment above is not used (e.g.
-      // when threshold is set to 100).
-      if (rc->projected_frame_size > (2 * rc->base_frame_target) &&
-          rc->projected_frame_size > (2 * rc->avg_frame_bandwidth))
-        ++twopass->extend_maxq;
-      // Unwind extreme overshoot adjustment.
-      else if (p_rc->rolling_target_bits > p_rc->rolling_actual_bits)
-        --twopass->extend_maxq;
     }
     twopass->extend_minq =
         clamp(twopass->extend_minq, -minq_adj_limit, minq_adj_limit);
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index 7defafcaae..c31651c972 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -1742,16 +1742,16 @@ static void adjust_active_best_and_worst_quality(const AV1_COMP *cpi,
       active_best_quality -= extend_minq;
       active_worst_quality += (extend_maxq / 2);
 #else
-      active_best_quality -= cpi->ppi->twopass.extend_minq / 4;
-      active_worst_quality += (cpi->ppi->twopass.extend_maxq / 2);
+      active_best_quality -= cpi->ppi->twopass.extend_minq / 8;
+      active_worst_quality += (cpi->ppi->twopass.extend_maxq / 4);
 #endif
     } else {
 #if CONFIG_FPMT_TEST
       active_best_quality -= extend_minq / 2;
       active_worst_quality += extend_maxq;
 #else
-      active_best_quality -= cpi->ppi->twopass.extend_minq / 4;
-      active_worst_quality += cpi->ppi->twopass.extend_maxq;
+      active_best_quality -= cpi->ppi->twopass.extend_minq / 8;
+      active_worst_quality += cpi->ppi->twopass.extend_maxq / 4;
 #endif
     }
   }
diff --git a/test/level_test.cc b/test/level_test.cc
index a7c26d2305..6d59f45272 100644
--- a/test/level_test.cc
+++ b/test/level_test.cc
@@ -135,12 +135,12 @@ TEST_P(LevelTest, TestLevelMonitoringLowBitrate) {
   // To save run time, we only test speed 4.
   if (cpu_used_ == 4) {
     libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 40);
+                                       30, 1, 0, 30);
     target_level_ = kLevelKeepStats;
     cfg_.rc_target_bitrate = 1000;
-    cfg_.g_limit = 40;
+    cfg_.g_limit = 30;
     ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    ASSERT_EQ(level_[0], 0);
+    ASSERT_LE(level_[0], 0);
   }
 }
 
@@ -148,12 +148,12 @@ TEST_P(LevelTest, TestLevelMonitoringHighBitrate) {
   // To save run time, we only test speed 4.
   if (cpu_used_ == 4) {
     libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 40);
+                                       30, 1, 0, 30);
     target_level_ = kLevelKeepStats;
     cfg_.rc_target_bitrate = 4000;
-    cfg_.g_limit = 40;
+    cfg_.g_limit = 30;
     ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    ASSERT_EQ(level_[0], 4);
+    ASSERT_LE(level_[0], 4);
   }
 }
 
@@ -166,7 +166,7 @@ TEST_P(LevelTest, TestTargetLevel0) {
     target_level_ = target_level;
     cfg_.rc_target_bitrate = 4000;
     ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    ASSERT_EQ(level_[0], target_level);
+    ASSERT_LE(level_[0], target_level);
   }
 }