aom: Merge tag 'v3.8.2' into main

From fa4304274a644acfbb87c8d67631e664e3f06cad Mon Sep 17 00:00:00 2001
From: Cheng Chen <[EMAIL REDACTED]>
Date: Mon, 22 Jan 2024 16:59:05 -0800
Subject: [PATCH 01/12] Zero initialize an array in cdef search

This array was not initialized.

The array is supposed to store the best corresponding cdef filter
strengths, and each of its values is an index into another array.

Without the initialization, this might lead to an out-of-bounds
access to the other array.

Change-Id: Id65c448f479865992ada0b09ff6e97b2d2ac3362
(cherry picked from commit 6fcad835fbd1cba379f34ecb96755888d21a98df)
---
 av1/encoder/pickcdef.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index 232a2f9edb..ed5fa55f17 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -894,7 +894,7 @@ void av1_cdef_search(AV1_COMP *cpi) {
   int rdmult = cpi->td.mb.rdmult;
   for (int i = 0; i <= 3; i++) {
     if (i > max_signaling_bits) break;
-    int best_lev0[CDEF_MAX_STRENGTHS];
+    int best_lev0[CDEF_MAX_STRENGTHS] = { 0 };
     int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
     const int nb_strengths = 1 << i;
     uint64_t tot_mse;

From 0a23e12394843688df13f8ff5121c26424e5eb7a Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Tue, 23 Jan 2024 16:58:32 +0000
Subject: [PATCH 02/12] Fix divide by zero.

Raised in issue #3523:

Explicitly check p_rc->rolling_target_bits > 0 for the identified case.

Also:-

When initializing p_rc->rolling_actual_bits and
p_rc->rolling_target_bits, force them to max(1, x).

In the encoder interface, range check that the target bitrate is > 0.

Bug: aomedia:3523
Change-Id: If7ba7e98af627227498443c2e9a739c65ec8d68b
(cherry picked from commit 77cf417565ad2c527d5c351927f11db3764fd93c)
---
 av1/av1_cx_iface.c           | 6 ++++++
 av1/encoder/pass2_strategy.c | 3 ++-
 av1/encoder/ratectrl.c       | 8 ++++----
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 1175a32ef6..0ad6f439e4 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -623,6 +623,11 @@ static aom_codec_err_t allocate_and_set_string(const char *src,
       ERROR(#memb " out of range [" #lo ".." #hi "]"); \
   } while (0)
 
+#define RANGE_CHECK_LO(p, memb, lo)                                     \
+  do {                                                                  \
+    if (!((p)->memb >= (lo))) ERROR(#memb " out of range [" #lo "..]"); \
+  } while (0)
+
 #define RANGE_CHECK_HI(p, memb, hi)                                     \
   do {                                                                  \
     if (!((p)->memb <= (hi))) ERROR(#memb " out of range [.." #hi "]"); \
@@ -662,6 +667,7 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
   RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den);
   RANGE_CHECK_HI(cfg, g_profile, MAX_PROFILES - 1);
 
+  RANGE_CHECK_LO(cfg, rc_target_bitrate, 1);
   RANGE_CHECK_HI(cfg, rc_max_quantizer, 63);
   RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer);
   RANGE_CHECK_BOOL(extra_cfg, lossless);
diff --git a/av1/encoder/pass2_strategy.c b/av1/encoder/pass2_strategy.c
index d85440df5e..68b10567b4 100644
--- a/av1/encoder/pass2_strategy.c
+++ b/av1/encoder/pass2_strategy.c
@@ -4228,7 +4228,8 @@ void av1_twopass_postencode_update(AV1_COMP *cpi) {
   twopass->kf_group_bits = AOMMAX(twopass->kf_group_bits, 0);
 
   // If the rate control is drifting consider adjustment to min or maxq.
-  if ((rc_cfg->mode != AOM_Q) && !cpi->rc.is_src_frame_alt_ref) {
+  if ((rc_cfg->mode != AOM_Q) && !cpi->rc.is_src_frame_alt_ref &&
+      (p_rc->rolling_target_bits > 0)) {
     int minq_adj_limit;
     int maxq_adj_limit;
     minq_adj_limit =
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index 9062136aad..1f1ff81386 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -404,10 +404,10 @@ void av1_primary_rc_init(const AV1EncoderConfig *oxcf,
   p_rc->rate_correction_factors[KF_STD] = 1.0;
   p_rc->bits_off_target = p_rc->starting_buffer_level;
 
-  p_rc->rolling_target_bits =
-      (int)(oxcf->rc_cfg.target_bandwidth / oxcf->input_cfg.init_framerate);
-  p_rc->rolling_actual_bits =
-      (int)(oxcf->rc_cfg.target_bandwidth / oxcf->input_cfg.init_framerate);
+  p_rc->rolling_target_bits = AOMMAX(
+      1, (int)(oxcf->rc_cfg.target_bandwidth / oxcf->input_cfg.init_framerate));
+  p_rc->rolling_actual_bits = AOMMAX(
+      1, (int)(oxcf->rc_cfg.target_bandwidth / oxcf->input_cfg.init_framerate));
 }
 
 void av1_rc_init(const AV1EncoderConfig *oxcf, RATE_CONTROL *rc) {

From e99100ea09e6b7079ffd667013e22729c028b271 Mon Sep 17 00:00:00 2001
From: Marco Paniconi <marpan@google.com>
Date: Thu, 1 Feb 2024 19:36:32 +0000
Subject: [PATCH 03/12] Disable the check on 0 target_bitrate in
 validate_config

This is causing failures in the latest libaom roll here:
https://webrtc-review.googlesource.com/c/src/+/337284

That check was added as part of this CL:
https://aomedia-review.googlesource.com/c/aom/+/186601

Revert that part for now to enable the roll to proceed.

Some webrtc tests are creating the encoder with 0 target_bitrate
and then assigning non-zero bitrate before encoding the first
frames, so the check in validate_config() was causing
InitEncode to fail.

Bug: aomedia:3523

Change-Id: I8462ffba1d80219272d6225e572e047184337cd7
(cherry picked from commit 0cee19cfc8b69661a4c808624d36def44450f14e)
---
 av1/av1_cx_iface.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 0ad6f439e4..1175a32ef6 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -623,11 +623,6 @@ static aom_codec_err_t allocate_and_set_string(const char *src,
       ERROR(#memb " out of range [" #lo ".." #hi "]"); \
   } while (0)
 
-#define RANGE_CHECK_LO(p, memb, lo)                                     \
-  do {                                                                  \
-    if (!((p)->memb >= (lo))) ERROR(#memb " out of range [" #lo "..]"); \
-  } while (0)
-
 #define RANGE_CHECK_HI(p, memb, hi)                                     \
   do {                                                                  \
     if (!((p)->memb <= (hi))) ERROR(#memb " out of range [.." #hi "]"); \
@@ -667,7 +662,6 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
   RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den);
   RANGE_CHECK_HI(cfg, g_profile, MAX_PROFILES - 1);
 
-  RANGE_CHECK_LO(cfg, rc_target_bitrate, 1);
   RANGE_CHECK_HI(cfg, rc_max_quantizer, 63);
   RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer);
   RANGE_CHECK_BOOL(extra_cfg, lossless);

From 80d175034c13f910002818c26f07f59f46d17336 Mon Sep 17 00:00:00 2001
From: Wan-Teh Chang <wtc@google.com>
Date: Mon, 8 Jan 2024 17:05:47 -0800
Subject: [PATCH 04/12] Fix over reads in aom_convolve_copy_neon()

Bug: aomedia:3535
Bug: b:317646516
Change-Id: Id5502ec6acb2e8813e605bbd2ba8c879418ccf9e
(cherry picked from commit e2ba9f09f2003da0a8117f4e5f2d6ab537dba650)
---
 aom_dsp/arm/aom_convolve_copy_neon.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/aom_dsp/arm/aom_convolve_copy_neon.c b/aom_dsp/arm/aom_convolve_copy_neon.c
index d746f9e4d8..325d6f29ff 100644
--- a/aom_dsp/arm/aom_convolve_copy_neon.c
+++ b/aom_dsp/arm/aom_convolve_copy_neon.c
@@ -9,6 +9,7 @@
  */
 
 #include <arm_neon.h>
+#include <string.h>
 
 #include "config/aom_dsp_rtcd.h"
 
@@ -38,13 +39,13 @@ void aom_convolve_copy_neon(const uint8_t *src, ptrdiff_t src_stride,
     }
   } else if (!(w & 0x03)) {
     for (y = 0; y < h; ++y) {
-      vst1_lane_u32((uint32_t *)(dst), vreinterpret_u32_u8(vld1_u8(src)), 0);
+      memcpy(dst, src, sizeof(uint32_t));
       src += src_stride;
       dst += dst_stride;
     }
   } else if (!(w & 0x01)) {
     for (y = 0; y < h; ++y) {
-      vst1_lane_u16((uint16_t *)(dst), vreinterpret_u16_u8(vld1_u8(src)), 0);
+      memcpy(dst, src, sizeof(uint16_t));
       src += src_stride;
       dst += dst_stride;
     }

From 5f16a838d3f2236d2473d95d78e882f6d89f81bc Mon Sep 17 00:00:00 2001
From: George Steed <george.steed@arm.com>
Date: Wed, 7 Feb 2024 15:35:27 +0000
Subject: [PATCH 05/12] cpu.cmake: Fix AArch64 compiler flag tests

The existing check_c_compiler_flag test uses a regex internally to match
against common error message strings in stderr from the compiler,
however this does not match the "invalid feature modifier" error that is
emitted by certain versions of GCC. This leads to the feature being
incorrectly enabled only to then fail to compile the library later.

To get around this, use the aom_check_source_compiles helper routine to
compile a trivial program with the flag instead since this does not
suffer the same problems and correctly identifies the features as being
not available.

Bug: aomedia:3543
Change-Id: I072281b2d3e986ee859ff8268bfad7a0fce3fd4c
(cherry picked from commit faab48a3ab2a6b0061ff98b175032532e413b8b2)
---
 build/cmake/cpu.cmake | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/build/cmake/cpu.cmake b/build/cmake/cpu.cmake
index a9b7a67070..bd13d035d5 100644
--- a/build/cmake/cpu.cmake
+++ b/build/cmake/cpu.cmake
@@ -26,8 +26,19 @@ if("${AOM_TARGET_CPU}" STREQUAL "arm64")
   foreach(flavor ${ARM64_FLAVORS})
     if(ENABLE_${flavor} AND NOT DEFINED AOM_${flavor}_FLAG)
       set(AOM_${flavor}_FLAG "${AOM_${flavor}_DEFAULT_FLAG}")
+      string(TOLOWER "${flavor}" flavor_lower)
+
+      # Do not use check_c_compiler_flag here since the regex used to match
+      # against stderr does not recognise the "invalid feature modifier" error
+      # produced by certain versions of GCC, leading to the feature being
+      # incorrectly marked as available.
+      set(OLD_CMAKE_REQURED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+      set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${AOM_${flavor}_FLAG}")
       unset(FLAG_SUPPORTED)
-      check_c_compiler_flag("${AOM_${flavor}_FLAG}" FLAG_SUPPORTED)
+      aom_check_source_compiles("arm_feature_flag_${flavor_lower}_available"
+                                "static void function(void) {}" FLAG_SUPPORTED)
+      set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQURED_FLAGS})
+
       if(NOT ${FLAG_SUPPORTED})
         set(ENABLE_${flavor} 0)
       endif()

From 9972ec4bdfbf4412b3ecafc9f76fc582080afbf8 Mon Sep 17 00:00:00 2001
From: George Steed <george.steed@arm.com>
Date: Wed, 7 Feb 2024 15:38:54 +0000
Subject: [PATCH 06/12] aarch64_cpudetect.c: Don't read AT_HWCAPs if they are
 not needed

If FEAT_I8MM is not supported by the compiler then we do not test the
value from getauxval(AT_HWCAP2), leading to an unused variable warning.

To suppress this, only read the variable if we are going to use it,
which for now is only to check FEAT_I8MM. Also do similarly for AT_HWCAP
to match.

Bug: aomedia:3543
Change-Id: I855c255105cbfc448f64df317507dd9184731479
(cherry picked from commit 2036fbe2d8449c1054b4fb8bdf22c78f92655afc)
---
 aom_ports/aarch64_cpudetect.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/aom_ports/aarch64_cpudetect.c b/aom_ports/aarch64_cpudetect.c
index 43d5a149c8..13299a688e 100644
--- a/aom_ports/aarch64_cpudetect.c
+++ b/aom_ports/aarch64_cpudetect.c
@@ -108,8 +108,13 @@ static int arm_get_cpu_caps(void) {
 
 static int arm_get_cpu_caps(void) {
   int flags = 0;
+#if HAVE_ARM_CRC32 || HAVE_NEON_DOTPROD || HAVE_SVE
   unsigned long hwcap = getauxval(AT_HWCAP);
+#endif
+#if HAVE_NEON_I8MM
   unsigned long hwcap2 = getauxval(AT_HWCAP2);
+#endif
+
 #if HAVE_NEON
   flags |= HAS_NEON;  // Neon is mandatory in Armv8.0-A.
 #endif  // HAVE_NEON

From fe8b483e32d2f2fd7c9768b638be22cdf19095af Mon Sep 17 00:00:00 2001
From: George Steed <george.steed@arm.com>
Date: Wed, 7 Feb 2024 15:40:34 +0000
Subject: [PATCH 07/12] mem_neon.h: Define vld1q_u16_x4 until GCC 8.5.0

When trying to build the library with GCC 8.3.0 the vld1q_u16_x4
intrinsic is not available, leading to the code failing to compile.

We already provide a definition for this helper for earlier versions of
GCC, so adjust the checks so we also provide it until GCC 8.5.0 instead.

Bug: aomedia:3543
Change-Id: I98a32ae6abd068f326a1075dd6782b190e2eac1d
(cherry picked from commit 0b2a8639c2aef7494d333fae47b3817d3c08e3d4)
---
 aom_dsp/arm/mem_neon.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/aom_dsp/arm/mem_neon.h b/aom_dsp/arm/mem_neon.h
index d1ac648d1e..b86397f3bb 100644
--- a/aom_dsp/arm/mem_neon.h
+++ b/aom_dsp/arm/mem_neon.h
@@ -56,17 +56,10 @@ static INLINE uint16x8x4_t vld1q_u16_x4(const uint16_t *ptr) {
 
 #elif defined(__GNUC__) && !defined(__clang__)  // GCC 64-bit.
 #if __GNUC__ < 8
-
 static INLINE uint8x16x2_t vld1q_u8_x2(const uint8_t *ptr) {
   uint8x16x2_t res = { { vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16) } };
   return res;
 }
-
-static INLINE uint16x8x4_t vld1q_u16_x4(const uint16_t *ptr) {
-  uint16x8x4_t res = { { vld1q_u16(ptr + 0 * 8), vld1q_u16(ptr + 1 * 8),
-                         vld1q_u16(ptr + 2 * 8), vld1q_u16(ptr + 3 * 8) } };
-  return res;
-}
 #endif  // __GNUC__ < 8
 
 #if __GNUC__ < 9
@@ -76,6 +69,15 @@ static INLINE uint8x16x3_t vld1q_u8_x3(const uint8_t *ptr) {
   return res;
 }
 #endif  // __GNUC__ < 9
+
+// vld1q_u16_x4 is defined from GCC 8.5.0 and onwards.
+#if ((__GNUC__ << 8) | __GNUC_MINOR__) < 0x805
+static INLINE uint16x8x4_t vld1q_u16_x4(const uint16_t *ptr) {
+  uint16x8x4_t res = { { vld1q_u16(ptr + 0 * 8), vld1q_u16(ptr + 1 * 8),
+                         vld1q_u16(ptr + 2 * 8), vld1q_u16(ptr + 3 * 8) } };
+  return res;
+}
+#endif  // ((__GNUC__ << 8) | __GNUC_MINOR__) < 0x805
 #endif  // defined(__GNUC__) && !defined(__clang__)
 
 static INLINE void store_u8_8x2(uint8_t *s, ptrdiff_t p, const uint8x8_t s0,

From bff87d33a9ac9ffe08f679beea2b565bd1b2a388 Mon Sep 17 00:00:00 2001
From: Wan-Teh Chang <wtc@google.com>
Date: Wed, 28 Feb 2024 16:16:21 -0800
Subject: [PATCH 08/12] Redo multiply in update_a_sep_sym/update_b_sep_sym

Implement an alternative approach to avoid integer overflows in the last
multiplication in update_a_sep_sym() and update_b_sep_sym(). This
approach does not need data-dependent scaling.

Bug: b:319140742
Bug: oss-fuzz:66474
Change-Id: I1d0e8f092a9c8f3fb775be05931c652aa93bca3b
(cherry picked from commit 835fc058d0dbaa2085c87b5a60e50afd30927c80)
---
 av1/encoder/pickrst.c |  75 ++++-----
 test/wiener_test.cc   | 382 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 413 insertions(+), 44 deletions(-)

diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index 6429064175..6753c9edc6 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -1103,6 +1103,17 @@ static INLINE int wrap_index(int i, int wiener_win) {
   return (i >= wiener_halfwin1 ? wiener_win - 1 - i : i);
 }
 
+// Calculates x * w / WIENER_TAP_SCALE_FACTOR. The multiplication may overflow,
+// so we do the multiplication by components and combine it with the division.
+static INLINE int64_t multiply_and_scale(int64_t x, int32_t w) {
+  // Let w = w1 * WIENER_TAP_SCALE_FACTOR + w2
+  const int32_t w1 = w / WIENER_TAP_SCALE_FACTOR;
+  const int32_t w2 = w - w1 * WIENER_TAP_SCALE_FACTOR;
+  // Let y = x * w / WIENER_TAP_SCALE_FACTOR
+  const int64_t y = x * w1 + x * w2 / WIENER_TAP_SCALE_FACTOR;
+  return y;
+}
+
 // Solve linear equations to find Wiener filter tap values
 // Taps are output scaled by WIENER_FILT_STEP
 static int linsolve_wiener(int n, int64_t *A, int stride, int64_t *b,
@@ -1190,16 +1201,6 @@ static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc,
     }
   }
 
-  // b/274668506: This is the dual branch for the issue in b/272139363. The fix
-  // is similar. See comments in update_b_sep_sym() below.
-  int32_t max_b_l = 0;
-  for (int l = 0; l < wiener_win; ++l) {
-    const int32_t abs_b_l = abs(b[l]);
-    if (abs_b_l > max_b_l) max_b_l = abs_b_l;
-  }
-  const int scale_threshold = 128 * WIENER_TAP_SCALE_FACTOR;
-  const int scaler = max_b_l < scale_threshold ? 1 : 4;
-
   for (i = 0; i < wiener_win; i++) {
     for (j = 0; j < wiener_win; j++) {
       int k, l;
@@ -1207,10 +1208,16 @@ static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc,
         const int kk = wrap_index(k, wiener_win);
         for (l = 0; l < wiener_win; ++l) {
           const int ll = wrap_index(l, wiener_win);
-          B[ll * wiener_halfwin1 + kk] +=
-              Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] /
-              (scaler * WIENER_TAP_SCALE_FACTOR) * b[j] /
-              (WIENER_TAP_SCALE_FACTOR / scaler);
+          // Calculate
+          // B[ll * wiener_halfwin1 + kk] +=
+          //    Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] /
+          //    WIENER_TAP_SCALE_FACTOR * b[j] / WIENER_TAP_SCALE_FACTOR;
+          //
+          // The last multiplication may overflow, so we combine the last
+          // multiplication with the last division.
+          const int64_t x = Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] /
+                            WIENER_TAP_SCALE_FACTOR;
+          B[ll * wiener_halfwin1 + kk] += multiply_and_scale(x, b[j]);
         }
       }
     }
@@ -1261,32 +1268,6 @@ static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc,
     }
   }
 
-  // b/272139363: The computation,
-  //   Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] /
-  //          WIENER_TAP_SCALE_FACTOR * a[l] / WIENER_TAP_SCALE_FACTOR;
-  // may generate a signed-integer-overflow. Conditionally scale the terms to
-  // avoid a potential overflow.
-  //
-  // Hc contains accumulated correlation statistics and it is desired to leave
-  // as much room as possible for Hc. It was experimentally observed that the
-  // primary issue manifests itself with the second, a[l], multiply. For
-  // max_a_l < WIENER_TAP_SCALE_FACTOR the first multiply with a[k] should not
-  // increase dynamic range and the second multiply should hence be safe.
-  // Thereafter a safe scale_threshold depends on the actual operational range
-  // of Hc. The largest scale_threshold is expected to depend on bit-depth
-  // (av1_compute_stats_highbd_c() scales highbd to 8-bit) and maximum
-  // restoration-unit size (256), leading up to 32-bit positive numbers in Hc.
-  // Noting that the caller, wiener_decompose_sep_sym(), initializes a[...]
-  // to a range smaller than 16 bits, the scale_threshold is set as below for
-  // convenience.
-  int32_t max_a_l = 0;
-  for (int l = 0; l < wiener_win; ++l) {
-    const int32_t abs_a_l = abs(a[l]);
-    if (abs_a_l > max_a_l) max_a_l = abs_a_l;
-  }
-  const int scale_threshold = 128 * WIENER_TAP_SCALE_FACTOR;
-  const int scaler = max_a_l < scale_threshold ? 1 : 4;
-
   for (i = 0; i < wiener_win; i++) {
     const int ii = wrap_index(i, wiener_win);
     for (j = 0; j < wiener_win; j++) {
@@ -1294,10 +1275,16 @@ static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc,
       int k, l;
       for (k = 0; k < wiener_win; ++k) {
         for (l = 0; l < wiener_win; ++l) {
-          B[jj * wiener_halfwin1 + ii] +=
-              Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] /
-              (scaler * WIENER_TAP_SCALE_FACTOR) * a[l] /
-              (WIENER_TAP_SCALE_FACTOR / scaler);
+          // Calculate
+          // B[jj * wiener_halfwin1 + ii] +=
+          //     Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] /
+          //     WIENER_TAP_SCALE_FACTOR * a[l] / WIENER_TAP_SCALE_FACTOR;
+          //
+          // The last multiplication may overflow, so we combine the last
+          // multiplication with the last division.
+          const int64_t x = Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] /
+                            WIENER_TAP_SCALE_FACTOR;
+          B[jj * wiener_halfwin1 + ii] += multiply_and_scale(x, a[l]);
         }
       }
     }
diff --git a/test/wiener_test.cc b/test/wiener_test.cc
index 7eb6372aaa..b995c84d8f 100644
--- a/test/wiener_test.cc
+++ b/test/wiener_test.cc
@@ -1075,6 +1075,233 @@ TEST(SearchWienerTest, 12bitSignedIntegerOverflowInUpdateBSepSym) {
   EXPECT_EQ(aom_codec_destroy(&enc), AOM_CODEC_OK);
 }
 
+// A test that reproduces crbug.com/oss-fuzz/66474: signed integer overflow in
+// update_b_sep_sym().
+TEST(SearchWienerTest, 12bitSignedIntegerOverflowInUpdateBSepSym2) {
+  constexpr int kWidth = 510;
+  constexpr int kHeight = 3;
+  static const uint16_t buffer[kWidth * kHeight] = {
+    // Y plane:
+    2136, 4095, 0,    0,    0,    4095, 4095, 0,    4095, 4095, 329,  0,
+    4095, 0,    4095, 2587, 0,    0,    0,    4095, 0,    0,    0,    0,
+    4095, 0,    4095, 878,  0,    4095, 0,    4095, 1474, 0,    573,  0,
+    2401, 0,    1663, 4095, 0,    9,    3381, 0,    1084, 0,    270,  0,
+    4095, 4095, 4095, 3992, 4095, 2047, 0,    0,    0,    4095, 41,   0,
+    2726, 279,  0,    0,    4095, 0,    0,    1437, 0,    4095, 4095, 0,
+    0,    0,    4095, 1683, 183,  3976, 3052, 0,    4095, 0,    0,    0,
+    4095, 4095, 1882, 4095, 0,    4095, 83,   4095, 0,    4095, 0,    0,
+    4095, 4095, 0,    0,    1637, 4095, 0,    4095, 0,    4095, 4095, 4095,
+    0,    4095, 197,  4095, 563,  0,    3696, 3073, 3670, 0,    4095, 4095,
+    0,    0,    0,    4095, 0,    0,    0,    0,    4095, 4095, 0,    0,
+    0,    3539, 3468, 0,    2856, 3880, 0,    0,    1350, 2358, 4095, 802,
+    4051, 0,    4095, 4095, 4095, 1677, 4095, 1135, 0,    4095, 0,    0,
+    0,    618,  4095, 4095, 4095, 0,    2080, 4095, 0,    0,    1917, 0,
+    0,    4095, 1937, 2835, 4095, 4095, 4095, 4095, 0,    4095, 4095, 3938,
+    1707, 0,    0,    0,    4095, 448,  4095, 0,    1000, 2481, 3408, 0,
+    0,    4095, 0,    3176, 0,    4095, 0,    4095, 4095, 4095, 0,    160,
+    222,  1134, 4095, 4095, 0,    3539, 4095, 569,  3364, 0,    4095, 3687,
+    0,    4095, 0,    0,    473,  0,    0,    4095, 298,  0,    3126, 4095,
+    3854, 424,  0,    0,    4095, 3893, 0,    0,    175,  2774, 0,    4095,
+    0,    2661, 950,  4095, 0,    1553, 0,    4095, 0,    4095, 4095, 2767,
+    3630, 799,  255,  0,    4095, 0,    0,    4095, 2375, 0,    0,    0,
+    0,    4095, 4095, 0,    0,    0,    1404, 4095, 4095, 4095, 4095, 2317,
+    4095, 1227, 2205, 775,  0,    4095, 0,    0,    797,  1125, 736,  1773,
+    2996, 4095, 2822, 4095, 4095, 0,    0,    0,    919,  0,    968,  3426,
+    2702, 2613, 3647, 0,    0,    4095, 4095, 129,  4095, 0,    0,    4095,
+    0,    0,    3632, 0,    3275, 123,  4095, 1566, 0,    0,    0,    1609,
+    0,    1466, 4095, 577,  4095, 4095, 0,    4095, 1103, 1103, 4095, 0,
+    1909, 0,    4095, 0,    4095, 4095, 227,  0,    4095, 2168, 4095, 374,
+    4095, 4095, 4095, 0,    0,    0,    4095, 2066, 4095, 4095, 1475, 0,
+    1959, 673,  4095, 0,    4095, 4095, 4095, 1142, 0,    464,  1819, 2033,
+    4095, 0,    2212, 4095, 4095, 3961, 0,    4095, 0,    2838, 0,    4095,
+    4095, 4095, 4095, 0,    3796, 3379, 2208, 0,    4095, 4095, 1943, 478,
+    3573, 4095, 1763, 0,    0,    4095, 4095, 4095, 4095, 2061, 3346, 4095,
+    0,    0,    4095, 0,    4095, 4095, 4095, 3738, 4095, 4095, 0,    4095,
+    0,    425,  0,    0,    0,    927,  0,    0,    1814, 966,  4095, 0,
+    0,    3185, 570,  3883, 2932, 0,    1413, 4095, 4095, 4095, 4095, 2477,
+    2270, 4095, 2531, 4095, 1936, 3110, 99,   3936, 4095, 1315, 4095, 0,
+    4095, 3564, 4095, 0,    0,    2797, 4095, 0,    1598, 0,    0,    3064,
+    3526, 4095, 4095, 0,    3473, 3661, 0,    2388, 0,    4095, 639,  4095,
+    0,    4095, 2390, 3715, 4095, 0,    0,    0,    740,  4095, 1432, 0,
+    0,    0,    4057, 0,    0,    757,  4095, 4095, 0,    1437, 0,    0,
+    4095, 0,    0,    0,    0,    0,    272,  4095, 4095, 4095, 2175, 4058,
+    0,    4095, 4095, 4095, 3959, 3535, 0,    4095, 0,    0,    4095, 4095,
+    4095, 4095, 0,    0,    4095, 4095, 4095, 3440, 3811, 0,    4095, 4095,
+    4095, 4095, 0,    4095, 3193, 3674, 2819, 4095, 4095, 4048, 0,    0,
+    4037, 4095, 3110, 4095, 1003, 0,    3650, 4095, 4095, 3154, 0,    1274,
+    2192, 4095, 0,    4095, 0,    2814, 981,  370,  1407, 0,    4095, 1518,
+    4095, 0,    0,    0,    0,    4095, 1577, 0,    4095, 0,    2607, 4095,
+    3583, 0,    0,    4095, 1983, 1498, 4095, 4095, 2645, 4095, 4095, 3480,
+    2587, 4095, 0,    0,    0,    0,    4095, 0,    4095, 4095, 0,    284,
+    3973, 0,    0,    3677, 2463, 4095, 1338, 0,    4095, 0,    0,    4095,
+    212,  2000, 4095, 4095, 0,    4095, 3780, 2039, 4095, 2453, 4095, 2050,
+    2660, 1,    3839, 5,    1,    505,  809,  2907, 0,    0,    0,    1421,
+    4095, 0,    0,    4095, 4095, 4095, 552,  0,    0,    4095, 3056, 0,
+    0,    0,    0,    0,    4095, 0,    3386, 0,    0,    0,    4095, 0,
+    0,    3404, 2702, 3534, 4095, 3562, 0,    4095, 4095, 150,  4095, 0,
+    0,    3599, 4095, 4095, 0,    0,    0,    4095, 4095, 2093, 4095, 3753,
+    3754, 4095, 0,    4095, 2733, 4095, 4095, 0,    0,    4095, 0,    0,
+    0,    1496, 4095, 2366, 2936, 2494, 4095, 744,  1173, 4095, 0,    0,
+    0,    1966, 4095, 4095, 0,    178,  3254, 4095, 4095, 995,  4095, 2083,
+    0,    2639, 4095, 3422, 4095, 4095, 4095, 0,    842,  4095, 4095, 552,
+    3681, 4095, 0,    1075, 2631, 554,  0,    0,    4095, 0,    0,    0,
+    4095, 4095, 0,    0,    0,    2234, 0,    1098, 4095, 3164, 4095, 0,
+    2748, 0,    0,    0,    4095, 4095, 4095, 1724, 891,  3496, 3964, 4095,
+    0,    0,    1923, 4095, 4095, 4095, 3118, 0,    0,    0,    4095, 4095,
+    0,    0,    3856, 4095, 0,    0,    4095, 4095, 2647, 0,    2089, 4095,
+    471,  0,    4095, 0,    0,    0,    4095, 0,    1263, 2969, 289,  0,
+    0,    4095, 289,  0,    0,    2965, 0,    0,    3280, 2279, 4091, 5,
+    512,  1776, 4,    2046, 3994, 1,    4095, 898,  4095, 0,    0,    0,
+    0,    4095, 0,    4095, 4095, 1930, 0,    0,    3725, 4095, 4095, 0,
+    2593, 4095, 0,    4095, 984,  0,    4095, 2388, 0,    0,    4095, 4095,
+    3341, 4095, 0,    2787, 0,    831,  2978, 4095, 0,    0,    0,    4095,
+    1624, 4095, 1054, 1039, 0,    89,   3565, 0,    4095, 468,  0,    4095,
+    4095, 0,    4095, 4095, 0,    3907, 0,    0,    0,    0,    0,    0,
+    4095, 1898, 2178, 4095, 0,    3708, 2825, 0,    4095, 0,    4095, 4095,
+    0,    0,    811,  1078, 0,    4095, 0,    3478, 0,    0,    1127, 0,
+    504,  4095, 4095, 2006, 4095, 0,    2666, 1172, 4095, 4095, 4095, 4095,
+    4095, 0,    199,  4095, 0,    2355, 2650, 2961, 0,    0,    0,    4095,
+    4095, 0,    4095, 0,    4095, 1477, 0,    0,    1946, 0,    3352, 1988,
+    0,    0,    2321, 4095, 0,    4095, 3367, 0,    0,    4095, 4095, 1946,
+    0,    4034, 0,    0,    4095, 4095, 0,    0,    0,    0,    4095, 973,
+    1734, 3966, 4095, 0,    3780, 1242, 0,    4095, 1301, 0,    1513, 4095,
+    1079, 4095, 0,    0,    1316, 4095, 4095, 675,  2713, 2006, 4095, 4095,
+    0,    0,    4095, 4095, 0,    3542, 4095, 0,    2365, 130,  4095, 2919,
+    0,    4095, 3434, 0,    905,  4095, 673,  4095, 4095, 0,    3923, 293,
+    4095, 213,  4095, 4095, 1334, 4095, 0,    3317, 0,    0,    0,    4095,
+    4095, 4095, 2598, 2010, 0,    0,    3507, 0,    0,    0,    489,  0,
+    0,    1782, 2681, 3303, 4095, 4095, 1955, 4095, 4095, 4095, 203,  1973,
+    4095, 4020, 0,    4095, 1538, 0,    373,  1934, 4095, 0,    4095, 2244,
+    4095, 1936, 4095, 640,  0,    4095, 0,    0,    0,    3653, 4095, 1966,
+    4095, 4095, 4095, 4095, 0,    4095, 843,  0,    4095, 4095, 4095, 1646,
+    4095, 0,    0,    4095, 4095, 4095, 2164, 0,    0,    0,    2141, 4095,
+    0,    903,  4095, 4095, 0,    624,  4095, 792,  0,    0,    0,    0,
+    0,    0,    0,    4095, 0,    4095, 4095, 2466, 0,    3631, 0,    4095,
+    4095, 4095, 0,    941,  4095, 4095, 1609, 4095, 4095, 0,    0,    2398,
+    4095, 4095, 2579, 0,    4020, 3485, 0,    0,    4095, 0,    4095, 0,
+    3158, 2355, 0,    4095, 4095, 4095, 0,    0,    4095, 0,    0,    4095,
+    475,  2272, 1010, 0,    0,    4095, 0,    0,    4095, 841,  4095, 4095,
+    4095, 4095, 0,    4095, 0,    1046, 4095, 1738, 708,  4095, 0,    4095,
+    4095, 0,    4095, 4095, 0,    4095, 4095, 0,    0,    0,    4032, 0,
+    2679, 0,    1564, 0,    0,    0,    659,  1915, 4095, 3682, 0,    3660,
+    4095, 723,  1383, 2499, 1353, 4095, 0,    3898, 2322, 3798, 4095, 0,
+    444,  2277, 3729, 4095, 4095, 4095, 3054, 387,  3309, 4048, 3793, 2842,
+    2087, 0,    3274, 2454, 518,  0,    4095, 0,    4095, 4095, 3358, 4095,
+    2083, 2105, 0,    0,    0,    1125, 2636, 0,    0,    0,    0,    736,
+    0,    349,  0,    4095, 2031, 4095, 992,  0,    4095, 3284, 4095, 214,
+    3692, 4010, 402,  0,    0,    3776, 4095, 4095, 4095, 4095, 803,  2095,
+    3864, 4095, 3323, 0,    0,    361,  1634, 0,    983,  0,    1181, 4095,
+    1791, 4095, 367,  792,  4095, 4095, 3315, 3149, 4095, 62,   4095, 1791,
+    3708, 2030, 4095, 1237, 0,    4095, 4095, 0,    0,    0,    0,    4095,
+    1902, 2257, 4095, 4095, 0,    0,    2929, 4095, 0,    4095, 2356, 4095,
+    2877, 1296, 4095, 0,    0,    0,    1310, 1968, 820,  4095, 4095, 4095,
+    4095, 4095, 0,    0,    4095, 4095, 4095, 2897, 1787, 2218, 0,    129,
+    4095, 4095, 0,    4095, 2331, 4095, 4095, 3192, 4095, 1744, 755,  0,
+    1905, 0,    4095, 4095, 4095, 0,    0,    4095, 4095, 4095, 0,    0,
+    0,    1467, 266,  1719, 4095, 729,  4095, 4095, 2647, 3543, 3388, 3326,
+    4095, 0,    4095, 4095, 4095, 1416, 4095, 2131, 810,  0,    0,    4095,
+    4095, 1250, 0,    0,    4095, 2722, 1493, 4095, 0,    4095, 0,    2895,
+    0,    3847, 0,    2078, 0,    0,    0,    4095, 4095, 4095, 4095, 0,
+    4095, 2651, 4095, 4095, 351,  2675, 4095, 0,    858,  0,    0,    0,
+    816,  4095, 0,    4095, 0,    3842, 1990, 593,  0,    0,    3992, 4095,
+    4095, 0,    4095, 1314, 4095, 4095, 1864, 2561, 4095, 1339, 0,    4095,
+    2201, 4095, 0,    1403, 0,    0,    4095, 4095, 4095, 0,    0,    0,
+    0,    0,    0,    577,  4095, 995,  2534, 827,  1431, 4095, 4095, 778,
+    1405, 0,    0,    4095, 0,    4095, 1327, 4095, 0,    2725, 3351, 3937,
+    741,  0,    2690, 2849, 4095, 4095, 2151, 0,    4095, 0,    4095, 4095,
+    4095, 1342, 142,  1920, 1007, 2001
+  };
+  unsigned char *img_data =
+      reinterpret_cast<unsigned char *>(const_cast<uint16_t *>(buffer));
+
+  aom_image_t img;
+  EXPECT_EQ(&img, aom_img_wrap(&img, AOM_IMG_FMT_I42016, kWidth, kHeight, 1,
+                               img_data));
+  img.cp = AOM_CICP_CP_UNSPECIFIED;
+  img.tc = AOM_CICP_TC_UNSPECIFIED;
+  img.mc = AOM_CICP_MC_UNSPECIFIED;
+  img.monochrome = 1;
+  img.csp = AOM_CSP_UNKNOWN;
+  img.range = AOM_CR_FULL_RANGE;
+  img.planes[1] = img.planes[2] = nullptr;
+  img.stride[1] = img.stride[2] = 0;
+
+  aom_codec_iface_t *iface = aom_codec_av1_cx();
+  aom_codec_enc_cfg_t cfg;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_config_default(iface, &cfg, AOM_USAGE_GOOD_QUALITY));
+  cfg.rc_end_usage = AOM_Q;
+  cfg.g_profile = 2;
+  cfg.g_bit_depth = AOM_BITS_12;
+  cfg.g_input_bit_depth = 12;
+  cfg.g_w = kWidth;
+  cfg.g_h = kHeight;
+  cfg.g_lag_in_frames = 0;
+  cfg.g_threads = 53;
+  cfg.monochrome = 1;
+  cfg.rc_min_quantizer = 22;
+  cfg.rc_max_quantizer = 30;
+  aom_codec_ctx_t enc;
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_enc_init(&enc, iface, &cfg, AOM_CODEC_USE_HIGHBITDEPTH));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CQ_LEVEL, 26));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AV1E_SET_TILE_ROWS, 3));
+  EXPECT_EQ(AOM_CODEC_OK, aom_codec_control(&enc, AOME_SET_CPUUSED, 6));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AV1E_SET_COLOR_RANGE, AOM_CR_FULL_RANGE));
+  EXPECT_EQ(AOM_CODEC_OK,
+            aom_codec_control(&enc, AOME_SET_TUNING, AOM_

(Patch may be truncated; please check the link at the top of this post.)