From 5a46d2961fb233c8f099a7bc18a7a54c8883813b Mon Sep 17 00:00:00 2001
From: James Zern <[EMAIL REDACTED]>
Date: Tue, 25 Jun 2024 13:55:56 -0700
Subject: [PATCH] disable av1_{highbd_,}dr_prediction_z2_neon w/armv7
These two functions cause a SIGBUS.
Bug: aomedia:349428506, b:345667979, b:347825582
Change-Id: I9a25c224cf61fbf1053a05e0e4a0c830751b1fd4
---
aom_dsp/arm/highbd_intrapred_neon.c | 3 ++
aom_dsp/arm/intrapred_neon.c | 3 ++
av1/common/av1_rtcd_defs.pl | 14 +++++++--
build/cmake/rtcd.pl | 6 ++--
test/dr_prediction_test.cc | 44 +++++++++++++++++++++++++++++
5 files changed, 66 insertions(+), 4 deletions(-)
diff --git a/aom_dsp/arm/highbd_intrapred_neon.c b/aom_dsp/arm/highbd_intrapred_neon.c
index 3eda2ca46..5e6118dc6 100644
--- a/aom_dsp/arm/highbd_intrapred_neon.c
+++ b/aom_dsp/arm/highbd_intrapred_neon.c
@@ -1604,6 +1604,8 @@ static AOM_FORCE_INLINE uint16x8_t highbd_dr_prediction_z2_tbl_left_x8_from_x16(
}
#endif // AOM_ARCH_AARCH64
+// TODO(aomedia:349428506): enable this for armv7 after SIGBUS is fixed.
+#if AOM_ARCH_AARCH64
static AOM_FORCE_INLINE uint16x4x2_t highbd_dr_prediction_z2_gather_left_x4(
const uint16_t *left, const int16x4_t indices, int n) {
assert(n > 0);
@@ -2473,6 +2475,7 @@ void av1_highbd_dr_prediction_z2_neon(uint16_t *dst, ptrdiff_t stride, int bw,
assert(f != NULL);
f(dst, stride, above, left, upsample_above, upsample_left, dx, dy, bd);
}
+#endif // AOM_ARCH_AARCH64
// -----------------------------------------------------------------------------
// Z3
diff --git a/aom_dsp/arm/intrapred_neon.c b/aom_dsp/arm/intrapred_neon.c
index 3c12ca3c1..561a9f76a 100644
--- a/aom_dsp/arm/intrapred_neon.c
+++ b/aom_dsp/arm/intrapred_neon.c
@@ -1488,6 +1488,8 @@ void av1_dr_prediction_z1_neon(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
/* ---------------------P R E D I C T I O N Z 2--------------------------- */
+// TODO(aomedia:349428506): enable this for armv7 after SIGBUS is fixed.
+#if AOM_ARCH_AARCH64
#if !AOM_ARCH_AARCH64
static DECLARE_ALIGNED(16, uint8_t, LoadMaskz2[4][16]) = {
{ 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
@@ -2038,6 +2040,7 @@ void av1_dr_prediction_z2_neon(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
break;
}
}
+#endif // AOM_ARCH_AARCH64
/* ---------------------P R E D I C T I O N Z 3--------------------------- */
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index d08d2194d..86f83a69e 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -115,7 +115,12 @@ ()
add_proto qw/void av1_dr_prediction_z1/, "uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy";
specialize qw/av1_dr_prediction_z1 sse4_1 avx2 neon/;
add_proto qw/void av1_dr_prediction_z2/, "uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int upsample_left, int dx, int dy";
-specialize qw/av1_dr_prediction_z2 sse4_1 avx2 neon/;
+# TODO(aomedia:349428506): enable NEON for armv7 after SIGBUS is fixed.
+if (aom_config("AOM_ARCH_ARM") eq "yes" && aom_config("AOM_ARCH_AARCH64") eq "") {
+ specialize qw/av1_dr_prediction_z2 sse4_1 avx2/;
+} else {
+ specialize qw/av1_dr_prediction_z2 sse4_1 avx2 neon/;
+}
add_proto qw/void av1_dr_prediction_z3/, "uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_left, int dx, int dy";
specialize qw/av1_dr_prediction_z3 sse4_1 avx2 neon/;
@@ -220,7 +225,12 @@ ()
add_proto qw/void av1_highbd_dr_prediction_z1/, "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd";
specialize qw/av1_highbd_dr_prediction_z1 avx2 neon/;
add_proto qw/void av1_highbd_dr_prediction_z2/, "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int upsample_left, int dx, int dy, int bd";
- specialize qw/av1_highbd_dr_prediction_z2 avx2 neon/;
+ # TODO(aomedia:349428506): enable NEON for armv7 after SIGBUS is fixed.
+ if (aom_config("AOM_ARCH_ARM") eq "yes" && aom_config("AOM_ARCH_AARCH64") eq "") {
+ specialize qw/av1_highbd_dr_prediction_z2 avx2/;
+ } else {
+ specialize qw/av1_highbd_dr_prediction_z2 avx2 neon/;
+ }
add_proto qw/void av1_highbd_dr_prediction_z3/, "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_left, int dx, int dy, int bd";
specialize qw/av1_highbd_dr_prediction_z3 avx2 neon/;
}
diff --git a/build/cmake/rtcd.pl b/build/cmake/rtcd.pl
index dc827c2de..5d889cb74 100755
--- a/build/cmake/rtcd.pl
+++ b/build/cmake/rtcd.pl
@@ -58,13 +58,15 @@
my %config = ();
while (<CONFIG_FILE>) {
- next if !/^#define\s+(?:CONFIG_|HAVE_)/;
+ # TODO(aomedia:349428506,349436249,349450845,349455146): remove AOM_ARCH_
+ # after armv7 SIGBUS issues are fixed.
+ next if !/^#define\s+(?:AOM_ARCH_|CONFIG_|HAVE_)/;
chomp;
my @line_components = split /\s/;
scalar @line_components > 2 or
die "Invalid input passed to rtcd.pl via $opts{config}.";
# $line_components[0] = #define
- # $line_components[1] = flag name (CONFIG_SOMETHING or HAVE_SOMETHING)
+ # $line_components[1] = flag name ({AOM_ARCH,CONFIG,HAVE}_SOMETHING)
# $line_components[2] = flag value (0 or 1)
$config{$line_components[1]} = "$line_components[2]" eq "1" ? "yes" : "";
}
diff --git a/test/dr_prediction_test.cc b/test/dr_prediction_test.cc
index 0938a3db1..20cf60032 100644
--- a/test/dr_prediction_test.cc
+++ b/test/dr_prediction_test.cc
@@ -484,6 +484,7 @@ INSTANTIATE_TEST_SUITE_P(
#endif // HAVE_AVX2
#if HAVE_NEON
+#if AOM_ARCH_AARCH64
INSTANTIATE_TEST_SUITE_P(
NEON, LowbdDrPredTest,
::testing::Values(DrPredFunc<DrPred>(&z1_wrapper<av1_dr_prediction_z1_c>,
@@ -495,8 +496,21 @@ INSTANTIATE_TEST_SUITE_P(
DrPredFunc<DrPred>(&z3_wrapper<av1_dr_prediction_z3_c>,
&z3_wrapper<av1_dr_prediction_z3_neon>,
AOM_BITS_8, kZ3Start)));
+#else
+// TODO(aomedia:349428506): enable av1_dr_prediction_z2_neon for armv7 after
+// SIGBUS is fixed.
+INSTANTIATE_TEST_SUITE_P(
+ NEON, LowbdDrPredTest,
+ ::testing::Values(DrPredFunc<DrPred>(&z1_wrapper<av1_dr_prediction_z1_c>,
+ &z1_wrapper<av1_dr_prediction_z1_neon>,
+ AOM_BITS_8, kZ1Start),
+ DrPredFunc<DrPred>(&z3_wrapper<av1_dr_prediction_z3_c>,
+ &z3_wrapper<av1_dr_prediction_z3_neon>,
+ AOM_BITS_8, kZ3Start)));
+#endif
#if CONFIG_AV1_HIGHBITDEPTH
+#if AOM_ARCH_AARCH64
INSTANTIATE_TEST_SUITE_P(
NEON, HighbdDrPredTest,
::testing::Values(DrPredFunc<DrPred_Hbd>(
@@ -535,6 +549,36 @@ INSTANTIATE_TEST_SUITE_P(
&z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
&z3_wrapper_hbd<av1_highbd_dr_prediction_z3_neon>,
AOM_BITS_12, kZ3Start)));
+#else // !AOM_ARCH_AARCH64
+// TODO(aomedia:349428506): enable av1_highbd_dr_prediction_z2_neon for armv7
+// after SIGBUS is fixed.
+INSTANTIATE_TEST_SUITE_P(
+ NEON, HighbdDrPredTest,
+ ::testing::Values(DrPredFunc<DrPred_Hbd>(
+ &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
+ &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_neon>,
+ AOM_BITS_8, kZ1Start),
+ DrPredFunc<DrPred_Hbd>(
+ &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
+ &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_neon>,
+ AOM_BITS_10, kZ1Start),
+ DrPredFunc<DrPred_Hbd>(
+ &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_c>,
+ &z1_wrapper_hbd<av1_highbd_dr_prediction_z1_neon>,
+ AOM_BITS_12, kZ1Start),
+ DrPredFunc<DrPred_Hbd>(
+ &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
+ &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_neon>,
+ AOM_BITS_8, kZ3Start),
+ DrPredFunc<DrPred_Hbd>(
+ &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
+ &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_neon>,
+ AOM_BITS_10, kZ3Start),
+ DrPredFunc<DrPred_Hbd>(
+ &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_c>,
+ &z3_wrapper_hbd<av1_highbd_dr_prediction_z3_neon>,
+ AOM_BITS_12, kZ3Start)));
+#endif // AOM_ARCH_AARCH64
#endif // CONFIG_AV1_HIGHBITDEPTH
#endif // HAVE_NEON