aom: Add AArch64 SVE2 runtime feature detection

From 61f971f5e6e3921beb0ae712158e43e99e6f51d3 Mon Sep 17 00:00:00 2001
From: Salome Thirot <[EMAIL REDACTED]>
Date: Fri, 9 Feb 2024 11:20:58 +0000
Subject: [PATCH] Add AArch64 SVE2 runtime feature detection

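Add runtime feature detection of Armv9-A SVE2 in preparation for adding
SVE2-only code in future commits. HAS_SVE2 is only reported when HAS_SVE
is also reported. Also teach the unit test runner to filter out SVE2
tests on CPUs that do not report SVE2, so that SVE2 unit tests only run
where they are supported.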

Change-Id: I0b3fdebea287f187c0e5be1c1e769e236e9e09f7
---
 aom_ports/aarch64_cpudetect.c         | 9 ++++++++-
 aom_ports/arm.h                       | 2 ++
 build/cmake/aom_config_defaults.cmake | 3 +++
 build/cmake/cpu.cmake                 | 8 +++++---
 build/cmake/rtcd.pl                   | 2 +-
 test/test_libaom.cc                   | 1 +
 6 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/aom_ports/aarch64_cpudetect.c b/aom_ports/aarch64_cpudetect.c
index 13299a688e..3490d6892f 100644
--- a/aom_ports/aarch64_cpudetect.c
+++ b/aom_ports/aarch64_cpudetect.c
@@ -104,6 +104,7 @@ static int arm_get_cpu_caps(void) {
 #define AOM_AARCH64_HWCAP_CRC32 (1 << 7)
 #define AOM_AARCH64_HWCAP_ASIMDDP (1 << 20)
 #define AOM_AARCH64_HWCAP_SVE (1 << 22)
+#define AOM_AARCH64_HWCAP2_SVE2 (1 << 1)
 #define AOM_AARCH64_HWCAP2_I8MM (1 << 13)
 
 static int arm_get_cpu_caps(void) {
@@ -111,7 +112,7 @@ static int arm_get_cpu_caps(void) {
 #if HAVE_ARM_CRC32 || HAVE_NEON_DOTPROD || HAVE_SVE
   unsigned long hwcap = getauxval(AT_HWCAP);
 #endif
-#if HAVE_NEON_I8MM
+#if HAVE_NEON_I8MM || HAVE_SVE2
   unsigned long hwcap2 = getauxval(AT_HWCAP2);
 #endif
 
@@ -130,6 +131,9 @@ static int arm_get_cpu_caps(void) {
 #if HAVE_SVE
   if (hwcap & AOM_AARCH64_HWCAP_SVE) flags |= HAS_SVE;
 #endif  // HAVE_SVE
+#if HAVE_SVE2
+  if (hwcap2 & AOM_AARCH64_HWCAP2_SVE2) flags |= HAS_SVE2;
+#endif  // HAVE_SVE2
   return flags;
 }
 
@@ -189,5 +193,8 @@ int aom_arm_cpu_caps(void) {
   if (!(flags & HAS_NEON_DOTPROD)) flags &= ~HAS_SVE;
   if (!(flags & HAS_NEON_I8MM)) flags &= ~HAS_SVE;
 
+  // Restrict flags: SVE2 assumes that FEAT_SVE is available.
+  if (!(flags & HAS_SVE)) flags &= ~HAS_SVE2;
+
   return flags;
 }
diff --git a/aom_ports/arm.h b/aom_ports/arm.h
index 853741d19a..a57510895b 100644
--- a/aom_ports/arm.h
+++ b/aom_ports/arm.h
@@ -29,6 +29,8 @@ extern "C" {
 #define HAS_NEON_I8MM (1 << 3)
 // Armv8.2-A optional SVE instructions, mandatory from Armv9.0-A.
 #define HAS_SVE (1 << 4)
+// Armv9.0-A SVE2 instructions.
+#define HAS_SVE2 (1 << 5)
 
 int aom_arm_cpu_caps(void);
 
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index da7de4b0f4..5b01ea270b 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -37,6 +37,7 @@ set_aom_detect_var(HAVE_NEON_DOTPROD 0
 set_aom_detect_var(HAVE_NEON_I8MM 0
                    "Enables Armv8.2-A Neon i8mm intrinsics optimizations.")
 set_aom_detect_var(HAVE_SVE 0 "Enables Armv8.2-A SVE intrinsics optimizations.")
+set_aom_detect_var(HAVE_SVE2 0 "Enables Armv9-A SVE2 intrinsics optimizations.")
 
 # PPC feature flags.
 set_aom_detect_var(HAVE_VSX 0 "Enables VSX optimizations.")
@@ -209,6 +210,8 @@ set_aom_option_var(
   "Enables Armv8.2-A Neon i8mm optimizations on AArch64 targets." ON)
 set_aom_option_var(ENABLE_SVE
                    "Enables Armv8.2-A SVE optimizations on AArch64 targets." ON)
+set_aom_option_var(ENABLE_SVE2
+                   "Enables Armv9-A SVE2 optimizations on AArch64 targets." ON)
 
 # VSX intrinsics flags.
 set_aom_option_var(ENABLE_VSX "Enables VSX optimizations on PowerPC targets."
diff --git a/build/cmake/cpu.cmake b/build/cmake/cpu.cmake
index bd13d035d5..489dbcbf44 100644
--- a/build/cmake/cpu.cmake
+++ b/build/cmake/cpu.cmake
@@ -14,11 +14,12 @@ if("${AOM_TARGET_CPU}" STREQUAL "arm64")
   set(AOM_ARCH_AARCH64 1)
   set(RTCD_ARCH_ARM "yes")
 
-  set(ARM64_FLAVORS "NEON;ARM_CRC32;NEON_DOTPROD;NEON_I8MM;SVE")
+  set(ARM64_FLAVORS "NEON;ARM_CRC32;NEON_DOTPROD;NEON_I8MM;SVE;SVE2")
   set(AOM_ARM_CRC32_DEFAULT_FLAG "-march=armv8-a+crc")
   set(AOM_NEON_DOTPROD_DEFAULT_FLAG "-march=armv8.2-a+dotprod")
   set(AOM_NEON_I8MM_DEFAULT_FLAG "-march=armv8.2-a+dotprod+i8mm")
   set(AOM_SVE_DEFAULT_FLAG "-march=armv8.2-a+dotprod+i8mm+sve")
+  set(AOM_SVE2_DEFAULT_FLAG "-march=armv9-a+sve2") # SVE2 is a v9-only feature
 
   # Check that the compiler flag to enable each flavor is supported by the
   # compiler. This may not be the case for new architecture features on old
@@ -45,8 +46,8 @@ if("${AOM_TARGET_CPU}" STREQUAL "arm64")
     endif()
   endforeach()
 
-  # SVE requires that the Neon-SVE bridge header is also available.
-  if(ENABLE_SVE)
+  # SVE and SVE2 require that the Neon-SVE bridge header is also available.
+  if(ENABLE_SVE OR ENABLE_SVE2)
     set(OLD_CMAKE_REQURED_FLAGS ${CMAKE_REQUIRED_FLAGS})
     set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${AOM_SVE_FLAG}")
     aom_check_source_compiles("arm_neon_sve_bridge_available" "
@@ -58,6 +59,7 @@ if("${AOM_TARGET_CPU}" STREQUAL "arm64")
     set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQURED_FLAGS})
     if(HAVE_SVE_HEADERS EQUAL 0)
       set(ENABLE_SVE 0)
+      set(ENABLE_SVE2 0)
     endif()
   endif()
 
diff --git a/build/cmake/rtcd.pl b/build/cmake/rtcd.pl
index 1cf52f076c..f4a70842d0 100755
--- a/build/cmake/rtcd.pl
+++ b/build/cmake/rtcd.pl
@@ -392,7 +392,7 @@ ()
   @ALL_ARCHS = filter(qw/neon/);
   arm;
 } elsif ($opts{arch} eq 'arm64' ) {
-  @ALL_ARCHS = filter(qw/neon arm_crc32 neon_dotprod neon_i8mm sve/);
+  @ALL_ARCHS = filter(qw/neon arm_crc32 neon_dotprod neon_i8mm sve sve2/);
   @REQUIRES = filter(qw/neon/);
   &require(@REQUIRES);
   arm;
diff --git a/test/test_libaom.cc b/test/test_libaom.cc
index fbd7f2e380..26abbb0a06 100644
--- a/test/test_libaom.cc
+++ b/test/test_libaom.cc
@@ -62,6 +62,7 @@ int main(int argc, char **argv) {
   if (!(caps & HAS_NEON_DOTPROD)) append_negative_gtest_filter("NEON_DOTPROD");
   if (!(caps & HAS_NEON_I8MM)) append_negative_gtest_filter("NEON_I8MM");
   if (!(caps & HAS_SVE)) append_negative_gtest_filter("SVE");
+  if (!(caps & HAS_SVE2)) append_negative_gtest_filter("SVE2");
 #elif AOM_ARCH_ARM
   const int caps = aom_arm_cpu_caps();
   if (!(caps & HAS_NEON)) append_negative_gtest_filter("NEON");