SDL-1.2: Backport the NEON detection code from SDL2

From c719a5d997ff276fc0a4b10b17c5fc672917fb3a Mon Sep 17 00:00:00 2001
From: Cameron Cawley <[EMAIL REDACTED]>
Date: Tue, 16 Feb 2021 15:28:16 +0000
Subject: [PATCH] Backport the NEON detection code from SDL2

---
 configure                 |   2 +-
 configure.ac              |   2 +-
 include/SDL_config.h.in   |   2 +
 include/SDL_cpuinfo.h     |   1 +
 src/cpuinfo/SDL_cpuinfo.c | 170 +++++++++++++++++++++++++++-----------
 src/video/SDL_blit_A.c    |   4 +-
 src/video/SDL_surface.c   |   2 +-
 7 files changed, 128 insertions(+), 55 deletions(-)

diff --git a/configure b/configure
index 37e78375..9e40706c 100755
--- a/configure
+++ b/configure
@@ -16196,7 +16196,7 @@ fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 fi
 
-    for ac_func in malloc calloc realloc free getenv putenv unsetenv qsort abs bcopy memset memcpy memmove strlen strlcpy strlcat strdup _strrev _strupr _strlwr strchr strrchr strstr itoa _ltoa _uitoa _ultoa strtol strtoul _i64toa _ui64toa strtoll strtoull atoi atof strcmp strncmp _stricmp strcasecmp _strnicmp strncasecmp sscanf snprintf vsnprintf iconv sigaction setjmp nanosleep
+    for ac_func in malloc calloc realloc free getenv putenv unsetenv qsort abs bcopy memset memcpy memmove strlen strlcpy strlcat strdup _strrev _strupr _strlwr strchr strrchr strstr itoa _ltoa _uitoa _ultoa strtol strtoul _i64toa _ui64toa strtoll strtoull atoi atof strcmp strncmp _stricmp strcasecmp _strnicmp strncasecmp sscanf snprintf vsnprintf iconv sigaction setjmp nanosleep getauxval elf_aux_info
 do :
   as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
diff --git a/configure.ac b/configure.ac
index b02cd851..5c832a60 100644
--- a/configure.ac
+++ b/configure.ac
@@ -176,7 +176,7 @@ if test x$enable_libc = xyes; then
         AC_DEFINE(HAVE_MPROTECT)
         ]),
     )
-    AC_CHECK_FUNCS(malloc calloc realloc free getenv putenv unsetenv qsort abs bcopy memset memcpy memmove strlen strlcpy strlcat strdup _strrev _strupr _strlwr strchr strrchr strstr itoa _ltoa _uitoa _ultoa strtol strtoul _i64toa _ui64toa strtoll strtoull atoi atof strcmp strncmp _stricmp strcasecmp _strnicmp strncasecmp sscanf snprintf vsnprintf iconv sigaction setjmp nanosleep)
+    AC_CHECK_FUNCS(malloc calloc realloc free getenv putenv unsetenv qsort abs bcopy memset memcpy memmove strlen strlcpy strlcat strdup _strrev _strupr _strlwr strchr strrchr strstr itoa _ltoa _uitoa _ultoa strtol strtoul _i64toa _ui64toa strtoll strtoull atoi atof strcmp strncmp _stricmp strcasecmp _strnicmp strncasecmp sscanf snprintf vsnprintf iconv sigaction setjmp nanosleep getauxval elf_aux_info)
 
     AC_CHECK_LIB(iconv, libiconv_open, [EXTRA_LDFLAGS="$EXTRA_LDFLAGS -liconv"])
     AC_CHECK_LIB(m, pow, [EXTRA_LDFLAGS="$EXTRA_LDFLAGS -lm"])
diff --git a/include/SDL_config.h.in b/include/SDL_config.h.in
index c0161dd7..7453e1ad 100644
--- a/include/SDL_config.h.in
+++ b/include/SDL_config.h.in
@@ -141,6 +141,8 @@
 #undef HAVE_GETPAGESIZE
 #undef HAVE_MPROTECT
 #undef HAVE_SEM_TIMEDWAIT
+#undef HAVE_GETAUXVAL
+#undef HAVE_ELF_AUX_INFO
 
 #else
 /* We may need some replacement for stdarg.h here */
diff --git a/include/SDL_cpuinfo.h b/include/SDL_cpuinfo.h
index 1335b982..f873100d 100644
--- a/include/SDL_cpuinfo.h
+++ b/include/SDL_cpuinfo.h
@@ -64,6 +64,7 @@ extern DECLSPEC SDL_bool SDLCALL SDL_HasAltiVec(void);
 extern DECLSPEC SDL_bool SDLCALL SDL_HasARMSIMD(void);
 
 /** This function returns true if the CPU has ARM NEON features */
+extern DECLSPEC SDL_bool SDLCALL SDL_HasNEON(void);
 extern DECLSPEC SDL_bool SDLCALL SDL_HasARMNEON(void);
 
 /* Ends C function definitions when using C++ */
diff --git a/src/cpuinfo/SDL_cpuinfo.c b/src/cpuinfo/SDL_cpuinfo.c
index 3f5e1f80..9ba83f46 100644
--- a/src/cpuinfo/SDL_cpuinfo.c
+++ b/src/cpuinfo/SDL_cpuinfo.c
@@ -37,6 +37,38 @@
 #include <setjmp.h>
 #endif
 
+#if defined(__QNXNTO__)
+#include <sys/syspage.h>
+#endif
+
+#if defined(__LINUX__) && defined(__arm__)
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <elf.h>
+
+/*#include <asm/hwcap.h>*/
+#ifndef AT_HWCAP
+#define AT_HWCAP 16
+#endif
+#ifndef AT_PLATFORM
+#define AT_PLATFORM 15
+#endif
+#ifndef HWCAP_NEON
+#define HWCAP_NEON (1 << 12)
+#endif
+#endif
+
+#if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)
+#include <sys/auxv.h>
+#endif
+
+#ifdef __RISCOS__
+#include <kernel.h>
+#include <swis.h>
+#endif
+
 #define CPU_HAS_RDTSC	0x00000001
 #define CPU_HAS_MMX	0x00000002
 #define CPU_HAS_MMXEXT	0x00000004
@@ -46,7 +78,7 @@
 #define CPU_HAS_SSE2	0x00000080
 #define CPU_HAS_ALTIVEC	0x00000100
 #define CPU_HAS_ARM_SIMD 0x00000200
-#define CPU_HAS_ARM_NEON 0x00000400
+#define CPU_HAS_NEON     0x00000400
 
 #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
 /* This is the brute force way of detecting instruction sets...
@@ -392,14 +424,21 @@ static __inline__ int CPU_haveAltiVec(void)
 	return altivec; 
 }
 
-#ifdef __linux__
+#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 6)) || defined(__aarch64__)
+static int
+CPU_haveARMSIMD(void)
+{
+	return 1;
+}
 
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <elf.h>
+#elif !defined(__arm__)
+static int
+CPU_haveARMSIMD(void)
+{
+	return 0;
+}
 
+#elif defined(__LINUX__)
 static __inline__ int CPU_haveARMSIMD(void)
 {
 	int arm_simd = 0;
@@ -425,30 +464,7 @@ static __inline__ int CPU_haveARMSIMD(void)
 	return arm_simd;
 }
 
-static __inline__ int CPU_haveARMNEON(void)
-{
-	int arm_neon = 0;
-	int fd;
-
-	fd = open("/proc/self/auxv", O_RDONLY);
-	if (fd >= 0)
-	{
-		Elf32_auxv_t aux;
-		while (read(fd, &aux, sizeof aux) == sizeof aux)
-		{
-			if (aux.a_type == AT_HWCAP)
-				arm_neon = (aux.a_un.a_val & 4096) != 0;
-		}
-		close(fd);
-	}
-	return arm_neon;
-}
-
 #elif defined(__RISCOS__)
-
-#include <kernel.h>
-#include <swis.h>
-
 static __inline__ int CPU_haveARMSIMD(void)
 {
 	_kernel_swi_regs regs;
@@ -467,32 +483,81 @@ static __inline__ int CPU_haveARMSIMD(void)
 	return regs.r[0];
 }
 
-static __inline__ int CPU_haveARMNEON(void)
-{
-	/* Use the VFPSupport_Features SWI to access the MVFR registers */
-	_kernel_swi_regs regs;
-	regs.r[0] = 0;
-	if (_kernel_swi(VFPSupport_Features, &regs, &regs) == NULL) {
-		if ((regs.r[2] & 0xFFF000) == 0x111000) {
-			return 1;
-		}
-	}
-	return 0;
-}
-
 #else
 
 static __inline__ int CPU_haveARMSIMD(void)
 {
+#warning SDL_HasARMSIMD is not implemented for this ARM platform. Write me.
 	return 0;
 }
 
-static __inline__ int CPU_haveARMNEON(void)
+#endif
+
+#if defined(__LINUX__) && defined(__arm__) && !defined(HAVE_GETAUXVAL)
+static __inline__  int readProcAuxvForNeon(void)
 {
-	return 0;
+	int neon = 0;
+	int fd;
+
+	fd = open("/proc/self/auxv", O_RDONLY);
+	if (fd >= 0)
+	{
+		Elf32_auxv_t aux;
+		while (read(fd, &aux, sizeof (aux)) == sizeof (aux)) {
+			if (aux.a_type == AT_HWCAP) {
+				neon = (aux.a_un.a_val & HWCAP_NEON) == HWCAP_NEON;
+				break;
+			}
+		}
+		close(fd);
+	}
+	return neon;
 }
+#endif
 
+static __inline__ int CPU_haveNEON(void)
+{
+/* The way you detect NEON is a privileged instruction on ARM, so you have to
+   query the OS kernel in a platform-specific way. :/ */
+#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || defined(__aarch64__)
+	return 1;  /* ARMv8 always has non-optional NEON support. */
+#elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7)
+	/* (note that sysctlbyname("hw.optional.neon") doesn't work!) */
+	return 1;  /* all Apple ARMv7 chips and later have NEON. */
+#elif defined(__APPLE__)
+	return 0;  /* assume anything else from Apple doesn't have NEON. */
+#elif !defined(__arm__)
+	return 0;  /* not an ARM CPU at all. */
+#elif defined(__OpenBSD__)
+	return 1;  /* OpenBSD only supports ARMv7 CPUs that have NEON. */
+#elif defined(HAVE_ELF_AUX_INFO)
+	unsigned long hasneon = 0;
+	if (elf_aux_info(AT_HWCAP, (void *)&hasneon, (int)sizeof(hasneon)) != 0)
+		return 0;
+	return ((hasneon & HWCAP_NEON) == HWCAP_NEON);
+#elif defined(__QNXNTO__)
+	return SYSPAGE_ENTRY(cpuinfo)->flags & ARM_CPU_FLAG_NEON;
+#elif defined(__LINUX__) && defined(HAVE_GETAUXVAL)
+	return ((getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON);
+#elif defined(__LINUX__)
+	return readProcAuxvForNeon();
+#elif defined(__RISCOS__)
+	/* Use the VFPSupport_Features SWI to access the MVFR registers */
+	{
+		_kernel_swi_regs regs;
+		regs.r[0] = 0;
+		if (_kernel_swi(VFPSupport_Features, &regs, &regs) == NULL) {
+		if ((regs.r[2] & 0xFFF000) == 0x111000) {
+			return 1;
+			}
+		}
+		return 0;
+	}
+#else
+#warning SDL_HasNEON is not implemented for this ARM platform. Write me.
+	return 0;
 #endif
+}
 
 static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
 
@@ -527,8 +592,8 @@ static Uint32 SDL_GetCPUFeatures(void)
 		if ( CPU_haveARMSIMD() ) {
 			SDL_CPUFeatures |= CPU_HAS_ARM_SIMD;
 		}
-		if ( CPU_haveARMNEON() ) {
-			SDL_CPUFeatures |= CPU_HAS_ARM_NEON;
+		if ( CPU_haveNEON() ) {
+			SDL_CPUFeatures |= CPU_HAS_NEON;
 		}
 	}
 	return SDL_CPUFeatures;
@@ -606,14 +671,19 @@ SDL_bool SDL_HasARMSIMD(void)
 	return SDL_FALSE;
 }
 
-SDL_bool SDL_HasARMNEON(void)
+SDL_bool SDL_HasNEON(void)
 {
-	if ( SDL_GetCPUFeatures() & CPU_HAS_ARM_NEON ) {
+	if ( SDL_GetCPUFeatures() & CPU_HAS_NEON ) {
 		return SDL_TRUE;
 	}
 	return SDL_FALSE;
 }
 
+SDL_bool SDL_HasARMNEON(void)
+{
+	return SDL_HasNEON();
+}
+
 #ifdef TEST_MAIN
 
 #include <stdio.h>
@@ -629,7 +699,7 @@ int main()
 	printf("SSE2: %d\n", SDL_HasSSE2());
 	printf("AltiVec: %d\n", SDL_HasAltiVec());
 	printf("ARM SIMD: %d\n", SDL_HasARMSIMD());
-	printf("ARM NEON: %d\n", SDL_HasARMNEON());
+	printf("NEON: %d\n", SDL_HasNEON());
 	return 0;
 }
 
diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c
index cfbd5790..c1542f2b 100644
--- a/src/video/SDL_blit_A.c
+++ b/src/video/SDL_blit_A.c
@@ -2883,7 +2883,7 @@ SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index)
 		   || (sf->Bmask == 0xff && df->Bmask == 0x1f)))
 		{
 #if SDL_ARM_NEON_BLITTERS
-		    if(SDL_HasARMNEON())
+		    if(SDL_HasNEON())
 		        return BlitARGBto565PixelAlphaARMNEON;
 #endif
 #if SDL_ARM_SIMD_BLITTERS
@@ -2930,7 +2930,7 @@ SDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index)
 				return BlitRGBtoRGBPixelAlphaAltivec;
 #endif
 #if SDL_ARM_NEON_BLITTERS
-			if (SDL_HasARMNEON())
+			if (SDL_HasNEON())
 				return BlitRGBtoRGBPixelAlphaARMNEON;
 #endif
 #if SDL_ARM_SIMD_BLITTERS
diff --git a/src/video/SDL_surface.c b/src/video/SDL_surface.c
index 73f05034..459b2be8 100644
--- a/src/video/SDL_surface.c
+++ b/src/video/SDL_surface.c
@@ -604,7 +604,7 @@ int SDL_FillRect(SDL_Surface *dst, SDL_Rect *dstrect, Uint32 color)
 	row = (Uint8 *)dst->pixels+dstrect->y*dst->pitch+
 			dstrect->x*dst->format->BytesPerPixel;
 #if SDL_ARM_NEON_BLITTERS
-    if (SDL_HasARMNEON() && dst->format->BytesPerPixel != 3) {
+    if (SDL_HasNEON() && dst->format->BytesPerPixel != 3) {
         void FillRect8ARMNEONAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
         void FillRect16ARMNEONAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
         void FillRect32ARMNEONAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);