SDL: cmake: Detect AVX + allow build system to disable Intel intrinsics

From 4681240241b86372a62b98d2f19d33c2971bada0 Mon Sep 17 00:00:00 2001
From: Anonymous Maarten <[EMAIL REDACTED]>
Date: Sat, 25 Feb 2023 00:21:15 +0100
Subject: [PATCH] cmake: Detect AVX + allow build system to disable Intel
 intrinsics

---
 CMakeLists.txt                                | 72 +++++++++++++++++--
 include/SDL3/SDL_intrin.h                     | 13 ++--
 include/build_config/SDL_build_config.h.cmake | 19 ++---
 include/build_config/SDL_build_config_macos.h |  4 +-
 .../build_config/SDL_build_config_windows.h   |  7 +-
 .../build_config/SDL_build_config_wingdk.h    |  7 +-
 include/build_config/SDL_build_config_xbox.h  |  7 +-
 src/SDL_internal.h                            | 40 +++++++++++
 src/audio/SDL_audiocvt.c                      | 29 --------
 src/audio/SDL_audiotypecvt.c                  |  4 --
 src/video/SDL_blit_A.c                        | 20 +++---
 src/video/SDL_blit_copy.c                     | 12 ++--
 src/video/SDL_fillrect.c                      |  8 +--
 src/video/SDL_stretch.c                       |  4 --
 src/video/SDL_yuv.c                           | 40 +++++------
 src/video/yuv2rgb/yuv_rgb.c                   |  5 +-
 16 files changed, 175 insertions(+), 116 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 056b293b569b..dd6616648640 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -367,6 +367,7 @@ option_string(SDL_ASSERTIONS "Enable internal sanity checks (auto/disabled/relea
 #set_option(SDL_DEPENDENCY_TRACKING "Use gcc -MMD -MT dependency tracking" ON)
 set_option(SDL_ASSEMBLY            "Enable assembly routines" ${SDL_ASSEMBLY_DEFAULT})
 dep_option(SDL_SSEMATH             "Allow GCC to use SSE floating point math" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
+dep_option(SDL_AVX                 "Use AVX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
 dep_option(SDL_SSE                 "Use SSE assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
 dep_option(SDL_SSE2                "Use SSE2 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
 dep_option(SDL_SSE3                "Use SSE3 assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_X86 OR SDL_CPU_X64" OFF)
@@ -711,6 +712,32 @@ if(SDL_ASSEMBLY)
     # TODO: Those all seem to be quite GCC specific - needs to be
     # reworked for better compiler support
     set(HAVE_ASSEMBLY TRUE)
+    # AVX: probe that -mavx is accepted and defines __AVX__; on success it is added to sdl-build-options.
+    if(SDL_AVX)
+      cmake_push_check_state()
+      set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mavx")
+      check_c_source_compiles("
+          #ifdef __MINGW32__
+          #include <_mingw.h>
+          #ifdef __MINGW64_VERSION_MAJOR
+          #include <intrin.h>
+          #else
+          #include <immintrin.h>
+          #endif
+          #else
+          #include <immintrin.h>
+          #endif
+          #ifndef __AVX__
+          #error Assembler CPP flag not enabled
+          #endif
+          int main(int argc, char **argv) { return 0; }" CPU_SUPPORTS_AVX)
+      cmake_pop_check_state()
+      if(CPU_SUPPORTS_AVX)
+        set(HAVE_AVX TRUE)
+        target_compile_options(sdl-build-options INTERFACE "-mavx")
+      endif()
+    endif()
+
     if(SDL_MMX)
       cmake_push_check_state()
       set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mmmx")
@@ -728,9 +755,10 @@ if(SDL_ASSEMBLY)
           #ifndef __MMX__
           #error Assembler CPP flag not enabled
           #endif
-          int main(int argc, char **argv) { return 0; }" HAVE_MMX)
+          int main(int argc, char **argv) { return 0; }" CPU_SUPPORTS_MMX)
       cmake_pop_check_state()
-      if(HAVE_MMX)
+      if(CPU_SUPPORTS_MMX)
+        set(HAVE_MMX TRUE)
         target_compile_options(sdl-build-options INTERFACE "-mmmx")
       endif()
     endif()
@@ -823,8 +851,6 @@ if(SDL_ASSEMBLY)
       set(HAVE_SSEMATH TRUE)
     endif()
 
-    check_include_file("immintrin.h" HAVE_IMMINTRIN_H)
-
     if(SDL_ALTIVEC)
       cmake_push_check_state()
       set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -maltivec")
@@ -954,10 +980,46 @@ if(SDL_ASSEMBLY)
     if(SDL_SSE3)
       set(HAVE_SSE3 TRUE)
     endif()
-    check_include_file("immintrin.h" HAVE_IMMINTRIN_H)
+    if(SDL_AVX)
+      cmake_push_check_state()
+      # /arch:AVX is a compiler option: __AVX__ is only defined when it is on the compile line.
+      set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} /arch:AVX")
+      check_c_source_compiles("
+          #include <immintrin.h>
+          #ifndef __AVX__
+          #error Assembler CPP flag not enabled
+          #endif
+          int main(int argc, char **argv) { return 0; }" CPU_SUPPORTS_AVX)
+      cmake_pop_check_state()
+      if(CPU_SUPPORTS_AVX)
+        # Hand the flag to the compiler (not the linker) for everything using sdl-build-options.
+        target_compile_options(sdl-build-options INTERFACE "/arch:AVX")
+        set(HAVE_AVX TRUE)
+      endif()
+    endif()
   endif()
 endif()
 
+if(NOT HAVE_AVX)
+  set(SDL_DISABLE_AVX 1)
+endif()
+
+if(NOT HAVE_MMX)
+  set(SDL_DISABLE_MMX 1)
+endif()
+
+if(NOT HAVE_SSE)
+  set(SDL_DISABLE_SSE 1)
+endif()
+
+if(NOT HAVE_SSE2)
+  set(SDL_DISABLE_SSE2 1)
+endif()
+
+if(NOT HAVE_SSE3)
+  set(SDL_DISABLE_SSE3 1)
+endif()
+
 # TODO: Can't deactivate on FreeBSD? w/o LIBC, SDL_stdinc.h can't define
 # anything.
 if(SDL_LIBC)
diff --git a/include/SDL3/SDL_intrin.h b/include/SDL3/SDL_intrin.h
index 8add09baed0f..be9b29b0aefd 100644
--- a/include/SDL3/SDL_intrin.h
+++ b/include/SDL3/SDL_intrin.h
@@ -101,21 +101,20 @@ _m_prefetch(void *__P)
 #include <lasxintrin.h>
 #define __LASX__
 #endif
-#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
+#if defined(__AVX__) && !defined(SDL_DISABLE_AVX)
 #include <immintrin.h>
-#else
-#if defined(__MMX__) && !defined(SDL_DISABLE_MMINTRIN_H)
+#endif
+#if defined(__MMX__) && !defined(SDL_DISABLE_MMX)
 #include <mmintrin.h>
 #endif
-#if defined(__SSE__) && !defined(SDL_DISABLE_XMMINTRIN_H)
+#if defined(__SSE__) && !defined(SDL_DISABLE_SSE)
 #include <xmmintrin.h>
 #endif
-#if defined(__SSE2__) && !defined(SDL_DISABLE_EMMINTRIN_H)
+#if defined(__SSE2__) && !defined(SDL_DISABLE_SSE2)
 #include <emmintrin.h>
 #endif
-#if defined(__SSE3__) && !defined(SDL_DISABLE_PMMINTRIN_H)
+#if defined(__SSE3__) && !defined(SDL_DISABLE_SSE3)
 #include <pmmintrin.h>
 #endif
-#endif /* HAVE_IMMINTRIN_H */
 
 #endif /* SDL_intrin_h_ */
diff --git a/include/build_config/SDL_build_config.h.cmake b/include/build_config/SDL_build_config.h.cmake
index 8f26167b15a6..568e6c6a59b0 100644
--- a/include/build_config/SDL_build_config.h.cmake
+++ b/include/build_config/SDL_build_config.h.cmake
@@ -226,18 +226,6 @@
 #cmakedefine HAVE_LIBUSB 1
 #cmakedefine HAVE_O_CLOEXEC 1
 
-/* Apple platforms might be building universal binaries, where Intel builds
-   can use immintrin.h but other architectures can't. */
-#ifdef __APPLE__
-#  if defined(__has_include) && (defined(__i386__) || defined(__x86_64))
-#    if __has_include(<immintrin.h>)
-#       define HAVE_IMMINTRIN_H 1
-#    endif
-#  endif
-#else  /* non-Apple platforms can use the normal CMake check for this. */
-#cmakedefine HAVE_IMMINTRIN_H 1
-#endif
-
 #cmakedefine HAVE_LIBUDEV_H 1
 #cmakedefine HAVE_LIBSAMPLERATE_H 1
 #cmakedefine HAVE_LIBDECOR_H  1
@@ -597,4 +585,12 @@ typedef unsigned int uintptr_t;
 #endif /* Visual Studio 2008 */
 #endif /* !_STDINT_H_ && !HAVE_STDINT_H */
 
+/* Configure use of intrinsics: CMake sets SDL_DISABLE_<ISA> when the matching HAVE_<ISA> is not set */
+
+#cmakedefine SDL_DISABLE_MMX 1
+#cmakedefine SDL_DISABLE_SSE 1
+#cmakedefine SDL_DISABLE_SSE2 1
+#cmakedefine SDL_DISABLE_SSE3 1
+#cmakedefine SDL_DISABLE_AVX 1
+
 #endif /* SDL_build_config_h_ */
diff --git a/include/build_config/SDL_build_config_macos.h b/include/build_config/SDL_build_config_macos.h
index df6d679e1a6b..139cd7228901 100644
--- a/include/build_config/SDL_build_config_macos.h
+++ b/include/build_config/SDL_build_config_macos.h
@@ -138,8 +138,8 @@
 #define HAVE_SYSCTLBYNAME 1
 
 #if defined(__has_include) && (defined(__i386__) || defined(__x86_64))
-# if __has_include(<immintrin.h>)
-#   define HAVE_IMMINTRIN_H 1
+# if !__has_include(<immintrin.h>)
+#   define SDL_DISABLE_AVX 1
 # endif
 #endif
 
diff --git a/include/build_config/SDL_build_config_windows.h b/include/build_config/SDL_build_config_windows.h
index e1d76dc6a7ec..18cf96e917ca 100644
--- a/include/build_config/SDL_build_config_windows.h
+++ b/include/build_config/SDL_build_config_windows.h
@@ -103,11 +103,12 @@ typedef unsigned int uintptr_t;
 #define HAVE_TPCSHRD_H 1
 #define HAVE_SENSORSAPI_H 1
 #if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600)
-#define HAVE_IMMINTRIN_H 1
 #elif defined(__has_include) && (defined(__i386__) || defined(__x86_64))
-# if __has_include(<immintrin.h>)
-#   define HAVE_IMMINTRIN_H 1
+# if !__has_include(<immintrin.h>)
+#   define SDL_DISABLE_AVX 1
 # endif
+#else
+# define SDL_DISABLE_AVX 1
 #endif
 
 /* This is disabled by default to avoid C runtime dependencies and manifest requirements */
diff --git a/include/build_config/SDL_build_config_wingdk.h b/include/build_config/SDL_build_config_wingdk.h
index 49fecf39f6ee..512213b7e627 100644
--- a/include/build_config/SDL_build_config_wingdk.h
+++ b/include/build_config/SDL_build_config_wingdk.h
@@ -48,11 +48,12 @@
 #define HAVE_TPCSHRD_H 1
 #define HAVE_SENSORSAPI_H 1
 #if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600)
-#define HAVE_IMMINTRIN_H 1
 #elif defined(__has_include) && (defined(__i386__) || defined(__x86_64))
-# if __has_include(<immintrin.h>)
-#   define HAVE_IMMINTRIN_H 1
+# if !__has_include(<immintrin.h>)
+#  define SDL_DISABLE_AVX 1
 # endif
+#else
+# define SDL_DISABLE_AVX 1
 #endif
 
 /* This is disabled by default to avoid C runtime dependencies and manifest requirements */
diff --git a/include/build_config/SDL_build_config_xbox.h b/include/build_config/SDL_build_config_xbox.h
index 0f978a0faa54..0059ca7d888c 100644
--- a/include/build_config/SDL_build_config_xbox.h
+++ b/include/build_config/SDL_build_config_xbox.h
@@ -48,11 +48,12 @@
 /*#define HAVE_TPCSHRD_H  1*/
 /*#define HAVE_SENSORSAPI_H 1*/
 #if (defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)) && (defined(_MSC_VER) && _MSC_VER >= 1600)
-#define HAVE_IMMINTRIN_H 1
 #elif defined(__has_include) && (defined(__i386__) || defined(__x86_64))
-# if __has_include(<immintrin.h>)
-#   define HAVE_IMMINTRIN_H 1
+# if !__has_include(<immintrin.h>)
+#  define SDL_DISABLE_AVX 1
 # endif
+#else
+# define SDL_DISABLE_AVX 1
 #endif
 
 /* This is disabled by default to avoid C runtime dependencies and manifest requirements */
diff --git a/src/SDL_internal.h b/src/SDL_internal.h
index 511d5b752636..8e1e14191d17 100644
--- a/src/SDL_internal.h
+++ b/src/SDL_internal.h
@@ -185,6 +185,46 @@
 
 #include <SDL3/SDL.h>
 #include <SDL3/SDL_intrin.h>
+
+/* HAVE_*_INTRINSICS switches; the x86 ones also require that the matching SDL_DISABLE_* is not defined. */
+#ifdef __ARM_NEON
+#define HAVE_NEON_INTRINSICS 1
+#endif
+
+#if defined(__MMX__) && !defined(SDL_DISABLE_MMX)
+#define HAVE_MMX_INTRINSICS 1
+#endif
+
+#if defined(__SSE__) && !defined(SDL_DISABLE_SSE)
+#define HAVE_SSE_INTRINSICS 1
+#endif
+
+#if defined(__SSE2__) && !defined(SDL_DISABLE_SSE2)
+#define HAVE_SSE2_INTRINSICS 1
+#endif
+
+#if defined(__SSE3__) && !defined(SDL_DISABLE_SSE3)
+#define HAVE_SSE3_INTRINSICS 1
+#endif
+
+#if defined(__AVX__) && !defined(SDL_DISABLE_AVX)
+#define HAVE_AVX_INTRINSICS 1
+#endif
+/* Withdraw AVX for clang without __has_attribute(target), clang under _MSC_VER/__SCE__ lacking __AVX__, and GCC older than 4.9. */
+#if defined __clang__
+#if (!__has_attribute(target))
+#undef HAVE_AVX_INTRINSICS
+#endif
+#if (defined(_MSC_VER) || defined(__SCE__)) && !defined(__AVX__)
+#undef HAVE_AVX_INTRINSICS
+#endif
+#elif defined __GNUC__
+#if (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 9)
+#undef HAVE_AVX_INTRINSICS
+#endif
+#endif
+
+
 #define SDL_MAIN_NOIMPL /* don't drag in header-only implementation of SDL_main */
 #include <SDL3/SDL_main.h>
 
diff --git a/src/audio/SDL_audiocvt.c b/src/audio/SDL_audiocvt.c
index 904721bb0c8b..b597e4cf349c 100644
--- a/src/audio/SDL_audiocvt.c
+++ b/src/audio/SDL_audiocvt.c
@@ -29,35 +29,6 @@
 
 #define DEBUG_AUDIOSTREAM 0
 
-#ifdef __ARM_NEON
-#define HAVE_NEON_INTRINSICS 1
-#endif
-
-#ifdef __SSE__
-#define HAVE_SSE_INTRINSICS 1
-#endif
-
-#ifdef __SSE3__
-#define HAVE_SSE3_INTRINSICS 1
-#endif
-
-#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
-#define HAVE_AVX_INTRINSICS 1
-#endif
-#if defined __clang__
-#if (!__has_attribute(target))
-#undef HAVE_AVX_INTRINSICS
-#endif
-#if (defined(_MSC_VER) || defined(__SCE__)) && !defined(__AVX__)
-#undef HAVE_AVX_INTRINSICS
-#endif
-#elif defined __GNUC__
-#if (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 9)
-#undef HAVE_AVX_INTRINSICS
-#endif
-#endif
-
-
 /**
  * Initialize an SDL_AudioCVT structure for conversion.
  *
diff --git a/src/audio/SDL_audiotypecvt.c b/src/audio/SDL_audiotypecvt.c
index 6a99038fc60c..22595ec41b57 100644
--- a/src/audio/SDL_audiotypecvt.c
+++ b/src/audio/SDL_audiotypecvt.c
@@ -27,10 +27,6 @@
 #define HAVE_NEON_INTRINSICS 1
 #endif
 
-#ifdef __SSE2__
-#define HAVE_SSE2_INTRINSICS 1
-#endif
-
 #if defined(__x86_64__) && HAVE_SSE2_INTRINSICS
 #define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* x86_64 guarantees SSE2. */
 #elif __MACOS__ && HAVE_SSE2_INTRINSICS
diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c
index 4d3c8bef4c33..b0e20743a056 100644
--- a/src/video/SDL_blit_A.c
+++ b/src/video/SDL_blit_A.c
@@ -166,7 +166,7 @@ static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info)
     }
 }
 
-#ifdef __MMX__
+#if HAVE_MMX_INTRINSICS
 
 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
 static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info)
@@ -409,7 +409,7 @@ static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info)
     _mm_empty();
 }
 
-#endif /* __MMX__ */
+#endif /* HAVE_MMX_INTRINSICS */
 
 #if SDL_ARM_SIMD_BLITTERS
 void BlitARGBto565PixelAlphaARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
@@ -750,7 +750,7 @@ static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask)
     }
 }
 
-#ifdef __MMX__
+#if HAVE_MMX_INTRINSICS
 
 /* fast RGB565->RGB565 blending with surface alpha */
 static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info)
@@ -1025,7 +1025,7 @@ static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info)
     }
 }
 
-#endif /* __MMX__ */
+#endif /* HAVE_MMX_INTRINSICS */
 
 /* fast RGB565->RGB565 blending with surface alpha */
 static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info)
@@ -1357,15 +1357,13 @@ SDL_CalculateBlitA(SDL_Surface *surface)
 
         case 4:
             if (sf->Rmask == df->Rmask && sf->Gmask == df->Gmask && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
-#if defined(__MMX__)
+#if HAVE_MMX_INTRINSICS
                 if (sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0 && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
-#ifdef __MMX__
                     if (SDL_HasMMX()) {
                         return BlitRGBtoRGBPixelAlphaMMX;
                     }
-#endif
                 }
-#endif /* __MMX__ */
+#endif /* HAVE_MMX_INTRINSICS */
                 if (sf->Amask == 0xff000000) {
 #if SDL_ARM_NEON_BLITTERS
                     if (SDL_HasNEON()) {
@@ -1407,7 +1405,7 @@ SDL_CalculateBlitA(SDL_Surface *surface)
             case 2:
                 if (surface->map->identity) {
                     if (df->Gmask == 0x7e0) {
-#ifdef __MMX__
+#if HAVE_MMX_INTRINSICS
                         if (SDL_HasMMX()) {
                             return Blit565to565SurfaceAlphaMMX;
                         } else
@@ -1416,7 +1414,7 @@ SDL_CalculateBlitA(SDL_Surface *surface)
                             return Blit565to565SurfaceAlpha;
                         }
                     } else if (df->Gmask == 0x3e0) {
-#ifdef __MMX__
+#if HAVE_MMX_INTRINSICS
                         if (SDL_HasMMX()) {
                             return Blit555to555SurfaceAlphaMMX;
                         } else
@@ -1430,7 +1428,7 @@ SDL_CalculateBlitA(SDL_Surface *surface)
 
             case 4:
                 if (sf->Rmask == df->Rmask && sf->Gmask == df->Gmask && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
-#ifdef __MMX__
+#if HAVE_MMX_INTRINSICS
                     if (sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0 && SDL_HasMMX()) {
                         return BlitRGBtoRGBSurfaceAlphaMMX;
                     }
diff --git a/src/video/SDL_blit_copy.c b/src/video/SDL_blit_copy.c
index 79e65b1531db..45536b4b2f7b 100644
--- a/src/video/SDL_blit_copy.c
+++ b/src/video/SDL_blit_copy.c
@@ -23,7 +23,7 @@
 #include "SDL_blit.h"
 #include "SDL_blit_copy.h"
 
-#ifdef __SSE__
+#if HAVE_SSE_INTRINSICS
 /* This assumes 16-byte aligned src and dst */
 static SDL_INLINE void SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
 {
@@ -48,9 +48,9 @@ static SDL_INLINE void SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
         SDL_memcpy(dst, src, len & 63);
     }
 }
-#endif /* __SSE__ */
+#endif /* HAVE_SSE_INTRINSICS */
 
-#ifdef __MMX__
+#if HAVE_MMX_INTRINSICS
 #ifdef _MSC_VER
 #pragma warning(disable : 4799)
 #endif
@@ -81,7 +81,7 @@ static SDL_INLINE void SDL_memcpyMMX(Uint8 *dst, const Uint8 *src, int len)
         SDL_memcpy(dst + skip, src + skip, remain);
     }
 }
-#endif /* __MMX__ */
+#endif /* HAVE_MMX_INTRINSICS */
 
 void SDL_BlitCopy(SDL_BlitInfo *info)
 {
@@ -122,7 +122,7 @@ void SDL_BlitCopy(SDL_BlitInfo *info)
         return;
     }
 
-#ifdef __SSE__
+#if HAVE_SSE_INTRINSICS
     if (SDL_HasSSE() &&
         !((uintptr_t)src & 15) && !(srcskip & 15) &&
         !((uintptr_t)dst & 15) && !(dstskip & 15)) {
@@ -135,7 +135,7 @@ void SDL_BlitCopy(SDL_BlitInfo *info)
     }
 #endif
 
-#ifdef __MMX__
+#if HAVE_MMX_INTRINSICS
     if (SDL_HasMMX() && !(srcskip & 7) && !(dstskip & 7)) {
         while (h--) {
             SDL_memcpyMMX(dst, src, w);
diff --git a/src/video/SDL_fillrect.c b/src/video/SDL_fillrect.c
index 20a0fccdbf7b..595cf7c32a71 100644
--- a/src/video/SDL_fillrect.c
+++ b/src/video/SDL_fillrect.c
@@ -22,7 +22,7 @@
 
 #include "SDL_blit.h"
 
-#ifdef __SSE__
+#if HAVE_SSE_INTRINSICS
 /* *INDENT-OFF* */ /* clang-format off */
 
 #if defined(_MSC_VER) && !defined(__clang__)
@@ -376,7 +376,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
         {
             color |= (color << 8);
             color |= (color << 16);
-#ifdef __SSE__
+#if HAVE_SSE_INTRINSICS
             if (SDL_HasSSE()) {
                 fill_function = SDL_FillSurfaceRect1SSE;
                 break;
@@ -389,7 +389,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
         case 2:
         {
             color |= (color << 16);
-#ifdef __SSE__
+#if HAVE_SSE_INTRINSICS
             if (SDL_HasSSE()) {
                 fill_function = SDL_FillSurfaceRect2SSE;
                 break;
@@ -408,7 +408,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
 
         case 4:
         {
-#ifdef __SSE__
+#if HAVE_SSE_INTRINSICS
             if (SDL_HasSSE()) {
                 fill_function = SDL_FillSurfaceRect4SSE;
                 break;
diff --git a/src/video/SDL_stretch.c b/src/video/SDL_stretch.c
index 05cf102622be..ce3eae9a15ef 100644
--- a/src/video/SDL_stretch.c
+++ b/src/video/SDL_stretch.c
@@ -332,10 +332,6 @@ static int scale_mat(const Uint32 *src, int src_w, int src_h, int src_pitch,
     return 0;
 }
 
-#if defined(__SSE2__)
-#define HAVE_SSE2_INTRINSICS 1
-#endif
-
 #if defined(__ARM_NEON)
 #define HAVE_NEON_INTRINSICS 1
 #define CAST_uint8x8_t       (uint8x8_t)
diff --git a/src/video/SDL_yuv.c b/src/video/SDL_yuv.c
index 2df6f64e43e1..acf2bae058b4 100644
--- a/src/video/SDL_yuv.c
+++ b/src/video/SDL_yuv.c
@@ -310,7 +310,7 @@ static SDL_bool yuv_rgb_sse(
     Uint8 *rgb, Uint32 rgb_stride,
     YCbCrType yuv_type)
 {
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
     if (!SDL_HasSSE2()) {
         return SDL_FALSE;
     }
@@ -1114,7 +1114,7 @@ static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const voi
     const Uint8 *src1, *src2;
     Uint8 *dstUV;
     Uint8 *tmp = NULL;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
     const SDL_bool use_SSE2 = SDL_HasSSE2();
 #endif
 
@@ -1144,7 +1144,7 @@ static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const voi
     y = UVheight;
     while (y--) {
         x = UVwidth;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
         if (use_SSE2) {
             while (x >= 16) {
                 __m128i u = _mm_loadu_si128((__m128i *)src1);
@@ -1187,7 +1187,7 @@ static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const vo
     const Uint8 *srcUV;
     Uint8 *dst1, *dst2;
     Uint8 *tmp = NULL;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
     const SDL_bool use_SSE2 = SDL_HasSSE2();
 #endif
 
@@ -1217,7 +1217,7 @@ static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const vo
     y = UVheight;
     while (y--) {
         x = UVwidth;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
         if (use_SSE2) {
             __m128i mask = _mm_set1_epi16(0x00FF);
             while (x >= 16) {
@@ -1264,7 +1264,7 @@ static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int
     const int dstUVPitchLeft = (dstUVPitch - UVwidth * 2) / sizeof(Uint16);
     const Uint16 *srcUV;
     Uint16 *dstUV;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
     const SDL_bool use_SSE2 = SDL_HasSSE2();
 #endif
 
@@ -1277,7 +1277,7 @@ static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int
     y = UVheight;
     while (y--) {
         x = UVwidth;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
         if (use_SSE2) {
             while (x >= 8) {
                 __m128i uv = _mm_loadu_si128((__m128i *)srcUV);
@@ -1372,7 +1372,7 @@ static int SDL_ConvertPixels_Planar2x2_to_Planar2x2(int width, int height,
                         SDL_GetPixelFormatName(dst_format));
 }
 
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
 #define PACKED4_TO_PACKED4_ROW_SSE2(shuffle)                      \
     while (x >= 4) {                                              \
         __m128i yuv = _mm_loadu_si128((__m128i *)srcYUV);         \
@@ -1399,14 +1399,14 @@ static int SDL_ConvertPixels_YUY2_to_UYVY(int width, int height, const void *src
     const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
     const Uint8 *srcYUV = (const Uint8 *)src;
     Uint8 *dstYUV = (Uint8 *)dst;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
     const SDL_bool use_SSE2 = SDL_HasSSE2();
 #endif
 
     y = height;
     while (y--) {
         x = YUVwidth;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
         if (use_SSE2) {
             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
         }
@@ -1440,14 +1440,14 @@ static int SDL_ConvertPixels_YUY2_to_YVYU(int width, int height, const void *src
     const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
     const Uint8 *srcYUV = (const Uint8 *)src;
     Uint8 *dstYUV = (Uint8 *)dst;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
     const SDL_bool use_SSE2 = SDL_HasSSE2();
 #endif
 
     y = height;
     while (y--) {
         x = YUVwidth;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
         if (use_SSE2) {
             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
         }
@@ -1481,14 +1481,14 @@ static int SDL_ConvertPixels_UYVY_to_YUY2(int width, int height, const void *src
     const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
     const Uint8 *srcYUV = (const Uint8 *)src;
     Uint8 *dstYUV = (Uint8 *)dst;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
     const SDL_bool use_SSE2 = SDL_HasSSE2();
 #endif
 
     y = height;
     while (y--) {
         x = YUVwidth;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
         if (use_SSE2) {
             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
         }
@@ -1522,14 +1522,14 @@ static int SDL_ConvertPixels_UYVY_to_YVYU(int width, int height, const void *src
     const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
     const Uint8 *srcYUV = (const Uint8 *)src;
     Uint8 *dstYUV = (Uint8 *)dst;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
     const SDL_bool use_SSE2 = SDL_HasSSE2();
 #endif
 
     y = height;
     while (y--) {
         x = YUVwidth;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
         if (use_SSE2) {
             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(0, 3, 2, 1));
         }
@@ -1563,14 +1563,14 @@ static int SDL_ConvertPixels_YVYU_to_YUY2(int width, int height, const void *src
     const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
     const Uint8 *srcYUV = (const Uint8 *)src;
     Uint8 *dstYUV = (Uint8 *)dst;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
     const SDL_bool use_SSE2 = SDL_HasSSE2();
 #endif
 
     y = height;
     while (y--) {
         x = YUVwidth;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
         if (use_SSE2) {
             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
         }
@@ -1604,14 +1604,14 @@ static int SDL_ConvertPixels_YVYU_to_UYVY(int width, int height, const void *src
     const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
     const Uint8 *srcYUV = (const Uint8 *)src;
     Uint8 *dstYUV = (Uint8 *)dst;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
     const SDL_bool use_SSE2 = SDL_HasSSE2();
 #endif
 
     y = height;
     while (y--) {
         x = YUVwidth;
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
         if (use_SSE2) {
             PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 1, 0, 3));
         }
diff --git a/src/video/yuv2rgb/yuv_rgb.c b/src/video/yuv2rgb/yuv_rgb.c
index 0ec04f59cd15..68139bdbc638 100644
--- a/src/video/yuv2rgb/yuv_rgb.c
+++ b/src/video/yuv2rgb/yuv_rgb.c
@@ -6,7 +6,6 @@
 
 #include "yuv_rgb.h"
 
-
 #define PRECISION 6
 #define PRECISION_FACTOR (1<<PRECISION)
 
@@ -240,7 +239,7 @@ void rgb24_yuv420_std(
 	}
 }
 
-#ifdef __SSE2__
+#if HAVE_SSE2_INTRINSICS
 
 #define SSE_FUNCTION_NAME	yuv420_rgb565_sse
 #define STD_FUNCTION_NAME	yuv420_rgb565_std
@@ -683,7 +682,7 @@ void rgb24_yuv420_sseu(uint32_t width, uint32_t height,
 }
 
 
-#endif //__SSE2__
+#endif //HAVE_SSE2_INTRINSICS
 
 #ifdef __loongarch_sx