SDL: Move intrinsic includes into a private header

From ee0e9e979cd90fa64d9c94da1fdd878f476bd12a Mon Sep 17 00:00:00 2001
From: Cameron Cawley <[EMAIL REDACTED]>
Date: Sat, 26 Nov 2022 17:12:42 +0000
Subject: [PATCH] Move intrinsic includes into a private header

---
 WhatsNew.txt                 |   1 +
 docs/README-migration.md     |  17 +++++
 include/SDL3/SDL_cpuinfo.h   |  94 --------------------------
 src/SDL_intrin.h             | 123 +++++++++++++++++++++++++++++++++++
 src/audio/SDL_audiocvt.c     |   1 +
 src/audio/SDL_audiotypecvt.c |   1 +
 src/video/SDL_blit_A.c       |   1 +
 src/video/SDL_blit_copy.c    |   1 +
 src/video/SDL_fillrect.c     |   1 +
 src/video/SDL_stretch.c      |   1 +
 src/video/SDL_yuv.c          |   1 +
 src/video/yuv2rgb/yuv_rgb.c  |   2 +-
 12 files changed, 149 insertions(+), 95 deletions(-)
 create mode 100644 src/SDL_intrin.h

diff --git a/WhatsNew.txt b/WhatsNew.txt
index d2908b076a22..a67335330832 100644
--- a/WhatsNew.txt
+++ b/WhatsNew.txt
@@ -38,3 +38,4 @@ General:
 * Added SDL_GetTicksNS() to return the number of nanoseconds since the SDL library initialized
 * Added SDL_DelayNS() to specify a delay in nanoseconds, to the highest precision the system will support
 * The timestamp member of the SDL_Event structure is now in nanoseconds, filled in with the time the event was generated, or the time it was queued if that's not available
+* Intrinsic headers are no longer included in the public SDL headers
diff --git a/docs/README-migration.md b/docs/README-migration.md
index 993fb7c44a94..2c95b31184aa 100644
--- a/docs/README-migration.md
+++ b/docs/README-migration.md
@@ -31,6 +31,23 @@ LDFLAGS += $(shell pkg-config sdl3 --libs)
 The SDL3main and SDL3test libraries have been renamed SDL3_main and SDL3_test, respectively.
 
 
+## SDL_cpuinfo.h
+
+The following headers are no longer automatically included by SDL_cpuinfo.h. Include the ones your code uses manually, on the platforms where they are available:
+- immintrin.h
+- mm3dnow.h
+- mmintrin.h
+- xmmintrin.h
+- emmintrin.h
+- pmmintrin.h
+- arm_neon.h
+- arm64_neon.h
+- armintr.h
+- arm64intr.h
+- altivec.h
+- lsxintrin.h
+- lasxintrin.h
+
 ## SDL_events.h
 
 The `timestamp` member of the SDL_Event structure now represents nanoseconds, and is populated with `SDL_GetTicksNS()`
diff --git a/include/SDL3/SDL_cpuinfo.h b/include/SDL3/SDL_cpuinfo.h
index b395c4449ffa..ee5be8eb63f1 100644
--- a/include/SDL3/SDL_cpuinfo.h
+++ b/include/SDL3/SDL_cpuinfo.h
@@ -30,100 +30,6 @@
 
 #include <SDL3/SDL_stdinc.h>
 
-/* Need to do this here because intrin.h has C++ code in it */
-/* Visual Studio 2005 has a bug where intrin.h conflicts with winnt.h */
-#if defined(_MSC_VER) && (_MSC_VER >= 1500) && (defined(_M_IX86) || defined(_M_X64))
-#ifdef __clang__
-/* As of Clang 11, '_m_prefetchw' is conflicting with the winnt.h's version,
-   so we define the needed '_m_prefetch' here as a pseudo-header, until the issue is fixed. */
-
-#ifndef __PRFCHWINTRIN_H
-#define __PRFCHWINTRIN_H
-
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
-_m_prefetch(void *__P)
-{
-  __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
-}
-
-#endif /* __PRFCHWINTRIN_H */
-#endif /* __clang__ */
-#include <intrin.h>
-#ifndef _WIN64
-#ifndef __MMX__
-#define __MMX__
-#endif
-#ifndef __3dNOW__
-#define __3dNOW__
-#endif
-#endif
-#ifndef __SSE__
-#define __SSE__
-#endif
-#ifndef __SSE2__
-#define __SSE2__
-#endif
-#ifndef __SSE3__
-#define __SSE3__
-#endif
-#elif defined(__MINGW64_VERSION_MAJOR)
-#include <intrin.h>
-#if !defined(SDL_DISABLE_ARM_NEON_H) && defined(__ARM_NEON)
-#  include <arm_neon.h>
-#endif
-#else
-/* altivec.h redefining bool causes a number of problems, see bugs 3993 and 4392, so you need to explicitly define SDL_ENABLE_ALTIVEC_H to have it included. */
-#if defined(HAVE_ALTIVEC_H) && defined(__ALTIVEC__) && !defined(__APPLE_ALTIVEC__) && defined(SDL_ENABLE_ALTIVEC_H)
-#include <altivec.h>
-#endif
-#if !defined(SDL_DISABLE_ARM_NEON_H)
-#  if defined(__ARM_NEON)
-#    include <arm_neon.h>
-#  elif defined(__WINDOWS__) || defined(__WINRT__) || defined(__GDK__)
-/* Visual Studio doesn't define __ARM_ARCH, but _M_ARM (if set, always 7), and _M_ARM64 (if set, always 1). */
-#    if defined(_M_ARM)
-#      include <armintr.h>
-#      include <arm_neon.h>
-#      define __ARM_NEON 1 /* Set __ARM_NEON so that it can be used elsewhere, at compile time */
-#    endif
-#    if defined (_M_ARM64)
-#      include <arm64intr.h>
-#      include <arm64_neon.h>
-#      define __ARM_NEON 1 /* Set __ARM_NEON so that it can be used elsewhere, at compile time */
-#      define __ARM_ARCH 8
-#    endif
-#  endif
-#endif
-#endif /* compiler version */
-
-#if defined(__3dNOW__) && !defined(SDL_DISABLE_MM3DNOW_H)
-#include <mm3dnow.h>
-#endif
-#if defined(__loongarch_sx) && !defined(SDL_DISABLE_LSX_H)
-#include <lsxintrin.h>
-#define __LSX__
-#endif
-#if defined(__loongarch_asx) && !defined(SDL_DISABLE_LASX_H)
-#include <lasxintrin.h>
-#define __LASX__
-#endif
-#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
-#include <immintrin.h>
-#else
-#if defined(__MMX__) && !defined(SDL_DISABLE_MMINTRIN_H)
-#include <mmintrin.h>
-#endif
-#if defined(__SSE__) && !defined(SDL_DISABLE_XMMINTRIN_H)
-#include <xmmintrin.h>
-#endif
-#if defined(__SSE2__) && !defined(SDL_DISABLE_EMMINTRIN_H)
-#include <emmintrin.h>
-#endif
-#if defined(__SSE3__) && !defined(SDL_DISABLE_PMMINTRIN_H)
-#include <pmmintrin.h>
-#endif
-#endif /* HAVE_IMMINTRIN_H */
-
 #include <SDL3/begin_code.h>
 /* Set up for C function definitions, even when using C++ */
 #ifdef __cplusplus
diff --git a/src/SDL_intrin.h b/src/SDL_intrin.h
new file mode 100644
index 000000000000..8d0dc8c9d067
--- /dev/null
+++ b/src/SDL_intrin.h
@@ -0,0 +1,123 @@
+/*
+  Simple DirectMedia Layer
+  Copyright (C) 1997-2022 Sam Lantinga <slouken@libsdl.org>
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef SDL_intrin_h_
+#define SDL_intrin_h_
+
+#include <SDL3/SDL_stdinc.h>
+
+/* Need to do this here because intrin.h has C++ code in it */
+/* Visual Studio 2005 has a bug where intrin.h conflicts with winnt.h */
+#if defined(_MSC_VER) && (_MSC_VER >= 1500) && (defined(_M_IX86) || defined(_M_X64))
+#ifdef __clang__
+/* As of Clang 11, '_m_prefetchw' conflicts with the version in winnt.h,
+   so we define the needed '_m_prefetch' here as a pseudo-header until the issue is fixed. */
+
+#ifndef __PRFCHWINTRIN_H
+#define __PRFCHWINTRIN_H
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_m_prefetch(void *__P)
+{
+  __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
+}
+
+#endif /* __PRFCHWINTRIN_H */
+#endif /* __clang__ */
+#include <intrin.h>
+#ifndef _WIN64
+#ifndef __MMX__
+#define __MMX__
+#endif
+#ifndef __3dNOW__
+#define __3dNOW__
+#endif
+#endif
+#ifndef __SSE__
+#define __SSE__
+#endif
+#ifndef __SSE2__
+#define __SSE2__
+#endif
+#ifndef __SSE3__
+#define __SSE3__
+#endif
+#elif defined(__MINGW64_VERSION_MAJOR)
+#include <intrin.h>
+#if !defined(SDL_DISABLE_ARM_NEON_H) && defined(__ARM_NEON)
+#  include <arm_neon.h>
+#endif
+#else
+/* altivec.h redefining bool causes a number of problems, see bugs 3993 and 4392, so you need to explicitly define SDL_ENABLE_ALTIVEC_H to have it included. */
+#if defined(HAVE_ALTIVEC_H) && defined(__ALTIVEC__) && !defined(__APPLE_ALTIVEC__) && defined(SDL_ENABLE_ALTIVEC_H)
+#include <altivec.h>
+#endif
+#if !defined(SDL_DISABLE_ARM_NEON_H)
+#  if defined(__ARM_NEON)
+#    include <arm_neon.h>
+#  elif defined(__WINDOWS__) || defined(__WINRT__) || defined(__GDK__)
+/* Visual Studio doesn't define __ARM_ARCH, but _M_ARM (if set, always 7), and _M_ARM64 (if set, always 1). */
+#    if defined(_M_ARM)
+#      include <armintr.h>
+#      include <arm_neon.h>
+#      define __ARM_NEON 1 /* Set __ARM_NEON so that it can be used elsewhere, at compile time */
+#    endif
+#    if defined (_M_ARM64)
+#      include <arm64intr.h>
+#      include <arm64_neon.h>
+#      define __ARM_NEON 1 /* Set __ARM_NEON so that it can be used elsewhere, at compile time */
+#      define __ARM_ARCH 8
+#    endif
+#  endif
+#endif
+#endif /* compiler version */
+
+#if defined(__3dNOW__) && !defined(SDL_DISABLE_MM3DNOW_H)
+#include <mm3dnow.h>
+#endif
+#if defined(__loongarch_sx) && !defined(SDL_DISABLE_LSX_H)
+#include <lsxintrin.h>
+#define __LSX__
+#endif
+#if defined(__loongarch_asx) && !defined(SDL_DISABLE_LASX_H)
+#include <lasxintrin.h>
+#define __LASX__
+#endif
+#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
+#include <immintrin.h>
+#else
+#if defined(__MMX__) && !defined(SDL_DISABLE_MMINTRIN_H)
+#include <mmintrin.h>
+#endif
+#if defined(__SSE__) && !defined(SDL_DISABLE_XMMINTRIN_H)
+#include <xmmintrin.h>
+#endif
+#if defined(__SSE2__) && !defined(SDL_DISABLE_EMMINTRIN_H)
+#include <emmintrin.h>
+#endif
+#if defined(__SSE3__) && !defined(SDL_DISABLE_PMMINTRIN_H)
+#include <pmmintrin.h>
+#endif
+#endif /* HAVE_IMMINTRIN_H */
+
+#endif /* SDL_intrin_h_ */
+
+/* vi: set ts=4 sw=4 expandtab: */
diff --git a/src/audio/SDL_audiocvt.c b/src/audio/SDL_audiocvt.c
index 03d2c0bba12d..20abbd4420b7 100644
--- a/src/audio/SDL_audiocvt.c
+++ b/src/audio/SDL_audiocvt.c
@@ -25,6 +25,7 @@
 #include "SDL_audio_c.h"
 
 #include "../SDL_dataqueue.h"
+#include "../SDL_intrin.h"
 
 #define DEBUG_AUDIOSTREAM 0
 
diff --git a/src/audio/SDL_audiotypecvt.c b/src/audio/SDL_audiotypecvt.c
index 0d06e9af24b7..e677d0dba87b 100644
--- a/src/audio/SDL_audiotypecvt.c
+++ b/src/audio/SDL_audiotypecvt.c
@@ -21,6 +21,7 @@
 #include "SDL_internal.h"
 
 #include "SDL_audio_c.h"
+#include "../SDL_intrin.h"
 
 #ifdef __ARM_NEON
 #define HAVE_NEON_INTRINSICS 1
diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c
index 6b8aa9e0034c..a1108409b6cb 100644
--- a/src/video/SDL_blit_A.c
+++ b/src/video/SDL_blit_A.c
@@ -23,6 +23,7 @@
 #if SDL_HAVE_BLIT_A
 
 #include "SDL_blit.h"
+#include "../SDL_intrin.h"
 
 /* Functions to perform alpha blended blitting */
 
diff --git a/src/video/SDL_blit_copy.c b/src/video/SDL_blit_copy.c
index d4bd9c3a1407..acfde5a36965 100644
--- a/src/video/SDL_blit_copy.c
+++ b/src/video/SDL_blit_copy.c
@@ -22,6 +22,7 @@
 
 #include "SDL_blit.h"
 #include "SDL_blit_copy.h"
+#include "../SDL_intrin.h"
 
 #ifdef __SSE__
 /* This assumes 16-byte aligned src and dst */
diff --git a/src/video/SDL_fillrect.c b/src/video/SDL_fillrect.c
index bccdb41d040b..fb60929df635 100644
--- a/src/video/SDL_fillrect.c
+++ b/src/video/SDL_fillrect.c
@@ -21,6 +21,7 @@
 #include "SDL_internal.h"
 
 #include "SDL_blit.h"
+#include "../SDL_intrin.h"
 
 #ifdef __SSE__
 /* *INDENT-OFF* */ /* clang-format off */
diff --git a/src/video/SDL_stretch.c b/src/video/SDL_stretch.c
index e48e108d9de4..30436959538f 100644
--- a/src/video/SDL_stretch.c
+++ b/src/video/SDL_stretch.c
@@ -21,6 +21,7 @@
 #include "SDL_internal.h"
 
 #include "SDL_blit.h"
+#include "../SDL_intrin.h"
 
 static int SDL_LowerSoftStretchNearest(SDL_Surface *src, const SDL_Rect *srcrect, SDL_Surface *dst, const SDL_Rect *dstrect);
 static int SDL_LowerSoftStretchLinear(SDL_Surface *src, const SDL_Rect *srcrect, SDL_Surface *dst, const SDL_Rect *dstrect);
diff --git a/src/video/SDL_yuv.c b/src/video/SDL_yuv.c
index aff90926c7e0..0f9837097b43 100644
--- a/src/video/SDL_yuv.c
+++ b/src/video/SDL_yuv.c
@@ -22,6 +22,7 @@
 
 #include "SDL_pixels_c.h"
 #include "SDL_yuv_c.h"
+#include "../SDL_intrin.h"
 
 #include "yuv2rgb/yuv_rgb.h"
 
diff --git a/src/video/yuv2rgb/yuv_rgb.c b/src/video/yuv2rgb/yuv_rgb.c
index 99d2e2b7e9fd..2bdf0cdb9d54 100644
--- a/src/video/yuv2rgb/yuv_rgb.c
+++ b/src/video/yuv2rgb/yuv_rgb.c
@@ -6,7 +6,7 @@
 
 #include "yuv_rgb.h"
 
-/*#include <x86intrin.h>*/
+#include "../../SDL_intrin.h"
 
 #define PRECISION 6
 #define PRECISION_FACTOR (1<<PRECISION)