sdl2-compat: Add SIMD functions (#25)

From 7e64bf6b87efa816123670eabbe33cc6f807440f Mon Sep 17 00:00:00 2001
From: Sylvain Becker <[EMAIL REDACTED]>
Date: Tue, 10 Jan 2023 12:17:57 +0100
Subject: [PATCH] Add SIMD functions (#25)

---
 src/sdl2_compat.c          | 64 ++++++++++++++++++++++++++++++++++++++
 src/sdl3_include_wrapper.h | 15 ---------
 src/sdl3_syms.h            |  5 ++-
 3 files changed, 66 insertions(+), 18 deletions(-)

diff --git a/src/sdl2_compat.c b/src/sdl2_compat.c
index 4f1fd3d..a8abb89 100644
--- a/src/sdl2_compat.c
+++ b/src/sdl2_compat.c
@@ -3835,6 +3835,70 @@ SDL_SensorOpen(int idx)
     return sid ? SDL3_OpenSensor(sid) : NULL;
 }
 
+/* SDL2 SIMD allocation entry point: forwards len to SDL3_aligned_alloc(), passing SDL_SIMDGetAlignment() as the alignment. */
+DECLSPEC void * SDLCALL SDL_SIMDAlloc(const size_t len)
+{
+    return SDL3_aligned_alloc(SDL_SIMDGetAlignment(), len);
+}
+/* Resize a SIMD block: realloc the raw allocation, re-align, and shift the data if its offset moved. NULL on failure. */
+DECLSPEC void * SDLCALL SDL_SIMDRealloc(void *mem, const size_t len)
+{
+    const size_t alignment = SDL_SIMDGetAlignment();
+    const size_t padding = (alignment - (len % alignment)) % alignment; /* bytes needed to round len up to a multiple of alignment */
+    Uint8 *retval = (Uint8 *)mem;
+    void *oldmem = mem;
+    size_t memdiff = 0, ptrdiff;
+    Uint8 *ptr;
+    size_t to_allocate;
+
+    /* Request len plus alignment slack, tail padding and room for the stored pointer. The extras
+     * are bounded (a few hundred bytes max), so only the addition to len is overflow-checked. */
+    if (SDL_size_add_overflow(len, alignment + padding + sizeof(void *), &to_allocate)) {
+        return NULL;
+    }
+
+    if (mem) {
+        mem = *(((void **)mem) - 1); /* raw allocation pointer, read from just below the user pointer */
+
+        /* Old offset of the user pointer inside the raw block (NOTE(review): assumes SDL_SIMDAlloc uses this layout; confirm) */
+        memdiff = ((size_t)oldmem) - ((size_t)mem);
+    }
+
+    ptr = (Uint8 *)SDL_realloc(mem, to_allocate);
+
+    if (ptr == NULL) {
+        return NULL; /* Out of memory, bail! */
+    }
+
+    /* Leave room for the stored pointer, then advance 1..alignment bytes to an aligned address. */
+    retval = ptr + sizeof(void *);
+    retval += alignment - (((size_t)retval) % alignment);
+
+    /* For an existing block, the data must end up at the new aligned pointer. */
+    if (mem) {
+        ptrdiff = ((size_t)retval) - ((size_t)ptr);
+        if (memdiff != ptrdiff) { /* Delta has changed, copy to new offset! */
+            oldmem = (void *)(((uintptr_t)ptr) + memdiff); /* where the old data now sits */
+
+            /* The old length is not recorded, so the new `len` is the only size
+             * available. It may reach past the old data (those bytes are
+             * undefined), but it covers everything that has to be preserved.
+             */
+            SDL_memmove(retval, oldmem, len);
+        }
+    }
+
+    /* Record the raw allocation pointer just below the aligned pointer (last, after any data move). */
+    *(((void **)retval) - 1) = ptr;
+    return retval;
+}
+/* Free a SIMD block via SDL3_aligned_free(). NOTE(review): confirm it also accepts blocks built by SDL_SIMDRealloc. */
+DECLSPEC void SDLCALL SDL_SIMDFree(void *ptr)
+{
+    SDL3_aligned_free(ptr);
+}
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/sdl3_include_wrapper.h b/src/sdl3_include_wrapper.h
index e010d47..9260720 100644
--- a/src/sdl3_include_wrapper.h
+++ b/src/sdl3_include_wrapper.h
@@ -569,9 +569,6 @@
 #define SDL_HasLASX IGNORE_THIS_VERSION_OF_SDL_HasLASX
 #define SDL_GetSystemRAM IGNORE_THIS_VERSION_OF_SDL_GetSystemRAM
 #define SDL_SIMDGetAlignment IGNORE_THIS_VERSION_OF_SDL_SIMDGetAlignment
-#define SDL_SIMDAlloc IGNORE_THIS_VERSION_OF_SDL_SIMDAlloc
-#define SDL_SIMDRealloc IGNORE_THIS_VERSION_OF_SDL_SIMDRealloc
-#define SDL_SIMDFree IGNORE_THIS_VERSION_OF_SDL_SIMDFree
 #define SDL_GetPowerInfo IGNORE_THIS_VERSION_OF_SDL_GetPowerInfo
 #define SDL_GetPixelFormatName IGNORE_THIS_VERSION_OF_SDL_GetPixelFormatName
 #define SDL_GetMasksForPixelFormatEnum IGNORE_THIS_VERSION_OF_SDL_GetMasksForPixelFormatEnum
@@ -3093,18 +3090,6 @@ typedef void (__cdecl *pfnSDL_CurrentEndThread) (unsigned);
 #undef SDL_SIMDGetAlignment
 #endif
 
-#ifdef SDL_SIMDAlloc
-#undef SDL_SIMDAlloc
-#endif
-
-#ifdef SDL_SIMDRealloc
-#undef SDL_SIMDRealloc
-#endif
-
-#ifdef SDL_SIMDFree
-#undef SDL_SIMDFree
-#endif
-
 #ifdef SDL_GetPowerInfo
 #undef SDL_GetPowerInfo
 #endif
diff --git a/src/sdl3_syms.h b/src/sdl3_syms.h
index 5e18bb8..bec9da0 100644
--- a/src/sdl3_syms.h
+++ b/src/sdl3_syms.h
@@ -699,8 +699,8 @@ SDL3_SYM_PASSTHROUGH(SDL_TouchDeviceType,GetTouchDeviceType,(SDL_TouchID a),(a),
 SDL3_SYM_PASSTHROUGH(int,UIKitRunApp,(int a, char *b, SDL_main_func c),(a,b,c),return)
 #endif
 SDL3_SYM_PASSTHROUGH(size_t,SIMDGetAlignment,(void),(),return)
-SDL3_SYM_PASSTHROUGH(void*,SIMDAlloc,(const size_t a),(a),return)
-SDL3_SYM_PASSTHROUGH(void,SIMDFree,(void *a),(a),)
+SDL3_SYM(void*,aligned_alloc,(size_t a, size_t b),(a,b),return)
+SDL3_SYM(void,aligned_free,(void *a),(a),)
 SDL3_SYM(Sint64,RWsize,(SDL_RWops *a),(a),return)
 SDL3_SYM(Sint64,RWseek,(SDL_RWops *a, Sint64 b, int c),(a,b,c),return)
 SDL3_SYM(Sint64,RWtell,(SDL_RWops *a),(a),return)
@@ -747,7 +747,6 @@ SDL3_SYM_PASSTHROUGH(void,Metal_GetDrawableSize,(SDL_Window *a, int *b, int *c),
 SDL3_SYM_PASSTHROUGH(double,trunc,(double a),(a),return)
 SDL3_SYM_PASSTHROUGH(float,truncf,(float a),(a),return)
 SDL3_SYM_PASSTHROUGH(SDL_Locale *,GetPreferredLocales,(void),(),return)
-SDL3_SYM_PASSTHROUGH(void*,SIMDRealloc,(void *a, const size_t b),(a, b),return)
 #ifdef __ANDROID__
 SDL3_SYM_PASSTHROUGH(SDL_bool,AndroidRequestPermission,(const char *a),(a),return)
 #endif