SDL: Merge commit '2aa9569b3e3dacce9338fb6964c953217d02ff65' into main

From 2aa9569b3e3dacce9338fb6964c953217d02ff65 Mon Sep 17 00:00:00 2001
From: Sam Lantinga <[EMAIL REDACTED]>
Date: Mon, 9 Jan 2023 17:42:16 -0800
Subject: [PATCH] Replaced SDL_SIMDAlloc(), SDL_SIMDRealloc(), and
 SDL_SIMDFree() with SDL_aligned_alloc() and SDL_aligned_free()

Fixes https://github.com/libsdl-org/SDL/issues/5641
---
 WhatsNew.txt                      |  1 +
 docs/README-migration.md          |  2 +
 include/SDL3/SDL_cpuinfo.h        | 90 ++-----------------------------
 include/SDL3/SDL_stdinc.h         | 28 ++++++++++
 src/cpuinfo/SDL_cpuinfo.c         | 87 +-----------------------------
 src/dynapi/SDL_dynapi.sym         |  5 +-
 src/dynapi/SDL_dynapi_overrides.h |  5 +-
 src/dynapi/SDL_dynapi_procs.h     |  5 +-
 src/render/SDL_yuv_sw.c           |  4 +-
 src/stdlib/SDL_stdlib.c           | 37 +++++++++++++
 src/video/SDL_RLEaccel.c          | 19 +++++--
 src/video/SDL_surface.c           |  4 +-
 test/testautomation_stdlib.c      | 39 ++++++++++++--
 13 files changed, 132 insertions(+), 194 deletions(-)

diff --git a/WhatsNew.txt b/WhatsNew.txt
index f6458552a292..371e24135014 100644
--- a/WhatsNew.txt
+++ b/WhatsNew.txt
@@ -20,5 +20,6 @@ General:
 * Added SDL_DelayNS() to specify a delay in nanoseconds, to the highest precision the system will support
 * The timestamp member of the SDL_Event structure is now in nanoseconds, filled in with the time the event was generated, or the time it was queued if that's not available
 * Added SDL_modf() and SDL_modff() to separate the whole and fractional portions of a floating point number
+* Added SDL_aligned_alloc() and SDL_aligned_free() to allocate and free memory with a given alignment
 * Added SDL_GetRenderVSync() to get vsync of the given renderer
 * Added SDL_PlayAudioDevice() to start audio playback
diff --git a/docs/README-migration.md b/docs/README-migration.md
index fa3d8bbef422..23e66acf31cb 100644
--- a/docs/README-migration.md
+++ b/docs/README-migration.md
@@ -76,6 +76,8 @@ Use the SDL_AudioDevice functions instead.
 
 SDL_Has3DNow() has been removed; there is no replacement.
 
+SDL_SIMDAlloc(), SDL_SIMDRealloc(), and SDL_SIMDFree() have been removed. You can use SDL_aligned_alloc() and SDL_aligned_free() with SDL_SIMDGetAlignment() to get the same functionality.
+
 The following headers are no longer automatically included, and will need to be included manually:
 - immintrin.h
 - mm3dnow.h
diff --git a/include/SDL3/SDL_cpuinfo.h b/include/SDL3/SDL_cpuinfo.h
index fae303b235d8..9e7b807d76ec 100644
--- a/include/SDL3/SDL_cpuinfo.h
+++ b/include/SDL3/SDL_cpuinfo.h
@@ -367,95 +367,11 @@ extern DECLSPEC int SDLCALL SDL_GetSystemRAM(void);
  *          instructions.
  *
  * \since This function is available since SDL 3.0.0.
- */
-extern DECLSPEC size_t SDLCALL SDL_SIMDGetAlignment(void);
-
-/**
- * Allocate memory in a SIMD-friendly way.
- *
- * This will allocate a block of memory that is suitable for use with SIMD
- * instructions. Specifically, it will be properly aligned and padded for the
- * system's supported vector instructions.
- *
- * The memory returned will be padded such that it is safe to read or write an
- * incomplete vector at the end of the memory block. This can be useful so you
- * don't have to drop back to a scalar fallback at the end of your SIMD
- * processing loop to deal with the final elements without overflowing the
- * allocated buffer.
- *
- * You must free this memory with SDL_FreeSIMD(), not free() or SDL_free() or
- * delete[], etc.
- *
- * Note that SDL will only deal with SIMD instruction sets it is aware of; for
- * example, SDL 2.0.8 knows that SSE wants 16-byte vectors (SDL_HasSSE()), and
- * AVX2 wants 32 bytes (SDL_HasAVX2()), but doesn't know that AVX-512 wants
- * 64. To be clear: if you can't decide to use an instruction set with an
- * SDL_Has*() function, don't use that instruction set with memory allocated
- * through here.
  *
- * SDL_AllocSIMD(0) will return a non-NULL pointer, assuming the system isn't
- * out of memory, but you are not allowed to dereference it (because you only
- * own zero bytes of that buffer).
- *
- * \param len The length, in bytes, of the block to allocate. The actual
- *            allocated block might be larger due to padding, etc.
- * \returns a pointer to the newly-allocated block, NULL if out of memory.
- *
- * \since This function is available since SDL 3.0.0.
- *
- * \sa SDL_SIMDGetAlignment
- * \sa SDL_SIMDRealloc
- * \sa SDL_SIMDFree
+ * \sa SDL_aligned_alloc
+ * \sa SDL_aligned_free
  */
-extern DECLSPEC void * SDLCALL SDL_SIMDAlloc(const size_t len);
-
-/**
- * Reallocate memory obtained from SDL_SIMDAlloc
- *
- * It is not valid to use this function on a pointer from anything but
- * SDL_SIMDAlloc(). It can't be used on pointers from malloc, realloc,
- * SDL_malloc, memalign, new[], etc.
- *
- * \param mem The pointer obtained from SDL_SIMDAlloc. This function also
- *            accepts NULL, at which point this function is the same as
- *            calling SDL_SIMDAlloc with a NULL pointer.
- * \param len The length, in bytes, of the block to allocated. The actual
- *            allocated block might be larger due to padding, etc. Passing 0
- *            will return a non-NULL pointer, assuming the system isn't out of
- *            memory.
- * \returns a pointer to the newly-reallocated block, NULL if out of memory.
- *
- * \since This function is available since SDL 3.0.0.
- *
- * \sa SDL_SIMDGetAlignment
- * \sa SDL_SIMDAlloc
- * \sa SDL_SIMDFree
- */
-extern DECLSPEC void * SDLCALL SDL_SIMDRealloc(void *mem, const size_t len);
-
-/**
- * Deallocate memory obtained from SDL_SIMDAlloc
- *
- * It is not valid to use this function on a pointer from anything but
- * SDL_SIMDAlloc() or SDL_SIMDRealloc(). It can't be used on pointers from
- * malloc, realloc, SDL_malloc, memalign, new[], etc.
- *
- * However, SDL_SIMDFree(NULL) is a legal no-op.
- *
- * The memory pointed to by `ptr` is no longer valid for access upon return,
- * and may be returned to the system or reused by a future allocation. The
- * pointer passed to this function is no longer safe to dereference once this
- * function returns, and should be discarded.
- *
- * \param ptr The pointer, returned from SDL_SIMDAlloc or SDL_SIMDRealloc, to
- *            deallocate. NULL is a legal no-op.
- *
- * \since This function is available since SDL 3.0.0.
- *
- * \sa SDL_SIMDAlloc
- * \sa SDL_SIMDRealloc
- */
-extern DECLSPEC void SDLCALL SDL_SIMDFree(void *ptr);
+extern DECLSPEC size_t SDLCALL SDL_SIMDGetAlignment(void);
 
 /* Ends C function definitions when using C++ */
 #ifdef __cplusplus
diff --git a/include/SDL3/SDL_stdinc.h b/include/SDL3/SDL_stdinc.h
index db6c8da47b86..5526a3508ea3 100644
--- a/include/SDL3/SDL_stdinc.h
+++ b/include/SDL3/SDL_stdinc.h
@@ -421,6 +421,34 @@ extern DECLSPEC int SDLCALL SDL_SetMemoryFunctions(SDL_malloc_func malloc_func,
                                                    SDL_realloc_func realloc_func,
                                                    SDL_free_func free_func);
 
+/**
+ * Allocate memory aligned to a specific value
+ *
+ * If `alignment` is less than the size of `void *`, then it will be increased to match that.
+ *
+ * The returned memory address will be a multiple of the alignment value, and the amount of memory allocated will be a multiple of the alignment value.
+ *
+ * The memory returned by this function must be freed with SDL_aligned_free().
+ *
+ * \param alignment the alignment requested
+ * \param size the size to allocate
+ * \returns a pointer to the aligned memory, or NULL if the size overflows or the allocation fails
+ *
+ * \since This function is available since SDL 3.0.0.
+ *
+ * \sa SDL_aligned_free
+ */
+extern DECLSPEC SDL_MALLOC void *SDLCALL SDL_aligned_alloc(size_t alignment, size_t size);
+
+/**
+ * Free memory allocated by SDL_aligned_alloc(). Passing NULL is a no-op.
+ *
+ * \since This function is available since SDL 3.0.0.
+ *
+ * \sa SDL_aligned_alloc
+ */
+extern DECLSPEC void SDLCALL SDL_aligned_free(void *mem);
+
 /**
  * Get the number of outstanding (unfreed) allocations
  *
diff --git a/src/cpuinfo/SDL_cpuinfo.c b/src/cpuinfo/SDL_cpuinfo.c
index 898589a6807d..4fe8e4277c40 100644
--- a/src/cpuinfo/SDL_cpuinfo.c
+++ b/src/cpuinfo/SDL_cpuinfo.c
@@ -139,7 +139,7 @@ static int CPU_haveCPUID(void)
     : "%eax", "%ecx"
     );
 #elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
-/* Technically, if this is being compiled under __x86_64__ then it has 
+/* Technically, if this is being compiled under __x86_64__ then it has
    CPUid by definition.  But it's nice to be able to prove it.  :)      */
     __asm__ (
 "        pushfq                      # Get original EFLAGS             \n"
@@ -1094,91 +1094,6 @@ SDL_SIMDGetAlignment(void)
     return SDL_SIMDAlignment;
 }
 
-void *
-SDL_SIMDAlloc(const size_t len)
-{
-    const size_t alignment = SDL_SIMDGetAlignment();
-    const size_t padding = (alignment - (len % alignment)) % alignment;
-    Uint8 *retval = NULL;
-    Uint8 *ptr;
-    size_t to_allocate;
-
-    /* alignment + padding + sizeof (void *) is bounded (a few hundred
-     * bytes max), so no need to check for overflow within that argument */
-    if (SDL_size_add_overflow(len, alignment + padding + sizeof(void *), &to_allocate)) {
-        return NULL;
-    }
-
-    ptr = (Uint8 *)SDL_malloc(to_allocate);
-    if (ptr) {
-        /* store the actual allocated pointer right before our aligned pointer. */
-        retval = ptr + sizeof(void *);
-        retval += alignment - (((size_t)retval) % alignment);
-        *(((void **)retval) - 1) = ptr;
-    }
-    return retval;
-}
-
-void *
-SDL_SIMDRealloc(void *mem, const size_t len)
-{
-    const size_t alignment = SDL_SIMDGetAlignment();
-    const size_t padding = (alignment - (len % alignment)) % alignment;
-    Uint8 *retval = (Uint8 *)mem;
-    void *oldmem = mem;
-    size_t memdiff = 0, ptrdiff;
-    Uint8 *ptr;
-    size_t to_allocate;
-
-    /* alignment + padding + sizeof (void *) is bounded (a few hundred
-     * bytes max), so no need to check for overflow within that argument */
-    if (SDL_size_add_overflow(len, alignment + padding + sizeof(void *), &to_allocate)) {
-        return NULL;
-    }
-
-    if (mem) {
-        mem = *(((void **)mem) - 1);
-
-        /* Check the delta between the real pointer and user pointer */
-        memdiff = ((size_t)oldmem) - ((size_t)mem);
-    }
-
-    ptr = (Uint8 *)SDL_realloc(mem, to_allocate);
-
-    if (ptr == NULL) {
-        return NULL; /* Out of memory, bail! */
-    }
-
-    /* Store the actual allocated pointer right before our aligned pointer. */
-    retval = ptr + sizeof(void *);
-    retval += alignment - (((size_t)retval) % alignment);
-
-    /* Make sure the delta is the same! */
-    if (mem) {
-        ptrdiff = ((size_t)retval) - ((size_t)ptr);
-        if (memdiff != ptrdiff) { /* Delta has changed, copy to new offset! */
-            oldmem = (void *)(((uintptr_t)ptr) + memdiff);
-
-            /* Even though the data past the old `len` is undefined, this is the
-             * only length value we have, and it guarantees that we copy all the
-             * previous memory anyhow.
-             */
-            SDL_memmove(retval, oldmem, len);
-        }
-    }
-
-    /* Actually store the allocated pointer, finally. */
-    *(((void **)retval) - 1) = ptr;
-    return retval;
-}
-
-void SDL_SIMDFree(void *ptr)
-{
-    if (ptr) {
-        SDL_free(*(((void **)ptr) - 1));
-    }
-}
-
 #ifdef TEST_MAIN
 
 #include <stdio.h>
diff --git a/src/dynapi/SDL_dynapi.sym b/src/dynapi/SDL_dynapi.sym
index fe547ae87fda..5d96f0acd5fa 100644
--- a/src/dynapi/SDL_dynapi.sym
+++ b/src/dynapi/SDL_dynapi.sym
@@ -555,10 +555,7 @@ SDL3_0.0.0 {
     SDL_RumbleJoystick;
     SDL_RumbleJoystickTriggers;
     SDL_RunApp;
-    SDL_SIMDAlloc;
-    SDL_SIMDFree;
     SDL_SIMDGetAlignment;
-    SDL_SIMDRealloc;
     SDL_SaveBMP_RW;
     SDL_ScreenKeyboardShown;
     SDL_ScreenSaverEnabled;
@@ -843,6 +840,8 @@ SDL3_0.0.0 {
     SDL_modff;
     SDL_GetRenderVSync;
     SDL_PlayAudioDevice;
+    SDL_aligned_alloc;
+    SDL_aligned_free;
     # extra symbols go here (don't modify this line)
   local: *;
 };
diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h
index d14eaaae28a8..54dbf1760da0 100644
--- a/src/dynapi/SDL_dynapi_overrides.h
+++ b/src/dynapi/SDL_dynapi_overrides.h
@@ -581,10 +581,7 @@
 #define SDL_RumbleJoystick SDL_RumbleJoystick_REAL
 #define SDL_RumbleJoystickTriggers SDL_RumbleJoystickTriggers_REAL
 #define SDL_RunApp SDL_RunApp_REAL
-#define SDL_SIMDAlloc SDL_SIMDAlloc_REAL
-#define SDL_SIMDFree SDL_SIMDFree_REAL
 #define SDL_SIMDGetAlignment SDL_SIMDGetAlignment_REAL
-#define SDL_SIMDRealloc SDL_SIMDRealloc_REAL
 #define SDL_SaveBMP_RW SDL_SaveBMP_RW_REAL
 #define SDL_ScreenKeyboardShown SDL_ScreenKeyboardShown_REAL
 #define SDL_ScreenSaverEnabled SDL_ScreenSaverEnabled_REAL
@@ -871,3 +868,5 @@
 #define SDL_modff SDL_modff_REAL
 #define SDL_GetRenderVSync SDL_GetRenderVSync_REAL
 #define SDL_PlayAudioDevice SDL_PlayAudioDevice_REAL
+#define SDL_aligned_alloc SDL_aligned_alloc_REAL
+#define SDL_aligned_free SDL_aligned_free_REAL
diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h
index efbf90fed682..6e756a0683bc 100644
--- a/src/dynapi/SDL_dynapi_procs.h
+++ b/src/dynapi/SDL_dynapi_procs.h
@@ -637,10 +637,7 @@ SDL_DYNAPI_PROC(int,SDL_RumbleGamepadTriggers,(SDL_Gamepad *a, Uint16 b, Uint16
 SDL_DYNAPI_PROC(int,SDL_RumbleJoystick,(SDL_Joystick *a, Uint16 b, Uint16 c, Uint32 d),(a,b,c,d),return)
 SDL_DYNAPI_PROC(int,SDL_RumbleJoystickTriggers,(SDL_Joystick *a, Uint16 b, Uint16 c, Uint32 d),(a,b,c,d),return)
 SDL_DYNAPI_PROC(int,SDL_RunApp,(int a, char *b[], SDL_main_func c, void *d),(a,b,c,d),return)
-SDL_DYNAPI_PROC(void*,SDL_SIMDAlloc,(const size_t a),(a),return)
-SDL_DYNAPI_PROC(void,SDL_SIMDFree,(void *a),(a),)
 SDL_DYNAPI_PROC(size_t,SDL_SIMDGetAlignment,(void),(),return)
-SDL_DYNAPI_PROC(void*,SDL_SIMDRealloc,(void *a, const size_t b),(a,b),return)
 SDL_DYNAPI_PROC(int,SDL_SaveBMP_RW,(SDL_Surface *a, SDL_RWops *b, int c),(a,b,c),return)
 SDL_DYNAPI_PROC(SDL_bool,SDL_ScreenKeyboardShown,(SDL_Window *a),(a),return)
 SDL_DYNAPI_PROC(SDL_bool,SDL_ScreenSaverEnabled,(void),(),return)
@@ -916,3 +913,5 @@ SDL_DYNAPI_PROC(double,SDL_modf,(double a, double *b),(a,b),return)
 SDL_DYNAPI_PROC(float,SDL_modff,(float a, float *b),(a,b),return)
 SDL_DYNAPI_PROC(int,SDL_GetRenderVSync,(SDL_Renderer *a, int *b),(a,b),return)
 SDL_DYNAPI_PROC(void,SDL_PlayAudioDevice,(SDL_AudioDeviceID a),(a),)
+SDL_DYNAPI_PROC(void*,SDL_aligned_alloc,(size_t a, size_t b),(a,b),return)
+SDL_DYNAPI_PROC(void,SDL_aligned_free,(void *a),(a),)
diff --git a/src/render/SDL_yuv_sw.c b/src/render/SDL_yuv_sw.c
index 11d3c2796ece..f920c553d8e5 100644
--- a/src/render/SDL_yuv_sw.c
+++ b/src/render/SDL_yuv_sw.c
@@ -63,7 +63,7 @@ SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
             SDL_OutOfMemory();
             return NULL;
         }
-        swdata->pixels = (Uint8 *)SDL_SIMDAlloc(dst_size);
+        swdata->pixels = (Uint8 *)SDL_aligned_alloc(SDL_SIMDGetAlignment(), dst_size);
         if (!swdata->pixels) {
             SDL_SW_DestroyYUVTexture(swdata);
             SDL_OutOfMemory();
@@ -394,7 +394,7 @@ int SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture *swdata, const SDL_Rect *srcrect,
 void SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture *swdata)
 {
     if (swdata) {
-        SDL_SIMDFree(swdata->pixels);
+        SDL_aligned_free(swdata->pixels);
         SDL_DestroySurface(swdata->stretch);
         SDL_DestroySurface(swdata->display);
         SDL_free(swdata);
diff --git a/src/stdlib/SDL_stdlib.c b/src/stdlib/SDL_stdlib.c
index e568a704ab65..1a9da77502c5 100644
--- a/src/stdlib/SDL_stdlib.c
+++ b/src/stdlib/SDL_stdlib.c
@@ -686,3 +686,40 @@ int SDL_isblank(int x)
     return ((x) == ' ') || ((x) == '\t');
 }
 #endif
+
+void *SDL_aligned_alloc(size_t alignment, size_t size)
+{
+    size_t padding;
+    Uint8 *retval = NULL;
+
+    if (alignment < sizeof(void*)) {
+        alignment = sizeof(void*);
+    }
+    padding = (alignment - (size % alignment));
+
+    if (SDL_size_add_overflow(size, alignment, &size) == 0 &&
+        SDL_size_add_overflow(size, sizeof(void *), &size) == 0 &&
+        SDL_size_add_overflow(size, padding, &size) == 0) {
+        void *original = SDL_malloc(size);
+        if (original) {
+            /* Make sure we have enough space to store the original pointer */
+            retval = (Uint8 *)original + sizeof(original);
+
+            /* Align the pointer we're going to return */
+            retval += alignment - (((size_t)retval) % alignment);
+
+            /* Store the original pointer right before the returned value */
+            SDL_memcpy(retval - sizeof(original), &original, sizeof(original));
+        }
+    }
+    return retval;
+}
+
+void SDL_aligned_free(void *mem)
+{
+    if (mem) {
+        void *original;
+        SDL_memcpy(&original, ((Uint8 *)mem - sizeof(original)), sizeof(original));
+        SDL_free(original);
+    }
+}
diff --git a/src/video/SDL_RLEaccel.c b/src/video/SDL_RLEaccel.c
index 70efde921810..d9c81f38819d 100644
--- a/src/video/SDL_RLEaccel.c
+++ b/src/video/SDL_RLEaccel.c
@@ -1215,7 +1215,7 @@ static int RLEAlphaSurface(SDL_Surface *surface)
     /* Now that we have it encoded, release the original pixels */
     if (!(surface->flags & SDL_PREALLOC)) {
         if (surface->flags & SDL_SIMD_ALIGNED) {
-            SDL_SIMDFree(surface->pixels);
+            SDL_aligned_free(surface->pixels);
             surface->flags &= ~SDL_SIMD_ALIGNED;
         } else {
             SDL_free(surface->pixels);
@@ -1382,7 +1382,7 @@ static int RLEColorkeySurface(SDL_Surface *surface)
     /* Now that we have it encoded, release the original pixels */
     if (!(surface->flags & SDL_PREALLOC)) {
         if (surface->flags & SDL_SIMD_ALIGNED) {
-            SDL_SIMDFree(surface->pixels);
+            SDL_aligned_free(surface->pixels);
             surface->flags &= ~SDL_SIMD_ALIGNED;
         } else {
             SDL_free(surface->pixels);
@@ -1482,6 +1482,7 @@ static SDL_bool UnRLEAlpha(SDL_Surface *surface)
                          RLEDestFormat *, SDL_PixelFormat *);
     int w = surface->w;
     int bpp = df->BytesPerPixel;
+    size_t size;
 
     if (bpp == 2) {
         uncopy_opaque = uncopy_opaque_16;
@@ -1490,7 +1491,11 @@ static SDL_bool UnRLEAlpha(SDL_Surface *surface)
         uncopy_opaque = uncopy_transl = uncopy_32;
     }
 
-    surface->pixels = SDL_SIMDAlloc((size_t)surface->h * surface->pitch);
+    if (SDL_size_mul_overflow(surface->h, surface->pitch, &size)) {
+        return SDL_FALSE;
+    }
+
+    surface->pixels = SDL_aligned_alloc(SDL_SIMDGetAlignment(), size);
     if (surface->pixels == NULL) {
         return SDL_FALSE;
     }
@@ -1554,9 +1559,15 @@ void SDL_UnRLESurface(SDL_Surface *surface, int recode)
         if (recode && !(surface->flags & SDL_PREALLOC)) {
             if (surface->map->info.flags & SDL_COPY_RLE_COLORKEY) {
                 SDL_Rect full;
+                size_t size;
 
                 /* re-create the original surface */
-                surface->pixels = SDL_SIMDAlloc((size_t)surface->h * surface->pitch);
+                if (SDL_size_mul_overflow(surface->h, surface->pitch, &size)) {
+                    /* Memory corruption? */
+                    surface->flags |= SDL_RLEACCEL;
+                    return;
+                }
+                surface->pixels = SDL_aligned_alloc(SDL_SIMDGetAlignment(), size);
                 if (surface->pixels == NULL) {
                     /* Oh crap... */
                     surface->flags |= SDL_RLEACCEL;
diff --git a/src/video/SDL_surface.c b/src/video/SDL_surface.c
index 2b45982936c5..36d2fdde3a27 100644
--- a/src/video/SDL_surface.c
+++ b/src/video/SDL_surface.c
@@ -165,7 +165,7 @@ SDL_CreateSurface(int width, int height, Uint32 format)
             }
         }
 
-        surface->pixels = SDL_SIMDAlloc(size);
+        surface->pixels = SDL_aligned_alloc(SDL_SIMDGetAlignment(), size);
         if (!surface->pixels) {
             SDL_DestroySurface(surface);
             SDL_OutOfMemory();
@@ -1553,7 +1553,7 @@ void SDL_DestroySurface(SDL_Surface *surface)
         /* Don't free */
     } else if (surface->flags & SDL_SIMD_ALIGNED) {
         /* Free aligned */
-        SDL_SIMDFree(surface->pixels);
+        SDL_aligned_free(surface->pixels);
     } else {
         /* Normal */
         SDL_free(surface->pixels);
diff --git a/test/testautomation_stdlib.c b/test/testautomation_stdlib.c
index 2460d9382522..8357f780158c 100644
--- a/test/testautomation_stdlib.c
+++ b/test/testautomation_stdlib.c
@@ -444,6 +444,32 @@ int stdlib_sscanf(void *arg)
 #define SIZE_FORMAT "zu"
 #endif
 
+/**
+ * @brief Call to SDL_aligned_alloc
+ */
+int stdlib_aligned_alloc(void *arg)
+{
+    size_t i, alignment;
+    void *ptr;
+
+    for (i = 0; i < 2*sizeof(void *); ++i) {
+        SDLTest_AssertPass("Call to SDL_aligned_alloc(%"SIZE_FORMAT")", i);
+        ptr = SDL_aligned_alloc(i, 1);
+        if (i < sizeof(void *)) {
+            alignment = sizeof(void *);
+        } else {
+            alignment = i;
+        }
+        SDLTest_AssertCheck(ptr != NULL, "Check output, expected non-NULL, got: %p", ptr);
+        SDLTest_AssertCheck((((size_t)ptr) % alignment) == 0, "Check output, expected aligned pointer, actual offset: %"SIZE_FORMAT, (((size_t)ptr) % alignment));
+        SDLTest_AssertPass("Filling memory to alignment value");
+        SDL_memset(ptr, 0xAA, alignment);
+        SDL_aligned_free(ptr);
+    }
+
+    return TEST_COMPLETED;
+}
+
 typedef struct
 {
     size_t a;
@@ -600,19 +626,23 @@ stdlib_overflow(void *arg)
 
 /* Standard C routine test cases */
 static const SDLTest_TestCaseReference stdlibTest1 = {
-    (SDLTest_TestCaseFp)stdlib_strlcpy, "stdlib_strlcpy", "Call to SDL_strlcpy", TEST_ENABLED
+    stdlib_strlcpy, "stdlib_strlcpy", "Call to SDL_strlcpy", TEST_ENABLED
 };
 
 static const SDLTest_TestCaseReference stdlibTest2 = {
-    (SDLTest_TestCaseFp)stdlib_snprintf, "stdlib_snprintf", "Call to SDL_snprintf", TEST_ENABLED
+    stdlib_snprintf, "stdlib_snprintf", "Call to SDL_snprintf", TEST_ENABLED
 };
 
 static const SDLTest_TestCaseReference stdlibTest3 = {
-    (SDLTest_TestCaseFp)stdlib_getsetenv, "stdlib_getsetenv", "Call to SDL_getenv and SDL_setenv", TEST_ENABLED
+    stdlib_getsetenv, "stdlib_getsetenv", "Call to SDL_getenv and SDL_setenv", TEST_ENABLED
 };
 
 static const SDLTest_TestCaseReference stdlibTest4 = {
-    (SDLTest_TestCaseFp)stdlib_sscanf, "stdlib_sscanf", "Call to SDL_sscanf", TEST_ENABLED
+    stdlib_sscanf, "stdlib_sscanf", "Call to SDL_sscanf", TEST_ENABLED
+};
+
+static const SDLTest_TestCaseReference stdlibTest5 = {
+    stdlib_aligned_alloc, "stdlib_aligned_alloc", "Call to SDL_aligned_alloc", TEST_ENABLED
 };
 
 static const SDLTest_TestCaseReference stdlibTestOverflow = {
@@ -625,6 +655,7 @@ static const SDLTest_TestCaseReference *stdlibTests[] = {
     &stdlibTest2,
     &stdlibTest3,
     &stdlibTest4,
+    &stdlibTest5,
     &stdlibTestOverflow,
     NULL
 };