SDL: stlib: Extract SDL_memcpy and SDL_memset to its own file respectively

From ae7446a9591299eef719f82403c568cd06e7388a Mon Sep 17 00:00:00 2001
From: Anonymous Maarten <[EMAIL REDACTED]>
Date: Wed, 15 Jun 2022 20:48:54 +0200
Subject: [PATCH] stlib: Extract SDL_memcpy and SDL_memset to its own file
 respectively

This is done such that we can disable LTO for these 2 functions when
building with MSVC.

This is due to a limitation of Link Time Code Generation (LTCG).
Code generation might generate a new reference to memset after linking
has started. The LTCG must make assumptions about where memset is
defined which is normally the C runtime.
---
 CMakeLists.txt                |   5 ++
 Makefile.os2                  |   2 +-
 Makefile.w32                  |   2 +-
 VisualC-WinRT/SDL-UWP.vcxproj |   2 +
 VisualC/SDL/SDL.vcxproj       |   2 +
 src/stdlib/SDL_memcpy.c       |  79 ++++++++++++++++++++++++++
 src/stdlib/SDL_memset.c       |  75 +++++++++++++++++++++++++
 src/stdlib/SDL_string.c       | 102 ----------------------------------
 8 files changed, 165 insertions(+), 104 deletions(-)
 create mode 100644 src/stdlib/SDL_memcpy.c
 create mode 100644 src/stdlib/SDL_memset.c

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7982b9134ac..5beaf3b38bf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -649,6 +649,11 @@ if(MSVC)
   target_compile_definitions(sdl-build-options INTERFACE "-D_CRT_SECURE_NO_WARNINGS")
 endif()
 
+if(MSVC)
+  # Due to a limitation of Microsoft's LTO implementation, LTO must be disabled for memcpy and memset.
+  set_property(SOURCE src/stdlib/SDL_memcpy.c src/stdlib/SDL_memset.c APPEND PROPERTY COMPILE_FLAGS /GL-)
+endif()
+
 if(SDL_ASSEMBLY)
   if(USE_GCC OR USE_CLANG)
     # TODO: Those all seem to be quite GCC specific - needs to be
diff --git a/Makefile.os2 b/Makefile.os2
index 14b4953d412..0b1b13a1cdb 100644
--- a/Makefile.os2
+++ b/Makefile.os2
@@ -68,7 +68,7 @@ CFLAGS_DLL+= -DSDL_BUILD_MINOR_VERSION=$(MINOR_VERSION)
 CFLAGS_DLL+= -DSDL_BUILD_MICRO_VERSION=$(MICRO_VERSION)
 
 SRCS = SDL.c SDL_assert.c SDL_error.c SDL_guid.c SDL_log.c SDL_dataqueue.c SDL_hints.c SDL_list.c
-SRCS+= SDL_getenv.c SDL_iconv.c SDL_malloc.c SDL_qsort.c SDL_stdlib.c SDL_string.c SDL_strtokr.c SDL_crc32.c
+SRCS+= SDL_getenv.c SDL_iconv.c SDL_malloc.c SDL_memcpy.c SDL_memset.c SDL_qsort.c SDL_stdlib.c SDL_string.c SDL_strtokr.c SDL_crc32.c
 SRCS+= SDL_cpuinfo.c SDL_atomic.c SDL_spinlock.c SDL_thread.c SDL_timer.c
 SRCS+= SDL_rwops.c SDL_power.c
 SRCS+= SDL_audio.c SDL_audiocvt.c SDL_audiodev.c SDL_audiotypecvt.c SDL_mixer.c SDL_wave.c
diff --git a/Makefile.w32 b/Makefile.w32
index fe198900439..2a186928dba 100644
--- a/Makefile.w32
+++ b/Makefile.w32
@@ -44,7 +44,7 @@ CFLAGS_DLL+= -DSDL_BUILD_MICRO_VERSION=$(MICRO_VERSION)
 RCFLAGS = -q -r -bt=nt $(INCPATH)
 
 SRCS = SDL.c SDL_assert.c SDL_error.c SDL_guid.c SDL_log.c SDL_dataqueue.c SDL_hints.c SDL_list.c
-SRCS+= SDL_getenv.c SDL_iconv.c SDL_malloc.c SDL_qsort.c SDL_stdlib.c SDL_string.c SDL_strtokr.c SDL_crc32.c
+SRCS+= SDL_getenv.c SDL_iconv.c SDL_malloc.c SDL_memcpy.c SDL_memset.c SDL_qsort.c SDL_stdlib.c SDL_string.c SDL_strtokr.c SDL_crc32.c
 SRCS+= SDL_cpuinfo.c SDL_atomic.c SDL_spinlock.c SDL_thread.c SDL_timer.c
 SRCS+= SDL_rwops.c SDL_power.c
 SRCS+= SDL_audio.c SDL_audiocvt.c SDL_audiodev.c SDL_audiotypecvt.c SDL_mixer.c SDL_wave.c
diff --git a/VisualC-WinRT/SDL-UWP.vcxproj b/VisualC-WinRT/SDL-UWP.vcxproj
index 80c837972ca..a4d77445b86 100644
--- a/VisualC-WinRT/SDL-UWP.vcxproj
+++ b/VisualC-WinRT/SDL-UWP.vcxproj
@@ -311,6 +311,8 @@
     <ClCompile Include="..\src\stdlib\SDL_getenv.c" />
     <ClCompile Include="..\src\stdlib\SDL_iconv.c" />
     <ClCompile Include="..\src\stdlib\SDL_malloc.c" />
+    <ClCompile Include="..\src\stdlib\SDL_memcpy.c" />
+    <ClCompile Include="..\src\stdlib\SDL_memset.c" />
     <ClCompile Include="..\src\stdlib\SDL_qsort.c" />
     <ClCompile Include="..\src\stdlib\SDL_stdlib.c" />
     <ClCompile Include="..\src\stdlib\SDL_string.c" />
diff --git a/VisualC/SDL/SDL.vcxproj b/VisualC/SDL/SDL.vcxproj
index 04ed6b46f1e..013ca020297 100644
--- a/VisualC/SDL/SDL.vcxproj
+++ b/VisualC/SDL/SDL.vcxproj
@@ -562,6 +562,8 @@
     <ClCompile Include="..\..\src\stdlib\SDL_getenv.c" />
     <ClCompile Include="..\..\src\stdlib\SDL_iconv.c" />
     <ClCompile Include="..\..\src\stdlib\SDL_malloc.c" />
+    <ClCompile Include="..\..\src\stdlib\SDL_memcpy.c" />
+    <ClCompile Include="..\..\src\stdlib\SDL_memset.c" />
     <ClCompile Include="..\..\src\stdlib\SDL_qsort.c" />
     <ClCompile Include="..\..\src\stdlib\SDL_stdlib.c" />
     <ClCompile Include="..\..\src\stdlib\SDL_string.c" />
diff --git a/src/stdlib/SDL_memcpy.c b/src/stdlib/SDL_memcpy.c
new file mode 100644
index 00000000000..bedbce8f56a
--- /dev/null
+++ b/src/stdlib/SDL_memcpy.c
@@ -0,0 +1,79 @@
+/*
+  Simple DirectMedia Layer
+  Copyright (C) 1997-2022 Sam Lantinga <slouken@libsdl.org>
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "../SDL_internal.h"
+
+/* This file contains a portable memcpy manipulation function for SDL */
+
+void *
+SDL_memcpy(SDL_OUT_BYTECAP(len) void *dst, SDL_IN_BYTECAP(len) const void *src, size_t len)
+{
+#ifdef __GNUC__
+    /* Presumably this is well tuned for speed.
+       On my machine this is twice as fast as the C code below.
+     */
+    return __builtin_memcpy(dst, src, len);
+#elif defined(HAVE_MEMCPY)
+    return memcpy(dst, src, len);
+#elif defined(HAVE_BCOPY)
+    bcopy(src, dst, len);
+    return dst;
+#else
+    /* GCC 4.9.0 with -O3 will generate movaps instructions with the loop
+       using Uint32* pointers, so we need to make sure the pointers are
+       aligned before we loop using them.
+     */
+    if (((uintptr_t)src & 0x3) || ((uintptr_t)dst & 0x3)) {
+        /* Do an unaligned byte copy */
+        Uint8 *srcp1 = (Uint8 *)src;
+        Uint8 *dstp1 = (Uint8 *)dst;
+
+        while (len--) {
+            *dstp1++ = *srcp1++;
+        }
+    } else {
+        size_t left = (len % 4);
+        Uint32 *srcp4, *dstp4;
+        Uint8 *srcp1, *dstp1;
+
+        srcp4 = (Uint32 *) src;
+        dstp4 = (Uint32 *) dst;
+        len /= 4;
+        while (len--) {
+            *dstp4++ = *srcp4++;
+        }
+
+        srcp1 = (Uint8 *) srcp4;
+        dstp1 = (Uint8 *) dstp4;
+        switch (left) {
+            case 3:
+                *dstp1++ = *srcp1++;
+            case 2:
+                *dstp1++ = *srcp1++;
+            case 1:
+                *dstp1++ = *srcp1++;
+        }
+    }
+    return dst;
+#endif /* __GNUC__ */
+}
+
+/* vi: set ts=4 sw=4 expandtab: */
diff --git a/src/stdlib/SDL_memset.c b/src/stdlib/SDL_memset.c
new file mode 100644
index 00000000000..c2a487591ba
--- /dev/null
+++ b/src/stdlib/SDL_memset.c
@@ -0,0 +1,75 @@
+/*
+  Simple DirectMedia Layer
+  Copyright (C) 1997-2022 Sam Lantinga <slouken@libsdl.org>
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "../SDL_internal.h"
+
+/* This file contains a portable memset manipulation function for SDL */
+
+void *
+SDL_memset(SDL_OUT_BYTECAP(len) void *dst, int c, size_t len)
+{
+#if defined(HAVE_MEMSET)
+    return memset(dst, c, len);
+#else
+    size_t left;
+    Uint32 *dstp4;
+    Uint8 *dstp1 = (Uint8 *) dst;
+    Uint8 value1;
+    Uint32 value4;
+
+    /* The value used in memset() is a byte, passed as an int */
+    c &= 0xff;
+
+    /* The destination pointer needs to be aligned on a 4-byte boundary to
+     * execute a 32-bit set. Set first bytes manually if needed until it is
+     * aligned. */
+    value1 = (Uint8)c;
+    while ((uintptr_t)dstp1 & 0x3) {
+        if (len--) {
+            *dstp1++ = value1;
+        } else {
+            return dst;
+        }
+    }
+
+    value4 = ((Uint32)c | ((Uint32)c << 8) | ((Uint32)c << 16) | ((Uint32)c << 24));
+    dstp4 = (Uint32 *) dstp1;
+    left = (len % 4);
+    len /= 4;
+    while (len--) {
+        *dstp4++ = value4;
+    }
+
+    dstp1 = (Uint8 *) dstp4;
+    switch (left) {
+        case 3:
+            *dstp1++ = value1;
+        case 2:
+            *dstp1++ = value1;
+        case 1:
+            *dstp1++ = value1;
+    }
+
+    return dst;
+#endif /* HAVE_MEMSET */
+}
+
+/* vi: set ts=4 sw=4 expandtab: */
diff --git a/src/stdlib/SDL_string.c b/src/stdlib/SDL_string.c
index 77e0d30f5a5..943e727406b 100644
--- a/src/stdlib/SDL_string.c
+++ b/src/stdlib/SDL_string.c
@@ -263,108 +263,6 @@ SDL_ScanFloat(const char *text, double *valuep)
 }
 #endif
 
-void *
-SDL_memset(SDL_OUT_BYTECAP(len) void *dst, int c, size_t len)
-{
-#if defined(HAVE_MEMSET)
-    return memset(dst, c, len);
-#else
-    size_t left;
-    Uint32 *dstp4;
-    Uint8 *dstp1 = (Uint8 *) dst;
-    Uint8 value1;
-    Uint32 value4;
-
-    /* The value used in memset() is a byte, passed as an int */
-    c &= 0xff;
-
-    /* The destination pointer needs to be aligned on a 4-byte boundary to
-     * execute a 32-bit set. Set first bytes manually if needed until it is
-     * aligned. */
-    value1 = (Uint8)c;
-    while ((uintptr_t)dstp1 & 0x3) {
-        if (len--) {
-            *dstp1++ = value1;
-        } else {
-            return dst;
-        }
-    }
-
-    value4 = ((Uint32)c | ((Uint32)c << 8) | ((Uint32)c << 16) | ((Uint32)c << 24));
-    dstp4 = (Uint32 *) dstp1;
-    left = (len % 4);
-    len /= 4;
-    while (len--) {
-        *dstp4++ = value4;
-    }
-
-    dstp1 = (Uint8 *) dstp4;
-    switch (left) {
-    case 3:
-        *dstp1++ = value1;
-    case 2:
-        *dstp1++ = value1;
-    case 1:
-        *dstp1++ = value1;
-    }
-
-    return dst;
-#endif /* HAVE_MEMSET */
-}
-
-void *
-SDL_memcpy(SDL_OUT_BYTECAP(len) void *dst, SDL_IN_BYTECAP(len) const void *src, size_t len)
-{
-#ifdef __GNUC__
-    /* Presumably this is well tuned for speed.
-       On my machine this is twice as fast as the C code below.
-     */
-    return __builtin_memcpy(dst, src, len);
-#elif defined(HAVE_MEMCPY)
-    return memcpy(dst, src, len);
-#elif defined(HAVE_BCOPY)
-    bcopy(src, dst, len);
-    return dst;
-#else
-    /* GCC 4.9.0 with -O3 will generate movaps instructions with the loop
-       using Uint32* pointers, so we need to make sure the pointers are
-       aligned before we loop using them.
-     */
-    if (((uintptr_t)src & 0x3) || ((uintptr_t)dst & 0x3)) {
-        /* Do an unaligned byte copy */
-        Uint8 *srcp1 = (Uint8 *)src;
-        Uint8 *dstp1 = (Uint8 *)dst;
-
-        while (len--) {
-            *dstp1++ = *srcp1++;
-        }
-    } else {
-        size_t left = (len % 4);
-        Uint32 *srcp4, *dstp4;
-        Uint8 *srcp1, *dstp1;
-
-        srcp4 = (Uint32 *) src;
-        dstp4 = (Uint32 *) dst;
-        len /= 4;
-        while (len--) {
-            *dstp4++ = *srcp4++;
-        }
-
-        srcp1 = (Uint8 *) srcp4;
-        dstp1 = (Uint8 *) dstp4;
-        switch (left) {
-        case 3:
-            *dstp1++ = *srcp1++;
-        case 2:
-            *dstp1++ = *srcp1++;
-        case 1:
-            *dstp1++ = *srcp1++;
-        }
-    }
-    return dst;
-#endif /* __GNUC__ */
-}
-
 void *
 SDL_memmove(SDL_OUT_BYTECAP(len) void *dst, SDL_IN_BYTECAP(len) const void *src, size_t len)
 {