sdl2-compat: fixing MSVC link failures about __chkstk with stuff taken from SDL3

From 35a65dee79ca145acd8c0657fd4fe3163bdec63d Mon Sep 17 00:00:00 2001
From: Ozkan Sezer <[EMAIL REDACTED]>
Date: Wed, 31 Jan 2024 22:20:50 +0300
Subject: [PATCH] fixing MSVC link failures about __chkstk with stuff taken
 from SDL3

---
 CMakeLists.txt                    |  20 +++-
 src/Makefile.vc                   |  15 ++-
 src/sdl2_compat.c                 |   3 -
 src/{x86_msvc.h => sdl2_mslibc.c} | 161 ++++++++++++++++++++++--------
 src/sdl2_mslibc_x64.masm          |  29 ++++++
 5 files changed, 181 insertions(+), 47 deletions(-)
 rename src/{x86_msvc.h => sdl2_mslibc.c} (85%)
 create mode 100644 src/sdl2_mslibc_x64.masm

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a030cb6..f0b6a7b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -184,9 +184,22 @@ endif()
 
 if(WIN32)
   list(APPEND SDL2COMPAT_SRCS
+    "src/sdl2_mslibc.c"
     "src/version.rc"
   )
 endif()
+if(MSVC)
+  if(SDL_CPU_X64)
+    list(APPEND SDL2COMPAT_SRCS
+      "src/sdl2_mslibc_x64.masm"
+    )
+    enable_language(ASM_MASM)
+    set_property(SOURCE "src/sdl2_mslibc_x64.masm" PROPERTY LANGUAGE "ASM_MASM")
+  elseif(SDL_CPU_ARM32 OR SDL_CPU_ARM64)
+    # FIXME: the ARM assemblers (armasm.exe/armasm64.exe) are NOT ASM_MASM and do not currently work with CMake
+    # (https://gitlab.kitware.com/cmake/cmake/-/issues/18912)
+  endif()
+endif()
 
 if(APPLE)
   foreach(SOURCE_FILE ${SDL2COMPAT_SRCS})
@@ -215,6 +228,9 @@ if(TARGET SDL3::SDL3-shared)
   set_property(TARGET SDL2 PROPERTY BUILD_RPATH $<TARGET_FILE_DIR:SDL3::SDL3-shared>)
 endif()
 
+if(MSVC AND SDL_CPU_X64)
+  target_compile_options(SDL2 PRIVATE "$<$<COMPILE_LANGUAGE:ASM_MASM>:/nologo>")
+endif()
 set(EXTRA_CFLAGS )
 if(CMAKE_C_COMPILER_ID MATCHES "Clang|GNU")
   set(EXTRA_CFLAGS "${EXTRA_CFLAGS} -Wall")
@@ -309,9 +325,9 @@ if(WINDOWS_STORE)
   target_compile_options(SDL2 PRIVATE "-ZW")
 elseif(MSVC)
   # Don't try to link with the default set of libraries.
-  target_compile_options(SDL2 PRIVATE /GS-)
+  target_compile_options(SDL2 PRIVATE "$<$<COMPILE_LANGUAGE:C>:/GS->")
   if(SDL_CPU_X86)  # don't emit SSE2 in x86 builds
-    target_compile_options(SDL2 PRIVATE /arch:SSE)
+    target_compile_options(SDL2 PRIVATE "$<$<COMPILE_LANGUAGE:C>:/arch:SSE>")
   endif()
   if(NOT MSVC_CLANG AND NOT SDL_CPU_ARM32)
     set_property(TARGET SDL2 APPEND_STRING PROPERTY LINK_FLAGS " /NODEFAULTLIB")
diff --git a/src/Makefile.vc b/src/Makefile.vc
index 47e12ef..72a93e5 100644
--- a/src/Makefile.vc
+++ b/src/Makefile.vc
@@ -3,6 +3,9 @@
 #
 # If you specifically want to build for x86:
 #	nmake /f Makefile.vc CPU=x86
+#
+# If you specifically want to build for x64:
+#	nmake /f Makefile.vc CPU=x64
 
 # change INCLUDES so it points to SDL3 headers directory:
 INCLUDES = -Iinclude
@@ -12,19 +15,27 @@ CPPFLAGS = -DNDEBUG -DDLL_EXPORT
 CC = cl
 LD = link
 RC = rc
+AS64 = ml64
 
 CFLAGS = /nologo /O2 /MD /W3 /GS-
 LDFLAGS = /nologo /DLL /NODEFAULTLIB /RELEASE
 LDLIBS = kernel32.lib user32.lib
+AS64FLAGS = /nologo /W3 /Ta
 
 !if "$(CPU)" == "x86"
 CFLAGS = $(CFLAGS) /arch:SSE
 !endif
+!if "$(CPU)" == "x64"
+X64_OBJ = sdl2_mslibc_x64.obj
+!endif
 
 DLLNAME = SDL2.dll
 IMPNAME = SDL2.lib
 
-OBJ = sdl2_compat.obj dynapi\SDL_dynapi.obj version.res
+OBJ = sdl2_compat.obj dynapi\SDL_dynapi.obj sdl2_mslibc.obj $(X64_OBJ) version.res
+
+.SUFFIXES:
+.SUFFIXES: .dll .obj .res .c .cpp .asm .masm .rc
 
 all: $(DLLNAME)
 
@@ -33,6 +44,8 @@ $(DLLNAME): $(OBJ)
 
 .c.obj:
 	$(CC) $(CFLAGS) $(CPPFLAGS) $(INCLUDES) /Fo$@ -c $<
+.masm.obj:
+	$(AS64) $(AS64FLAGS) $< /Fo$@ /c
 .rc.res:
 	$(RC) /r /Fo$@ $<
 
diff --git a/src/sdl2_compat.c b/src/sdl2_compat.c
index 485dad2..25fafb2 100644
--- a/src/sdl2_compat.c
+++ b/src/sdl2_compat.c
@@ -609,9 +609,6 @@ LoadSDL3(void)
 }
 
 #if defined(_MSC_VER) && !defined(SDL_BUILDING_WINRT)
-#ifdef _M_IX86
-#include "x86_msvc.h"
-#endif
 
 /* NOLINTNEXTLINE(readability-redundant-declaration) */
 extern void *memcpy(void *dst, const void *src, size_t len);
diff --git a/src/x86_msvc.h b/src/sdl2_mslibc.c
similarity index 85%
rename from src/x86_msvc.h
rename to src/sdl2_mslibc.c
index 4e88e4f..bfd4b21 100644
--- a/src/x86_msvc.h
+++ b/src/sdl2_mslibc.c
@@ -19,15 +19,21 @@
   3. This notice may not be removed or altered from any source distribution.
 */
 
-/* taken from SDL_stdlib.c of SDL2 */
+/* taken from SDL_mslibc.c of SDL3 */
 
-/* *INDENT-OFF* */
+#if defined(_MSC_VER) && !defined(SDL_BUILDING_WINRT)
+
+#ifndef __FLTUSED__
+#define __FLTUSED__
+__declspec(selectany) int _fltused = 1;
+#endif
+
+#ifdef _M_IX86
 
 /* Float to long */
-void
-__declspec(naked)
-_ftol()
+void __declspec(naked) _ftol()
 {
+    /* *INDENT-OFF* */
     __asm {
         push        ebp
         mov         ebp,esp
@@ -71,25 +77,23 @@ _ftol()
         leave
         ret
     }
+    /* *INDENT-ON* */
 }
 
-void
-_ftol2()
+void _ftol2_sse()
 {
     _ftol();
 }
 
-void
-_ftol2_sse()
+void _ftol2()
 {
     _ftol();
 }
 
 /* 64-bit math operators for 32-bit systems */
-void
-__declspec(naked)
-_allmul()
+void __declspec(naked) _allmul()
 {
+    /* *INDENT-OFF* */
     __asm {
         mov         eax, dword ptr[esp+8]
         mov         ecx, dword ptr[esp+10h]
@@ -112,12 +116,12 @@ _allmul()
         pop         ebx
         ret         10h
     }
+    /* *INDENT-ON* */
 }
 
-void
-__declspec(naked)
-_alldiv()
+void __declspec(naked) _alldiv()
 {
+    /* *INDENT-OFF* */
     __asm {
         push        edi
         push        esi
@@ -198,12 +202,12 @@ _alldiv()
         pop         edi
         ret         10h
     }
+    /* *INDENT-ON* */
 }
 
-void
-__declspec(naked)
-_aulldiv()
+void __declspec(naked) _aulldiv()
 {
+    /* *INDENT-OFF* */
     __asm {
         push        ebx
         push        esi
@@ -254,12 +258,12 @@ _aulldiv()
         pop         ebx
         ret         10h
     }
+    /* *INDENT-ON* */
 }
 
-void
-__declspec(naked)
-_allrem()
+void __declspec(naked) _allrem()
 {
+    /* *INDENT-OFF* */
     __asm {
         push        ebx
         push        edi
@@ -339,12 +343,12 @@ _allrem()
         pop         ebx
         ret         10h
     }
+    /* *INDENT-ON* */
 }
 
-void
-__declspec(naked)
-_aullrem()
+void __declspec(naked) _aullrem()
 {
+    /* *INDENT-OFF* */
     __asm {
         push        ebx
         mov         eax,dword ptr [esp+14h]
@@ -396,12 +400,12 @@ _aullrem()
         pop         ebx
         ret         10h
     }
+    /* *INDENT-ON* */
 }
 
-void
-__declspec(naked)
-_alldvrm()
+void __declspec(naked) _alldvrm()
 {
+    /* *INDENT-OFF* */
     __asm {
         push        edi
         push        esi
@@ -504,12 +508,12 @@ _alldvrm()
         pop         edi
         ret         10h
     }
+    /* *INDENT-ON* */
 }
 
-void
-__declspec(naked)
-_aulldvrm()
+void __declspec(naked) _aulldvrm()
 {
+    /* *INDENT-OFF* */
     __asm {
         push        esi
         mov         eax,dword ptr [esp+14h]
@@ -575,12 +579,12 @@ _aulldvrm()
         pop         esi
         ret         10h
     }
+    /* *INDENT-ON* */
 }
 
-void
-__declspec(naked)
-_allshl()
+void __declspec(naked) _allshl()
 {
+    /* *INDENT-OFF* */
     __asm {
         cmp         cl,40h
         jae         RETZERO
@@ -600,12 +604,12 @@ _allshl()
         xor         edx,edx
         ret
     }
+    /* *INDENT-ON* */
 }
 
-void
-__declspec(naked)
-_allshr()
+void __declspec(naked) _allshr()
 {
+    /* *INDENT-OFF* */
     __asm {
         cmp         cl,3Fh
         jae         RETSIGN
@@ -625,12 +629,12 @@ _allshr()
         mov         eax,edx
         ret
     }
+    /* *INDENT-ON* */
 }
 
-void
-__declspec(naked)
-_aullshr()
+void __declspec(naked) _aullshr()
 {
+    /* *INDENT-OFF* */
     __asm {
         cmp         cl,40h
         jae         RETZERO
@@ -650,7 +654,82 @@ _aullshr()
         xor         edx,edx
         ret
     }
+    /* *INDENT-ON* */
 }
-/* *INDENT-ON* */
 
-/* vi: set ts=4 sw=4 expandtab: */
+void __declspec(naked) _chkstk(void) /* x86 stack probe: lowers esp by the byte count in eax, touching each page on the way */
+{
+    __asm {
+        push        ecx                 ; save ecx, it is used as scratch below
+        mov         ecx,esp     ; lea         ecx,dword ptr [esp]+4
+        add         ecx,4               ; ecx = esp as it was before the push above
+        sub         ecx,eax             ; ecx = that address minus the size in eax
+        sbb         eax,eax             ; eax = -1 if the subtraction borrowed, else 0
+        not         eax                 ; eax = 0 on borrow, else all ones
+        and         ecx,eax             ; clamp the target address to 0 on borrow
+        mov         eax,esp
+        and         eax,0xfffff000      ; eax = esp rounded down to a 0x1000 boundary
+L1:
+        cmp         ecx,eax
+        jb          short L2            ; target is still below eax: go touch the next lower page
+        mov         eax,ecx
+        pop         ecx                 ; restore the saved ecx
+        xchg        esp,eax             ; esp = target address, eax = previous esp
+        mov         eax,dword ptr [eax] ; load the return address from the old stack top
+        mov         dword ptr [esp],eax ; store it at the new stack top so ret can use it
+        ret
+L2:
+        sub         eax,0x1000          ; step eax down by 0x1000 bytes
+        test        dword ptr [eax],eax ; read from that address to touch the page
+        jmp         short L1
+    }
+}
+
+void __declspec(naked) _alloca_probe_8(void) /* pads the size in eax so the result is 8-byte aligned, then jumps to _chkstk */
+{
+    /* *INDENT-OFF* */
+    __asm {
+        push        ecx                 ; save ecx, it is used as scratch below
+        mov         ecx,esp     ; lea         ecx,dword ptr [esp]+8
+        add         ecx,8               ; ecx = esp + 8, skipping the saved ecx and the return address
+        sub         ecx,eax             ; ecx = that address minus the size in eax
+        and         ecx,0x7             ; keep only the low 3 bits: the misalignment modulo 8
+        add         eax,ecx             ; grow the size by that misalignment
+        sbb         ecx,ecx             ; ecx = -1 if the add carried, else 0
+        or          eax,ecx             ; saturate the size to all ones on carry
+        pop         ecx                 ; restore the saved ecx
+        jmp         _chkstk             ; continue in _chkstk with the adjusted size in eax
+    }
+    /* *INDENT-ON* */
+}
+
+void __declspec(naked) _alloca_probe_16(void) /* pads the size in eax so the result is 16-byte aligned, then jumps to _chkstk */
+{
+    /* *INDENT-OFF* */
+    __asm {
+        push        ecx                 ; save ecx, it is used as scratch below
+        mov         ecx,esp     ; lea         ecx,dword ptr [esp]+8
+        add         ecx,8               ; ecx = esp + 8, skipping the saved ecx and the return address
+        sub         ecx,eax             ; ecx = that address minus the size in eax
+        and         ecx,0xf             ; keep only the low 4 bits: the misalignment modulo 16
+        add         eax,ecx             ; grow the size by that misalignment
+        sbb         ecx,ecx             ; ecx = -1 if the add carried, else 0
+        or          eax,ecx             ; saturate the size to all ones on carry
+        pop         ecx                 ; restore the saved ecx
+        jmp         _chkstk             ; continue in _chkstk with the adjusted size in eax
+    }
+    /* *INDENT-ON* */
+}
+
+#endif /* _M_IX86 */
+
+#ifdef _M_ARM64
+
+/* !!! FIXME !!! This __chkstk is an empty placeholder: it defines the symbol but performs no stack probing. */
+void __chkstk(void);
+void __chkstk() {
+}
+
+#endif /* _M_ARM64 */
+
+#endif /* _MSC_VER && !SDL_BUILDING_WINRT */
diff --git a/src/sdl2_mslibc_x64.masm b/src/sdl2_mslibc_x64.masm
new file mode 100644
index 0000000..1590d88
--- /dev/null
+++ b/src/sdl2_mslibc_x64.masm
@@ -0,0 +1,29 @@
+include ksamd64.inc
+
+text        SEGMENT EXECUTE
+
+public      __chkstk
+
+__chkstk:                                       ; x64 stack probe; the byte count to probe is taken from rax
+    sub         rsp,010h                        ; reserve two 8-byte save slots
+    mov         QWORD PTR [rsp],r10             ; save r10
+    mov         QWORD PTR [rsp+08h],r11         ; save r11
+    xor         r11,r11                         ; r11 = 0, used as the clamp value below
+    lea         r10,[rsp+018h]                  ; r10 = rsp + 16 bytes of saves + 8-byte return address
+    sub         r10,rax                         ; r10 = that address minus the size in rax
+    cmovb       r10,r11                         ; clamp the target address to 0 if the subtraction borrowed
+    mov         r11,QWORD PTR gs:[TeStackLimit] ; r11 = value at gs:[TeStackLimit] (symbol presumably from ksamd64.inc)
+    cmp         r10,r11
+    jae         chkstk_finish                   ; target is not below that limit: nothing to touch
+    and         r10w,0f000h                     ; clear the low 12 bits of r10
+chkstk_loop:
+    lea         r11,[r11-PAGE_SIZE]             ; step r11 down by PAGE_SIZE (presumably from ksamd64.inc)
+    mov         BYTE PTR [r11],0h               ; write one byte there to touch the page
+    cmp         r10,r11
+    jne         chkstk_loop                     ; repeat until r11 reaches the rounded target
+chkstk_finish:
+    mov         r10,QWORD PTR [rsp]             ; restore r10
+    mov         r11,QWORD PTR [rsp+08h]         ; restore r11
+    add         rsp,010h                        ; release the save slots
+    ret
+end