SDL: atomic: Fix missing full memory barrier on GCC/Clang (21ac6)

From 21ac66c8b3df481db293af3e52ca06c5c5a22d44 Mon Sep 17 00:00:00 2001
From: Cameron Gutman <[EMAIL REDACTED]>
Date: Sun, 19 Apr 2026 14:06:44 -0500
Subject: [PATCH] atomic: Fix missing full memory barrier on GCC/Clang

__sync_lock_test_and_set() is designed for creating locks, not as
a general atomic exchange function. As a result, it only provides
an acquire memory barrier and isn't guaranteed to actually store
the provided value (though it does on architectures we care about).

__atomic_exchange_n() has been supported on GCC/Clang for the last ~10
years, so let's use that instead if available. We will keep the
__sync_lock_test_and_set() fallback around for ancient platforms,
but add a full memory barrier to match the documented behavior.

(cherry picked from commit 30de669b32b4b31c67c36171524cd539167fa916)
---
 CMakeLists.txt          |  1 +
 src/atomic/SDL_atomic.c | 34 ++++++++++++++++++++++++----------
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0f9dea7091b9d..2ef50e0ef4e33 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2802,6 +2802,7 @@ elseif(PS2)
     gskit
     dmakit
     ps2_drivers
+    atomic
   )
 
 elseif(OS2)
diff --git a/src/atomic/SDL_atomic.c b/src/atomic/SDL_atomic.c
index bde7939ea8e37..35c3ca42cd8be 100644
--- a/src/atomic/SDL_atomic.c
+++ b/src/atomic/SDL_atomic.c
@@ -35,19 +35,17 @@
 #include <atomic.h>
 #endif
 
-/* The __atomic_load_n() intrinsic showed up in different times for different compilers. */
-#if defined(__clang__)
-#if __has_builtin(__atomic_load_n) || defined(HAVE_GCC_ATOMICS)
-/* !!! FIXME: this advertises as available in the NDK but uses an external symbol we don't have.
-   It might be in a later NDK or we might need an extra library? --ryan. */
-#if !defined(__ANDROID__)
+/* The __atomic intrinsics showed up at different times for different compilers. */
+#if (defined(__GNUC__) && (__GNUC__ >= 5)) || (defined(__clang__) && defined(HAVE_GCC_ATOMICS))
 #define HAVE_ATOMIC_LOAD_N 1
-#endif
-#endif
-#elif defined(__GNUC__)
-#if (__GNUC__ >= 5)
+#define HAVE_ATOMIC_EXCHANGE_N 1
+#else
+#if _SDL_HAS_BUILTIN(__atomic_load_n)
 #define HAVE_ATOMIC_LOAD_N 1
 #endif
+#if _SDL_HAS_BUILTIN(__atomic_exchange_n)
+#define HAVE_ATOMIC_EXCHANGE_N 1
+#endif
 #endif
 
 /* *INDENT-OFF* */ /* clang-format off */
@@ -190,7 +188,15 @@ int SDL_AtomicSet(SDL_atomic_t *a, int v)
     return _InterlockedExchange((long *)&a->value, v);
 #elif defined(HAVE_WATCOM_ATOMICS)
     return _SDL_xchg_watcom(&a->value, v);
+#elif defined(HAVE_ATOMIC_EXCHANGE_N)
+    return __atomic_exchange_n(&a->value, v, __ATOMIC_SEQ_CST);
 #elif defined(HAVE_GCC_ATOMICS)
+    /* __sync_lock_test_and_set() is designed for locking rather than a
+       generic atomic exchange, so it only provides an acquire barrier
+       and may not store the exact value on all architectures. We prefer
+       __atomic_exchange_n() instead on all modern compilers.
+    */
+    __sync_synchronize();
     return __sync_lock_test_and_set(&a->value, v);
 #elif defined(__SOLARIS__)
     return (int)atomic_swap_uint((volatile uint_t *)&a->value, v);
@@ -209,7 +215,15 @@ void *SDL_AtomicSetPtr(void **a, void *v)
     return _InterlockedExchangePointer(a, v);
 #elif defined(HAVE_WATCOM_ATOMICS)
     return (void *)_SDL_xchg_watcom((int *)a, (long)v);
+#elif defined(HAVE_ATOMIC_EXCHANGE_N)
+    return __atomic_exchange_n(a, v, __ATOMIC_SEQ_CST);
 #elif defined(HAVE_GCC_ATOMICS)
+    /* __sync_lock_test_and_set() is designed for locking rather than a
+       generic atomic exchange, so it only provides an acquire barrier
+       and may not store the exact value on all architectures. We prefer
+       __atomic_exchange_n() instead on all modern compilers.
+    */
+    __sync_synchronize();
     return __sync_lock_test_and_set(a, v);
 #elif defined(__SOLARIS__)
     return atomic_swap_ptr(a, v);