SDL: N3DS: Semaphore fixes. (#6776)

From 053ce39d67d13658f035e6ddfcae5dd1175021df Mon Sep 17 00:00:00 2001
From: Pierre Wendling <[EMAIL REDACTED]>
Date: Sun, 11 Dec 2022 14:27:16 -0500
Subject: [PATCH] N3DS: Semaphore fixes. (#6776)

* N3DS: Make Sem waits cooperative friendly.

The 3DS has a cooperative threading model. Sleeping after TryWait and
WaitTimeout avoids starving other threads. It incurs a runtime penalty,
but it's better than having to hard reset your console to recover from
a deadlock.
---
 docs/README-n3ds.md             |  1 +
 src/thread/n3ds/SDL_syssem.c    | 53 +++++++++++++++++----------------
 src/thread/n3ds/SDL_systhread.c |  3 +-
 test/testsem.c                  |  2 ++
 4 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/docs/README-n3ds.md b/docs/README-n3ds.md
index 647720bc8527..933952bb1120 100644
--- a/docs/README-n3ds.md
+++ b/docs/README-n3ds.md
@@ -25,3 +25,4 @@ cmake --install build
 -   SDL3_main should be used to ensure ROMFS is enabled.
 -   By default, the extra L2 cache and higher clock speeds of the New 2/3DS lineup are enabled. If you wish to turn it off, use `osSetSpeedupEnable(false)` in your main function.
 -   `SDL_GetBasePath` returns the romfs root instead of the executable's directory.
+-   The Nintendo 3DS uses a cooperative threading model on a single core, meaning a thread will never yield unless done manually through the `SDL_Delay` functions, or blocking waits (`SDL_LockMutex`, `SDL_SemWait`, `SDL_CondWait`, `SDL_WaitThread`). To avoid starving other threads, `SDL_SemTryWait` and `SDL_SemWaitTimeout` will yield if they fail to acquire the semaphore, see https://github.com/libsdl-org/SDL/pull/6776 for more information.
diff --git a/src/thread/n3ds/SDL_syssem.c b/src/thread/n3ds/SDL_syssem.c
index 8cd381f5c0bf..98883b0ee846 100644
--- a/src/thread/n3ds/SDL_syssem.c
+++ b/src/thread/n3ds/SDL_syssem.c
@@ -26,13 +26,14 @@
 
 #include <3ds.h>
 
+int WaitOnSemaphoreFor(SDL_sem *sem, Sint64 timeout);
+
 struct SDL_semaphore
 {
     LightSemaphore semaphore;
 };
 
-SDL_sem *
-SDL_CreateSemaphore(Uint32 initial_value)
+SDL_sem *SDL_CreateSemaphore(Uint32 initial_value)
 {
     SDL_sem *sem;
 
@@ -57,44 +58,46 @@ SDL_CreateSemaphore(Uint32 initial_value)
 */
 void SDL_DestroySemaphore(SDL_sem *sem)
 {
-    if (sem) {
-        SDL_free(sem);
-    }
+    SDL_free(sem);
 }
 
 int SDL_SemWaitTimeoutNS(SDL_sem *sem, Sint64 timeoutNS)
 {
-    int retval;
-
     if (sem == NULL) {
         return SDL_InvalidParamError("sem");
     }
 
     if (timeoutNS == SDL_MUTEX_MAXWAIT) {
         LightSemaphore_Acquire(&sem->semaphore, 1);
-        retval = 0;
-    } else {
-        int return_code = LightSemaphore_TryAcquire(&sem->semaphore, 1);
-        if (return_code != 0) {
-            /* FIXME: Does this code guarantee a wall clock timeout here?
-             *        Can we handle sub-millisecond delays? */
-            u32 timeout = (u32)SDL_NS_TO_MS(timeoutNS);
-            for (u32 i = 0; i < timeout; i++) {
-                svcSleepThread(1000000LL);
-                return_code = LightSemaphore_TryAcquire(&sem->semaphore, 1);
-                if (return_code == 0) {
-                    break;
-                }
-            }
+        return 0;
+    }
+
+    if (LightSemaphore_TryAcquire(&sem->semaphore, 1) != 0) {
+        return WaitOnSemaphoreFor(sem, timeoutNS);
+    }
+
+    return 0;
+}
+
+int WaitOnSemaphoreFor(SDL_sem *sem, Sint64 timeout)
+{
+    Uint64 stop_time = SDL_GetTicksNS() + timeout;
+    Uint64 current_time = SDL_GetTicksNS();
+    while (current_time < stop_time) {
+        if (LightSemaphore_TryAcquire(&sem->semaphore, 1) == 0) {
+            return 0;
         }
-        retval = return_code != 0 ? SDL_MUTEX_TIMEDOUT : 0;
+        /* 100 microseconds seems to be the sweet spot */
+        SDL_DelayNS(SDL_US_TO_NS(100));
+        current_time = SDL_GetTicksNS();
     }
 
-    return retval;
+    /* If we failed, yield to avoid starvation on busy waits */
+    SDL_DelayNS(1);
+    return SDL_MUTEX_TIMEDOUT;
 }
 
-Uint32
-SDL_SemValue(SDL_sem *sem)
+Uint32 SDL_SemValue(SDL_sem *sem)
 {
     if (sem == NULL) {
         SDL_InvalidParamError("sem");
diff --git a/src/thread/n3ds/SDL_systhread.c b/src/thread/n3ds/SDL_systhread.c
index 7631e38f1d01..077380601092 100644
--- a/src/thread/n3ds/SDL_systhread.c
+++ b/src/thread/n3ds/SDL_systhread.c
@@ -49,8 +49,9 @@ static void ThreadEntry(void *arg)
 
 int SDL_SYS_CreateThread(SDL_Thread *thread)
 {
-    s32 priority = N3DS_THREAD_PRIORITY_MEDIUM;
+    s32 priority;
     size_t stack_size = GetStackSize(thread->stacksize);
+    svcGetThreadPriority(&priority, CUR_THREAD_HANDLE);
 
     thread->handle = threadCreate(ThreadEntry,
                                   thread,
diff --git a/test/testsem.c b/test/testsem.c
index e3a8ba217a64..9b977ba1e5c3 100644
--- a/test/testsem.c
+++ b/test/testsem.c
@@ -208,6 +208,8 @@ TestOverheadContended(SDL_bool try_wait)
         }
         /* Make sure threads consumed everything */
         while (SDL_SemValue(sem)) {
+            /* Friendlier with cooperative threading models */
+            SDL_DelayNS(1);
         }
     }
     end_ticks = SDL_GetTicks();