SDL: stdlib: Clean up and export SDL_UCS4ToUTF8().

From c3bf874abf713718586fbf66bba544958ce2785a Mon Sep 17 00:00:00 2001
From: "Ryan C. Gordon" <[EMAIL REDACTED]>
Date: Thu, 4 Jul 2024 01:09:46 -0400
Subject: [PATCH] stdlib: Clean up and export SDL_UCS4ToUTF8().

Also fix internal usage of the function.

Fixes #10157.
---
 include/SDL3/SDL_stdinc.h                   | 29 +++++++++++++++++
 src/SDL_utils.c                             | 25 --------------
 src/SDL_utils_c.h                           |  2 --
 src/dynapi/SDL_dynapi.sym                   |  1 +
 src/dynapi/SDL_dynapi_overrides.h           |  1 +
 src/dynapi/SDL_dynapi_procs.h               |  1 +
 src/stdlib/SDL_string.c                     | 36 +++++++++++++++++++++
 src/video/emscripten/SDL_emscriptenevents.c |  6 ++--
 src/video/windows/SDL_windowsevents.c       |  6 ++--
 9 files changed, 74 insertions(+), 33 deletions(-)

diff --git a/include/SDL3/SDL_stdinc.h b/include/SDL3/SDL_stdinc.h
index c3628cc6b74ca..482ae8cf59cd8 100644
--- a/include/SDL3/SDL_stdinc.h
+++ b/include/SDL3/SDL_stdinc.h
@@ -1309,6 +1309,35 @@ extern SDL_DECLSPEC int SDLCALL SDL_strncasecmp(const char *str1, const char *st
  */
 extern SDL_DECLSPEC Uint32 SDLCALL SDL_StepUTF8(const char **pstr, size_t *pslen);
 
+/**
+ * Convert a single Unicode codepoint to UTF-8.
+ *
+ * The buffer pointed to by `dst` must be at least 4 bytes long, as this
+ * function may generate between 1 and 4 bytes of output.
+ *
+ * This function returns a pointer to the first byte _after_ the newly-written
+ * UTF-8 sequence, which is useful for encoding multiple codepoints in a loop,
+ * or knowing where to write a null-terminator character to end the string
+ * (in either case, plan to have a buffer of _more_ than 4 bytes!).
+ *
+ * If `codepoint` is an invalid value (outside the Unicode range, or a UTF-16
+ * surrogate value, etc), this will use U+FFFD (REPLACEMENT CHARACTER) for
+ * the codepoint instead, and not set an error.
+ *
+ * If `dst` is NULL, this returns NULL immediately without writing to the
+ * pointer and without setting an error.
+ *
+ * \param codepoint a Unicode codepoint to convert to UTF-8.
+ * \param dst the location to write the encoded UTF-8. Must point to at least 4 bytes!
+ * \returns a pointer to the first byte past the newly-written UTF-8 sequence, or NULL if `dst` is NULL.
+ *
+ * \threadsafety It is safe to call this function from any thread.
+ *
+ * \since This function is available since SDL 3.0.0.
+ */
+extern SDL_DECLSPEC char * SDLCALL SDL_UCS4ToUTF8(Uint32 codepoint, char *dst);
+
+
 extern SDL_DECLSPEC int SDLCALL SDL_sscanf(const char *text, SDL_SCANF_FORMAT_STRING const char *fmt, ...) SDL_SCANF_VARARG_FUNC(2);
 extern SDL_DECLSPEC int SDLCALL SDL_vsscanf(const char *text, SDL_SCANF_FORMAT_STRING const char *fmt, va_list ap) SDL_SCANF_VARARG_FUNCV(2);
 extern SDL_DECLSPEC int SDLCALL SDL_snprintf(SDL_OUT_Z_CAP(maxlen) char *text, size_t maxlen, SDL_PRINTF_FORMAT_STRING const char *fmt, ... ) SDL_PRINTF_VARARG_FUNC(3);
diff --git a/src/SDL_utils.c b/src/SDL_utils.c
index 0aaa3dc391bc3..222cf49a3ed13 100644
--- a/src/SDL_utils.c
+++ b/src/SDL_utils.c
@@ -101,31 +101,6 @@ SDL_bool SDL_endswith(const char *string, const char *suffix)
     return SDL_FALSE;
 }
 
-char *SDL_UCS4ToUTF8(Uint32 ch, char *dst)
-{
-    Uint8 *p = (Uint8 *)dst;
-    if (ch <= 0x7F) {
-        *p = (Uint8)ch;
-        ++dst;
-    } else if (ch <= 0x7FF) {
-        p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F);
-        p[1] = 0x80 | (Uint8)(ch & 0x3F);
-        dst += 2;
-    } else if (ch <= 0xFFFF) {
-        p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F);
-        p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
-        p[2] = 0x80 | (Uint8)(ch & 0x3F);
-        dst += 3;
-    } else {
-        p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07);
-        p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
-        p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
-        p[3] = 0x80 | (Uint8)(ch & 0x3F);
-        dst += 4;
-    }
-    return dst;
-}
-
 /* Assume we can wrap SDL_AtomicInt values and cast to Uint32 */
 SDL_COMPILE_TIME_ASSERT(sizeof_object_id, sizeof(int) == sizeof(Uint32));
 
diff --git a/src/SDL_utils_c.h b/src/SDL_utils_c.h
index fb83c30be9ef8..35d8543e8a04f 100644
--- a/src/SDL_utils_c.h
+++ b/src/SDL_utils_c.h
@@ -32,8 +32,6 @@ extern void SDL_CalculateFraction(float x, int *numerator, int *denominator);
 
 extern SDL_bool SDL_endswith(const char *string, const char *suffix);
 
-extern char *SDL_UCS4ToUTF8(Uint32 ch, char *dst);
-
 typedef enum
 {
     SDL_OBJECT_TYPE_UNKNOWN,
diff --git a/src/dynapi/SDL_dynapi.sym b/src/dynapi/SDL_dynapi.sym
index 1f5f8ca5429b7..284d1bda56b16 100644
--- a/src/dynapi/SDL_dynapi.sym
+++ b/src/dynapi/SDL_dynapi.sym
@@ -811,6 +811,7 @@ SDL3_0.0.0 {
     SDL_TryLockRWLockForWriting;
     SDL_TryLockSpinlock;
     SDL_TryWaitSemaphore;
+    SDL_UCS4ToUTF8;
     SDL_UnbindAudioStream;
     SDL_UnbindAudioStreams;
     SDL_UnloadObject;
diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h
index 36fec9b6485b1..701d5fe3291cc 100644
--- a/src/dynapi/SDL_dynapi_overrides.h
+++ b/src/dynapi/SDL_dynapi_overrides.h
@@ -836,6 +836,7 @@
 #define SDL_TryLockRWLockForWriting SDL_TryLockRWLockForWriting_REAL
 #define SDL_TryLockSpinlock SDL_TryLockSpinlock_REAL
 #define SDL_TryWaitSemaphore SDL_TryWaitSemaphore_REAL
+#define SDL_UCS4ToUTF8 SDL_UCS4ToUTF8_REAL
 #define SDL_UnbindAudioStream SDL_UnbindAudioStream_REAL
 #define SDL_UnbindAudioStreams SDL_UnbindAudioStreams_REAL
 #define SDL_UnloadObject SDL_UnloadObject_REAL
diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h
index 16c54b365896a..75fd300e19183 100644
--- a/src/dynapi/SDL_dynapi_procs.h
+++ b/src/dynapi/SDL_dynapi_procs.h
@@ -846,6 +846,7 @@ SDL_DYNAPI_PROC(int,SDL_TryLockRWLockForReading,(SDL_RWLock *a),(a),return)
 SDL_DYNAPI_PROC(int,SDL_TryLockRWLockForWriting,(SDL_RWLock *a),(a),return)
 SDL_DYNAPI_PROC(SDL_bool,SDL_TryLockSpinlock,(SDL_SpinLock *a),(a),return)
 SDL_DYNAPI_PROC(int,SDL_TryWaitSemaphore,(SDL_Semaphore *a),(a),return)
+SDL_DYNAPI_PROC(char*,SDL_UCS4ToUTF8,(Uint32 a, char *b),(a,b),return)
 SDL_DYNAPI_PROC(void,SDL_UnbindAudioStream,(SDL_AudioStream *a),(a),)
 SDL_DYNAPI_PROC(void,SDL_UnbindAudioStreams,(SDL_AudioStream **a, int b),(a,b),)
 SDL_DYNAPI_PROC(void,SDL_UnloadObject,(void *a),(a),)
diff --git a/src/stdlib/SDL_string.c b/src/stdlib/SDL_string.c
index 554a746ff45d6..cd6cd1868c04c 100644
--- a/src/stdlib/SDL_string.c
+++ b/src/stdlib/SDL_string.c
@@ -42,6 +42,42 @@
 SDL_COMPILE_TIME_ASSERT(sizeof_wchar_t, sizeof(wchar_t) == SDL_SIZEOF_WCHAR_T);
 
 
+char *SDL_UCS4ToUTF8(Uint32 codepoint, char *dst)  // Encodes one codepoint as UTF-8 at `dst`; returns a pointer just past the bytes written, or NULL if `dst` is NULL.
+{
+    if (!dst) {
+        return NULL;  // Nowhere to write: hand NULL back without writing anything (no error is set here).
+    } else if (codepoint > 0x10FFFF) {  // Outside the range of Unicode codepoints (also, larger than can be encoded in 4 bytes of UTF-8!).
+        codepoint = SDL_INVALID_UNICODE_CODEPOINT;  // Encode the replacement value instead of the out-of-range input.
+    } else if ((codepoint >= 0xD800) && (codepoint <= 0xDFFF)) {  // UTF-16 surrogate values are illegal in UTF-8.
+        codepoint = SDL_INVALID_UNICODE_CODEPOINT;  // Same substitution for surrogates.
+    }
+
+    Uint8 *p = (Uint8 *)dst;  // Unsigned byte view of the same buffer; `dst` itself is only advanced, never written through.
+    if (codepoint <= 0x7F) {  // 1 byte: 0xxxxxxx (stored unchanged).
+        *p = (Uint8)codepoint;
+        ++dst;
+    } else if (codepoint <= 0x7FF) {  // 2 bytes: 110xxxxx 10xxxxxx.
+        p[0] = 0xC0 | (Uint8)((codepoint >> 6) & 0x1F);  // Lead byte carries the top 5 bits.
+        p[1] = 0x80 | (Uint8)(codepoint & 0x3F);  // Continuation byte carries the low 6 bits.
+        dst += 2;
+    } else if (codepoint <= 0xFFFF) {  // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx.
+        p[0] = 0xE0 | (Uint8)((codepoint >> 12) & 0x0F);  // Lead byte carries the top 4 bits.
+        p[1] = 0x80 | (Uint8)((codepoint >> 6) & 0x3F);
+        p[2] = 0x80 | (Uint8)(codepoint & 0x3F);
+        dst += 3;
+    } else {  // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
+        SDL_assert(codepoint <= 0x10FFFF);  // Already guaranteed by the range check at the top of the function.
+        p[0] = 0xF0 | (Uint8)((codepoint >> 18) & 0x07);  // Lead byte carries the top 3 bits.
+        p[1] = 0x80 | (Uint8)((codepoint >> 12) & 0x3F);
+        p[2] = 0x80 | (Uint8)((codepoint >> 6) & 0x3F);
+        p[3] = 0x80 | (Uint8)(codepoint & 0x3F);
+        dst += 4;
+    }
+
+    return dst;  // One past the last byte written; no terminator is appended, callers add their own.
+}
+
+
 // this expects `from` and `to` to be UTF-32 encoding!
 int SDL_CaseFoldUnicode(const Uint32 from, Uint32 *to)
 {
diff --git a/src/video/emscripten/SDL_emscriptenevents.c b/src/video/emscripten/SDL_emscriptenevents.c
index f531e47b54733..74fc3f1568e4e 100644
--- a/src/video/emscripten/SDL_emscriptenevents.c
+++ b/src/video/emscripten/SDL_emscriptenevents.c
@@ -543,9 +543,9 @@ static EM_BOOL Emscripten_HandleKeyPress(int eventType, const EmscriptenKeyboard
 
     if (SDL_TextInputActive(window_data->window)) {
         char text[5];
-        if (SDL_UCS4ToUTF8(keyEvent->charCode, text)) {
-            SDL_SendKeyboardText(text);
-        }
+        char *end = SDL_UCS4ToUTF8(keyEvent->charCode, text);
+        *end = '\0';
+        SDL_SendKeyboardText(text);
         return EM_TRUE;
     }
     return EM_FALSE;
diff --git a/src/video/windows/SDL_windowsevents.c b/src/video/windows/SDL_windowsevents.c
index 8e6485b3cbf65..32a35076175df 100644
--- a/src/video/windows/SDL_windowsevents.c
+++ b/src/video/windows/SDL_windowsevents.c
@@ -1286,9 +1286,9 @@ LRESULT CALLBACK WIN_WindowProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lPara
         } else {
             if (SDL_TextInputActive(data->window)) {
                 char text[5];
-                if (SDL_UCS4ToUTF8((Uint32)wParam, text) != text) {
-                    SDL_SendKeyboardText(text);
-                }
+                char *end = SDL_UCS4ToUTF8((Uint32)wParam, text);
+                *end = '\0';
+                SDL_SendKeyboardText(text);
             }
             returnCode = 0;
         }