SDL: Win32: Simplify Unicode text input code

From 7b628ea4d53c5b85954088570105764f20b6ff02 Mon Sep 17 00:00:00 2001
From: Dimitriy Ryazantcev <[EMAIL REDACTED]>
Date: Mon, 27 Nov 2023 16:15:24 +0200
Subject: [PATCH] Win32: Simplify Unicode text input code

---
 src/video/windows/SDL_windowsevents.c | 77 ++++++---------------------
 1 file changed, 17 insertions(+), 60 deletions(-)

diff --git a/src/video/windows/SDL_windowsevents.c b/src/video/windows/SDL_windowsevents.c
index d5381dd77cf0..dc56b0002b39 100644
--- a/src/video/windows/SDL_windowsevents.c
+++ b/src/video/windows/SDL_windowsevents.c
@@ -98,12 +98,6 @@
 #ifndef IS_HIGH_SURROGATE
 #define IS_HIGH_SURROGATE(x) (((x) >= 0xd800) && ((x) <= 0xdbff))
 #endif
-#ifndef IS_LOW_SURROGATE
-#define IS_LOW_SURROGATE(x) (((x) >= 0xdc00) && ((x) <= 0xdfff))
-#endif
-#ifndef IS_SURROGATE_PAIR
-#define IS_SURROGATE_PAIR(h, l) (IS_HIGH_SURROGATE(h) && IS_LOW_SURROGATE(l))
-#endif
 
 #ifndef USER_TIMER_MINIMUM
 #define USER_TIMER_MINIMUM 0x0000000A
@@ -394,39 +388,6 @@ static void WIN_UpdateFocus(SDL_Window *window, SDL_bool expect_focus)
 }
 #endif /*!defined(__XBOXONE__) && !defined(__XBOXSERIES__)*/
 
-static BOOL WIN_ConvertUTF32toUTF8(UINT32 codepoint, char *text)
-{
-    if (codepoint <= 0x7F) {
-        text[0] = (char)codepoint;
-        text[1] = '\0';
-    } else if (codepoint <= 0x7FF) {
-        text[0] = 0xC0 | (char)((codepoint >> 6) & 0x1F);
-        text[1] = 0x80 | (char)(codepoint & 0x3F);
-        text[2] = '\0';
-    } else if (codepoint <= 0xFFFF) {
-        text[0] = 0xE0 | (char)((codepoint >> 12) & 0x0F);
-        text[1] = 0x80 | (char)((codepoint >> 6) & 0x3F);
-        text[2] = 0x80 | (char)(codepoint & 0x3F);
-        text[3] = '\0';
-    } else if (codepoint <= 0x10FFFF) {
-        text[0] = 0xF0 | (char)((codepoint >> 18) & 0x0F);
-        text[1] = 0x80 | (char)((codepoint >> 12) & 0x3F);
-        text[2] = 0x80 | (char)((codepoint >> 6) & 0x3F);
-        text[3] = 0x80 | (char)(codepoint & 0x3F);
-        text[4] = '\0';
-    } else {
-        return SDL_FALSE;
-    }
-    return SDL_TRUE;
-}
-
-static BOOL WIN_ConvertUTF16toUTF8(UINT32 high_surrogate, UINT32 low_surrogate, char *text)
-{
-    const UINT32 SURROGATE_OFFSET = 0x10000U - (0xD800 << 10) - 0xDC00;
-    const UINT32 codepoint = (high_surrogate << 10) + low_surrogate + SURROGATE_OFFSET;
-    return WIN_ConvertUTF32toUTF8(codepoint, text);
-}
-
 static SDL_bool ShouldGenerateWindowCloseOnAltF4(void)
 {
     return !SDL_GetHintBoolean(SDL_HINT_WINDOWS_NO_CLOSE_ON_ALT_F4, SDL_FALSE);
@@ -885,7 +846,7 @@ WIN_WindowProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam)
             returnCode = 1;
         } else {
             char text[5];
-            if (WIN_ConvertUTF32toUTF8((UINT32)wParam, text)) {
+            if (SDL_UCS4ToUTF8((Uint32)wParam, text) != text) {
                 SDL_SendKeyboardText(text);
             }
             returnCode = 0;
@@ -893,31 +854,27 @@ WIN_WindowProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam)
         break;
 
     case WM_CHAR:
-        /* When a user enters a Unicode code point defined in the Basic Multilingual Plane, Windows sends a WM_CHAR
-           message with the code point encoded as UTF-16. When a user enters a Unicode code point from a Supplementary
-           Plane, Windows sends the code point in two separate WM_CHAR messages: The first message includes the UTF-16
-           High Surrogate and the second the UTF-16 Low Surrogate. The High and Low Surrogates cannot be individually
-           converted to valid UTF-8, therefore, we must save the High Surrogate from the first WM_CHAR message and
-           concatenate it with the Low Surrogate from the second WM_CHAR message. At that point, we have a valid
-           UTF-16 surrogate pair ready to re-encode as UTF-8. */
+        /* Characters outside the Unicode Basic Multilingual Plane (BMP)
+         * are encoded as a so-called "surrogate pair" that arrives in two separate
+         * WM_CHAR messages. Cache the high surrogate until the next WM_CHAR message. */
         if (IS_HIGH_SURROGATE(wParam)) {
             data->high_surrogate = (WCHAR)wParam;
-        } else if (IS_SURROGATE_PAIR(data->high_surrogate, wParam)) {
-            /* The code point is in a Supplementary Plane.
-               Here wParam is the Low Surrogate. */
-            char text[5];
-            if (WIN_ConvertUTF16toUTF8((UINT32)data->high_surrogate, (UINT32)wParam, text)) {
-                SDL_SendKeyboardText(text);
-            }
-            data->high_surrogate = 0;
         } else {
-            /* The code point is in the Basic Multilingual Plane.
-               It's numerically equal to UTF-32. */
-            char text[5];
-            if (WIN_ConvertUTF32toUTF8((UINT32)wParam, text)) {
-                SDL_SendKeyboardText(text);
+            WCHAR utf16[] = {
+                data->high_surrogate ? data->high_surrogate : (WCHAR)wParam,
+                data->high_surrogate ? (WCHAR)wParam : L'\0',
+                L'\0'
+            };
+
+            char utf8[5];
+            int result = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, utf16, -1, utf8, sizeof(utf8), NULL, NULL);
+            if (result > 0) {
+                SDL_SendKeyboardText(utf8);
             }
+
+            data->high_surrogate = L'\0';
         }
+
         returnCode = 0;
         break;