SDL: Fixing WM_CHAR event handling for Unicode characters outside the Basic Multilingual Plane.

From 712e0d1f06bf413213bc4a9d3be2ac8d9a34a9f9 Mon Sep 17 00:00:00 2001
From: "Henry G. Stratmann III" <[EMAIL REDACTED]>
Date: Sat, 7 Aug 2021 16:23:15 -0500
Subject: [PATCH] Fixing WM_CHAR event handling for Unicode characters outside
 the Basic Multilingual Plane.

---
 src/video/windows/SDL_windowsevents.c | 36 +++++++++++++++++++++++----
 src/video/windows/SDL_windowswindow.c |  1 +
 src/video/windows/SDL_windowswindow.h |  1 +
 3 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/src/video/windows/SDL_windowsevents.c b/src/video/windows/SDL_windowsevents.c
index 1481b0314..960443dbe 100644
--- a/src/video/windows/SDL_windowsevents.c
+++ b/src/video/windows/SDL_windowsevents.c
@@ -862,15 +862,41 @@ WIN_WindowProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam)
     case WM_UNICHAR:
         if (wParam == UNICODE_NOCHAR) {
             returnCode = 1;
-            break;
-        }
-        /* otherwise fall through to below */
-    case WM_CHAR:
-        {
+        } else {
             char text[5];
             if (WIN_ConvertUTF32toUTF8((UINT32)wParam, text)) {
                 SDL_SendKeyboardText(text);
             }
+            returnCode = 0;
+        }
+        break;
+
+    case WM_CHAR:
+        /* When a user enters a Unicode code point defined in the Basic Multilingual Plane, Windows sends a WM_CHAR
+           message with the code point encoded as UTF-16. When a user enters a Unicode code point from a Supplementary
+           Plane, Windows sends the code point in two separate WM_CHAR messages: The first message includes the UTF-16
+           High Surrogate and the second the UTF-16 Low Surrogate. The High and Low Surrogates cannot be individually
+           converted to valid UTF-8; therefore, we must save the High Surrogate from the first WM_CHAR message and
+           concatenate it with the Low Surrogate from the second WM_CHAR message. At that point, we have a valid
+           UTF-16 surrogate pair ready to re-encode as UTF-8. */
+        if (IS_HIGH_SURROGATE(wParam)) {
+            data->high_surrogate = (WCHAR)wParam;
+        } else if (IS_SURROGATE_PAIR(data->high_surrogate, wParam)) {
+            /* The code point is in a Supplementary Plane.
+               Here wParam is the Low Surrogate. */
+            const WCHAR surrogate_pair[] = {data->high_surrogate, (WCHAR)wParam, 0};
+            char *s;
+            s = SDL_iconv_string("UTF-8", "UTF-16LE", (const char *)surrogate_pair, sizeof(surrogate_pair));
+            SDL_SendKeyboardText(s);
+            SDL_free(s);
+            data->high_surrogate = 0;
+        } else {
+            /* The code point is in the Basic Multilingual Plane */
+            const WCHAR wchar[] = {(WCHAR)wParam, 0};
+            char *s;
+            s = SDL_iconv_string("UTF-8", "UTF-16LE", (const char *)wchar, sizeof(wchar));
+            SDL_SendKeyboardText(s);
+            SDL_free(s);
         }
         returnCode = 0;
         break;
diff --git a/src/video/windows/SDL_windowswindow.c b/src/video/windows/SDL_windowswindow.c
index 93d1903a7..75fe64cac 100644
--- a/src/video/windows/SDL_windowswindow.c
+++ b/src/video/windows/SDL_windowswindow.c
@@ -186,6 +186,7 @@ SetupWindowData(_THIS, SDL_Window * window, HWND hwnd, HWND parent, SDL_bool cre
     data->hdc = GetDC(hwnd);
     data->hinstance = (HINSTANCE) GetWindowLongPtr(hwnd, GWLP_HINSTANCE);
     data->created = created;
+    data->high_surrogate = 0;
     data->mouse_button_flags = 0;
     data->last_pointer_update = (LPARAM)-1;
     data->videodata = videodata;
diff --git a/src/video/windows/SDL_windowswindow.h b/src/video/windows/SDL_windowswindow.h
index 700566e9c..e64096cda 100644
--- a/src/video/windows/SDL_windowswindow.h
+++ b/src/video/windows/SDL_windowswindow.h
@@ -41,6 +41,7 @@ typedef struct
     SDL_bool created;
     WPARAM mouse_button_flags;
     LPARAM last_pointer_update;
+    WCHAR high_surrogate;
     SDL_bool initializing;
     SDL_bool expected_resize;
     SDL_bool in_border_change;