SDL: IME Composition Truncation + SDL_IsTextInputShown + SDL_ClearComposition (#5398)

From d14a12638309a3cba55bbb245df3347379b86f8f Mon Sep 17 00:00:00 2001
From: Zach Reedy <[EMAIL REDACTED]>
Date: Fri, 11 Mar 2022 17:45:17 -0500
Subject: [PATCH] IME Composition Truncation + SDL_IsTextInputShown +
 SDL_ClearComposition (#5398)

* Fixes for IME Composition Truncation + Addition of SDL_ClearComposition, SDL_IsTextInputShown

* Fixed: Documentation and code style issues raised during code review.
---
 include/SDL_events.h                    |  15 +++
 include/SDL_hints.h                     |  11 ++
 include/SDL_keyboard.h                  |  18 ++++
 src/dynapi/SDL_dynapi_overrides.h       |   2 +
 src/dynapi/SDL_dynapi_procs.h           |   2 +
 src/events/SDL_keyboard.c               |  16 +++
 src/video/SDL_sysvideo.h                |   2 +
 src/video/SDL_video.c                   |  18 ++++
 src/video/windows/SDL_windowskeyboard.c | 129 ++++++++++++++++++++++--
 src/video/windows/SDL_windowskeyboard.h |   2 +
 src/video/windows/SDL_windowsvideo.c    |   2 +
 src/video/windows/SDL_windowsvideo.h    |   5 +-
 12 files changed, 210 insertions(+), 12 deletions(-)

diff --git a/include/SDL_events.h b/include/SDL_events.h
index 3722a6311a8..7e469070ecd 100644
--- a/include/SDL_events.h
+++ b/include/SDL_events.h
@@ -102,6 +102,7 @@ typedef enum
     SDL_KEYMAPCHANGED,          /**< Keymap changed due to a system event such as an
                                      input language or keyboard layout change.
                                 */
+    SDL_TEXTEDITING_EXT,       /**< Extended keyboard text editing (composition) */
 
     /* Mouse events */
     SDL_MOUSEMOTION    = 0x400, /**< Mouse moved */
@@ -243,6 +244,19 @@ typedef struct SDL_TextEditingEvent
     Sint32 length;                              /**< The length of selected editing text */
 } SDL_TextEditingEvent;
 
+/**
+ *  \brief Extended keyboard text editing event structure (event.editExt.*), used in place of
+ *  SDL_TextEditingEvent when the editing text would be truncated by that event's fixed-size text buffer
+ */
+typedef struct SDL_TextEditingExtEvent
+{
+    Uint32 type;                                /**< ::SDL_TEXTEDITING_EXT */
+    Uint32 timestamp;                           /**< In milliseconds, populated using SDL_GetTicks() */
+    Uint32 windowID;                            /**< The window with keyboard focus, if any */
+    char* text;                                 /**< The editing text, which should be freed with SDL_free(), and will not be NULL */
+    Sint32 start;                               /**< The start cursor of selected editing text */
+    Sint32 length;                              /**< The length of selected editing text */
+} SDL_TextEditingExtEvent;
 
 #define SDL_TEXTINPUTEVENT_TEXT_SIZE (32)
 /**
@@ -601,6 +615,7 @@ typedef union SDL_Event
     SDL_WindowEvent window;                 /**< Window event data */
     SDL_KeyboardEvent key;                  /**< Keyboard event data */
     SDL_TextEditingEvent edit;              /**< Text editing event data */
+    SDL_TextEditingExtEvent editExt;        /**< Extended text editing event data */
     SDL_TextInputEvent text;                /**< Text input event data */
     SDL_MouseMotionEvent motion;            /**< Mouse motion event data */
     SDL_MouseButtonEvent button;            /**< Mouse button event data */
diff --git a/include/SDL_hints.h b/include/SDL_hints.h
index 92fbb5778b2..384320e0665 100644
--- a/include/SDL_hints.h
+++ b/include/SDL_hints.h
@@ -579,6 +579,17 @@ extern "C" {
  */
 #define SDL_HINT_IME_SHOW_UI "SDL_IME_SHOW_UI"
 
+/**
+ * \brief A variable to control if extended IME text support is enabled.
+ * If enabled then SDL_TextEditingExtEvent will be issued if the text would be truncated otherwise.
+ * Additionally SDL_TextInputEvent will be dispatched multiple times so that it is not truncated.
+ *
+ * The variable can be set to the following values:
+ *   "0"       - Legacy behavior. Text can be truncated, no heap allocations. (default)
+ *   "1"       - Modern behavior.
+ */
+#define SDL_HINT_IME_SUPPORT_EXTENDED_TEXT "SDL_IME_SUPPORT_EXTENDED_TEXT"
+
 /**
  * \brief  A variable controlling whether the home indicator bar on iPhone X
  *         should be hidden.
diff --git a/include/SDL_keyboard.h b/include/SDL_keyboard.h
index 86824655c8c..5489cb9df08 100644
--- a/include/SDL_keyboard.h
+++ b/include/SDL_keyboard.h
@@ -268,6 +268,24 @@ extern DECLSPEC SDL_bool SDLCALL SDL_IsTextInputActive(void);
  */
 extern DECLSPEC void SDLCALL SDL_StopTextInput(void);
 
+/**
+ * Dismiss the composition window/IME without disabling the subsystem.
+ *
+ * \since This function is available since SDL 2.0.22
+ *
+ * \sa SDL_StartTextInput
+ * \sa SDL_StopTextInput
+ */
+extern DECLSPEC void SDLCALL SDL_ClearComposition(void);
+
+/**
+ * Check whether an IME composition or candidate window is currently shown.
+ *
+ * \since This function is available since SDL 2.0.22
+ *
+ */
+extern DECLSPEC SDL_bool SDLCALL SDL_IsTextInputShown(void);
+
 /**
  * Set the rectangle used to type Unicode text inputs.
  *
diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h
index 2573a042257..6e652e45a56 100644
--- a/src/dynapi/SDL_dynapi_overrides.h
+++ b/src/dynapi/SDL_dynapi_overrides.h
@@ -857,3 +857,5 @@
 #define SDL_PremultiplyAlpha SDL_PremultiplyAlpha_REAL
 #define SDL_AndroidSendMessage SDL_AndroidSendMessage_REAL
 #define SDL_GetTouchName SDL_GetTouchName_REAL
+#define SDL_ClearComposition SDL_ClearComposition_REAL
+#define SDL_IsTextInputShown SDL_IsTextInputShown_REAL
diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h
index b1ea912cd5c..6323185767b 100644
--- a/src/dynapi/SDL_dynapi_procs.h
+++ b/src/dynapi/SDL_dynapi_procs.h
@@ -928,3 +928,5 @@ SDL_DYNAPI_PROC(int,SDL_PremultiplyAlpha,(int a, int b, Uint32 c, const void *d,
 SDL_DYNAPI_PROC(int,SDL_AndroidSendMessage,(Uint32 a, int b),(a,b),return)
 #endif
 SDL_DYNAPI_PROC(const char*,SDL_GetTouchName,(int a),(a),return)
+SDL_DYNAPI_PROC(void,SDL_ClearComposition,(void),(),)
+SDL_DYNAPI_PROC(SDL_bool,SDL_IsTextInputShown,(void),(),return)
diff --git a/src/events/SDL_keyboard.c b/src/events/SDL_keyboard.c
index bc2386b44c0..cd150fab699 100644
--- a/src/events/SDL_keyboard.c
+++ b/src/events/SDL_keyboard.c
@@ -894,6 +894,22 @@ SDL_SendEditingText(const char *text, int start, int length)
         event.edit.start = start;
         event.edit.length = length;
         SDL_utf8strlcpy(event.edit.text, text, SDL_arraysize(event.edit.text));
+
+        if (SDL_GetHintBoolean(SDL_HINT_IME_SUPPORT_EXTENDED_TEXT, SDL_FALSE) &&
+            SDL_strlen(text) >= SDL_arraysize(event.edit.text)) { /* >=: the NUL terminator needs a slot too */
+            event.editExt.type = SDL_TEXTEDITING_EXT;
+            event.editExt.windowID = keyboard->focus ? keyboard->focus->id : 0;
+            event.editExt.text = text ? SDL_strdup(text) : NULL; /* heap copy; the event consumer releases it with SDL_free() */
+            event.editExt.start = start;
+            event.editExt.length = length;
+        } else {
+            event.edit.type = SDL_TEXTEDITING; /* text fits (or the hint is off): fixed-size legacy event */
+            event.edit.windowID = keyboard->focus ? keyboard->focus->id : 0;
+            event.edit.start = start;
+            event.edit.length = length;
+            SDL_utf8strlcpy(event.edit.text, text, SDL_arraysize(event.edit.text));
+        }
+
         posted = (SDL_PushEvent(&event) > 0);
     }
     return (posted);
diff --git a/src/video/SDL_sysvideo.h b/src/video/SDL_sysvideo.h
index 470c14ba352..e2a65045ba7 100644
--- a/src/video/SDL_sysvideo.h
+++ b/src/video/SDL_sysvideo.h
@@ -307,6 +307,8 @@ struct SDL_VideoDevice
     void (*StartTextInput) (_THIS);
     void (*StopTextInput) (_THIS);
     void (*SetTextInputRect) (_THIS, SDL_Rect *rect);
+    void (*ClearComposition) (_THIS);
+    SDL_bool (*IsTextInputShown) (_THIS);
 
     /* Screen keyboard */
     SDL_bool (*HasScreenKeyboardSupport) (_THIS);
diff --git a/src/video/SDL_video.c b/src/video/SDL_video.c
index 143f1172b8f..2648c44c6e4 100644
--- a/src/video/SDL_video.c
+++ b/src/video/SDL_video.c
@@ -4135,6 +4135,24 @@ SDL_StartTextInput(void)
     }
 }
 
+void
+SDL_ClearComposition(void)
+{
+    if (_this != NULL && _this->ClearComposition != NULL) {
+        _this->ClearComposition(_this); /* optional backend hook; no device or no hook means no-op */
+    }
+}
+
+SDL_bool
+SDL_IsTextInputShown(void)
+{
+    /* Without a video device or a backend hook there is nothing to query. */
+    if (_this == NULL || _this->IsTextInputShown == NULL) {
+        return SDL_FALSE;
+    }
+    return _this->IsTextInputShown(_this);
+}
+
 SDL_bool
 SDL_IsTextInputActive(void)
 {
diff --git a/src/video/windows/SDL_windowskeyboard.c b/src/video/windows/SDL_windowskeyboard.c
index 6ab440dddfc..d9bd32a81cd 100644
--- a/src/video/windows/SDL_windowskeyboard.c
+++ b/src/video/windows/SDL_windowskeyboard.c
@@ -36,6 +36,8 @@ static void IME_Init(SDL_VideoData *videodata, HWND hwnd);
 static void IME_Enable(SDL_VideoData *videodata, HWND hwnd);
 static void IME_Disable(SDL_VideoData *videodata, HWND hwnd);
 static void IME_Quit(SDL_VideoData *videodata);
+static void IME_ClearComposition(SDL_VideoData *videodata);
+static SDL_bool IME_IsTextInputShown(SDL_VideoData* videodata);
 #endif /* !SDL_DISABLE_WINDOWS_IME */
 
 #ifndef MAPVK_VK_TO_VSC
@@ -62,6 +64,8 @@ WIN_InitKeyboard(_THIS)
     data->ime_hwnd_main = 0;
     data->ime_hwnd_current = 0;
     data->ime_himc = 0;
+    data->ime_composition_length = 32 * sizeof(WCHAR);
+    data->ime_composition = (WCHAR*)SDL_malloc(data->ime_composition_length);
     data->ime_composition[0] = 0;
     data->ime_readingstring[0] = 0;
     data->ime_cursor = 0;
@@ -272,6 +276,18 @@ WIN_SetTextInputRect(_THIS, SDL_Rect *rect)
     }
 }
 
+void WIN_ClearComposition(_THIS)
+{
+    /* Pass this driver's data to the IME layer, which clears the composition. */
+    IME_ClearComposition((SDL_VideoData *)_this->driverdata);
+}
+
+SDL_bool WIN_IsTextInputShown(_THIS)
+{
+    /* Report the IME layer's answer for this driver's data. */
+    return IME_IsTextInputShown((SDL_VideoData *)_this->driverdata);
+}
+
 static SDL_bool
 WIN_ShouldShowNativeUI()
 {
@@ -742,18 +758,51 @@ IME_ClearComposition(SDL_VideoData *videodata)
     SDL_SendEditingText("", 0, 0);
 }
 
+static SDL_bool
+IME_IsTextInputShown(SDL_VideoData* videodata)
+{
+    /* Reports the ime_uicontext flag that IME_HandleMessage sets and clears while handling */
+    /* window messages; the flag is only consulted when the IME is initialized, available and enabled. */
+
+    if (!videodata->ime_initialized || !videodata->ime_available || !videodata->ime_enabled)
+        return SDL_FALSE;
+
+    return videodata->ime_uicontext != 0 ? SDL_TRUE : SDL_FALSE;
+}
+
 static void
 IME_GetCompositionString(SDL_VideoData *videodata, HIMC himc, DWORD string)
 {
-    LONG length = ImmGetCompositionStringW(himc, string, videodata->ime_composition, sizeof(videodata->ime_composition) - sizeof(videodata->ime_composition[0]));
+    LONG length;
+    DWORD dwLang = ((DWORD_PTR)videodata->ime_hkl & 0xffff);
+
+    /* ime_composition_length is treated as the buffer's full capacity in bytes, terminator included. */
+    length = ImmGetCompositionStringW(himc, string, NULL, 0);
+    if (length > 0 && videodata->ime_composition_length < length + (LONG)sizeof(WCHAR)) {
+        WCHAR *grown = (WCHAR*)SDL_malloc(length + sizeof(WCHAR));
+        if (grown != NULL) { /* on failure keep the old, smaller buffer instead of leaving NULL */
+            SDL_free(videodata->ime_composition);
+            videodata->ime_composition = grown;
+            videodata->ime_composition_length = length + (LONG)sizeof(WCHAR);
+        }
+    }
+
+    /* Hold back one WCHAR so the terminator written after this read stays inside the buffer. */
+    length = ImmGetCompositionStringW(
+        himc, string, videodata->ime_composition,
+        videodata->ime_composition_length - (LONG)sizeof(WCHAR));
+
     if (length < 0)
         length = 0;
 
-    length /= sizeof(videodata->ime_composition[0]);
+    length /= sizeof(WCHAR);
     videodata->ime_cursor = LOWORD(ImmGetCompositionStringW(himc, GCS_CURSORPOS, 0, 0));
-    if (videodata->ime_cursor > 0 &&
-        videodata->ime_cursor < SDL_arraysize(videodata->ime_composition) &&
-        videodata->ime_composition[videodata->ime_cursor] == 0x3000) {
+    if ((dwLang == LANG_CHT || dwLang == LANG_CHS) &&
+        videodata->ime_cursor > 0 &&
+        videodata->ime_cursor < videodata->ime_composition_length / sizeof(WCHAR) &&
+        (videodata->ime_composition[0] == 0x3000 || videodata->ime_composition[0] == 0x0020)) {
+        // Traditional Chinese IMEs add a placeholder U+3000
+        // Simplified Chinese IMEs seem to add a placeholder U+0020 sometimes
         int i;
         for (i = videodata->ime_cursor + 1; i < length; ++i)
             videodata->ime_composition[i - 1] = videodata->ime_composition[i];
@@ -762,6 +811,39 @@ IME_GetCompositionString(SDL_VideoData *videodata, HIMC himc, DWORD string)
     }
 
     videodata->ime_composition[length] = 0;
+
+    // Get the correct caret position if we've selected a candidate from the candidate window
+    if (videodata->ime_cursor == 0 && length > 0) {
+        Sint32 start = 0;
+        Sint32 end = 0;
+
+        length = ImmGetCompositionStringW(himc, GCS_COMPATTR, NULL, 0);
+        if (length > 0) {
+            Uint8* attributes = (Uint8*)SDL_malloc(length);
+            ImmGetCompositionString(himc, GCS_COMPATTR, attributes, length);
+
+            for (start = 0; start < length; ++start) {
+                if (attributes[start] == ATTR_TARGET_CONVERTED ||
+                    attributes[start] == ATTR_TARGET_NOTCONVERTED)
+                    break;
+            }
+
+            for (end = start; end < length; ++end) {
+                if (attributes[end] != ATTR_TARGET_CONVERTED &&
+                    attributes[end] != ATTR_TARGET_NOTCONVERTED)
+                    break;
+            }
+
+            if (start == length) {
+                start = 0;
+                end = length;
+            }
+
+            SDL_free(attributes);
+        }
+
+        videodata->ime_cursor = end;
+    }
 }
 
 static void
@@ -780,22 +862,30 @@ IME_SendInputEvent(SDL_VideoData *videodata)
 static void
 IME_SendEditingEvent(SDL_VideoData *videodata)
 {
-    char *s = 0;
-    WCHAR buffer[SDL_TEXTEDITINGEVENT_TEXT_SIZE];
-    const size_t size = SDL_arraysize(buffer);
-    buffer[0] = 0;
+    char *s = NULL;
+    WCHAR *buffer = NULL;
+    /* Capacity in WCHARs (not bytes): SDL_wcslcpy/SDL_wcslcat below take element counts. */
+    size_t size = videodata->ime_composition_length / sizeof(WCHAR)
+                + SDL_arraysize(videodata->ime_readingstring) + 1;
+    /* One allocation sized for composition + reading string + NUL serves both branches. */
+    buffer = (WCHAR*)SDL_malloc(size * sizeof(WCHAR));
+    if (buffer == NULL)
+        return; /* out of memory: drop this editing event rather than write through NULL */
+    buffer[0] = 0;
     if (videodata->ime_readingstring[0]) {
         size_t len = SDL_min(SDL_wcslen(videodata->ime_composition), (size_t)videodata->ime_cursor);
         SDL_wcslcpy(buffer, videodata->ime_composition, len + 1);
         SDL_wcslcat(buffer, videodata->ime_readingstring, size);
         SDL_wcslcat(buffer, &videodata->ime_composition[len], size);
     }
     else {
         SDL_wcslcpy(buffer, videodata->ime_composition, size);
     }
+
     s = WIN_StringToUTF8W(buffer);
     SDL_SendEditingText(s, videodata->ime_cursor + (int)SDL_wcslen(videodata->ime_readingstring), 0);
     SDL_free(s);
+    SDL_free(buffer);
 }
 
 static void
@@ -914,6 +1004,15 @@ IME_HandleMessage(HWND hwnd, UINT msg, WPARAM wParam, LPARAM *lParam, SDL_VideoD
         return SDL_FALSE;
 
     switch (msg) {
+    case WM_KEYDOWN:
+        if (wParam == VK_PROCESSKEY)
+        {
+            videodata->ime_uicontext = 1;
+            trap = SDL_TRUE;
+        }
+        else
+            videodata->ime_uicontext = 0;
+        break;
     case WM_INPUTLANGCHANGE:
         IME_InputLangChanged(videodata);
         break;
@@ -922,14 +1021,17 @@ IME_HandleMessage(HWND hwnd, UINT msg, WPARAM wParam, LPARAM *lParam, SDL_VideoD
             *lParam = 0;
         }
         break;
-    case WM_IME_STARTCOMPOSITION:
+    case WM_IME_STARTCOMPOSITION: 
+        videodata->ime_suppress_endcomposition_event = SDL_FALSE;
         trap = SDL_TRUE;
         break;
     case WM_IME_COMPOSITION:
         trap = SDL_TRUE;
         himc = ImmGetContext(hwnd);
         if (*lParam & GCS_RESULTSTR) {
+            videodata->ime_suppress_endcomposition_event = SDL_TRUE;
             IME_GetCompositionString(videodata, himc, GCS_RESULTSTR);
+            SDL_SendEditingText("", 0, 0);
             IME_SendInputEvent(videodata);
         }
         if (*lParam & GCS_COMPSTR) {
@@ -942,10 +1044,13 @@ IME_HandleMessage(HWND hwnd, UINT msg, WPARAM wParam, LPARAM *lParam, SDL_VideoD
         ImmReleaseContext(hwnd, himc);
         break;
     case WM_IME_ENDCOMPOSITION:
+        videodata->ime_uicontext = 0;
         videodata->ime_composition[0] = 0;
         videodata->ime_readingstring[0] = 0;
         videodata->ime_cursor = 0;
-        SDL_SendEditingText("", 0, 0);
+        if (videodata->ime_suppress_endcomposition_event == SDL_FALSE)
+            SDL_SendEditingText("", 0, 0);
+        videodata->ime_suppress_endcomposition_event = SDL_FALSE;
         break;
     case WM_IME_NOTIFY:
         switch (wParam) {
@@ -959,10 +1064,12 @@ IME_HandleMessage(HWND hwnd, UINT msg, WPARAM wParam, LPARAM *lParam, SDL_VideoD
                 break;
 
             trap = SDL_TRUE;
+            videodata->ime_uicontext = 1;
             IME_GetCandidateList(hwnd, videodata);
             break;
         case IMN_CLOSECANDIDATE:
             trap = SDL_TRUE;
+            videodata->ime_uicontext = 0;
             IME_HideCandidateList(videodata);
             break;
         case IMN_PRIVATE:
diff --git a/src/video/windows/SDL_windowskeyboard.h b/src/video/windows/SDL_windowskeyboard.h
index 76048e78a2c..97382c7bd5d 100644
--- a/src/video/windows/SDL_windowskeyboard.h
+++ b/src/video/windows/SDL_windowskeyboard.h
@@ -32,6 +32,8 @@ extern void WIN_ResetDeadKeys(void);
 extern void WIN_StartTextInput(_THIS);
 extern void WIN_StopTextInput(_THIS);
 extern void WIN_SetTextInputRect(_THIS, SDL_Rect *rect);
+extern void WIN_ClearComposition(_THIS);
+extern SDL_bool WIN_IsTextInputShown(_THIS);
 
 extern SDL_bool IME_HandleMessage(HWND hwnd, UINT msg, WPARAM wParam, LPARAM *lParam, struct SDL_VideoData *videodata);
 
diff --git a/src/video/windows/SDL_windowsvideo.c b/src/video/windows/SDL_windowsvideo.c
index 39806930ba3..4bd2a9831cb 100644
--- a/src/video/windows/SDL_windowsvideo.c
+++ b/src/video/windows/SDL_windowsvideo.c
@@ -216,6 +216,8 @@ WIN_CreateDevice(int devindex)
     device->StartTextInput = WIN_StartTextInput;
     device->StopTextInput = WIN_StopTextInput;
     device->SetTextInputRect = WIN_SetTextInputRect;
+    device->ClearComposition = WIN_ClearComposition;
+    device->IsTextInputShown = WIN_IsTextInputShown;
 
     device->SetClipboardText = WIN_SetClipboardText;
     device->GetClipboardText = WIN_GetClipboardText;
diff --git a/src/video/windows/SDL_windowsvideo.h b/src/video/windows/SDL_windowsvideo.h
index daf68e6c2d6..d4208c40500 100644
--- a/src/video/windows/SDL_windowsvideo.h
+++ b/src/video/windows/SDL_windowsvideo.h
@@ -151,9 +151,11 @@ typedef struct SDL_VideoData
     SDL_bool ime_available;
     HWND ime_hwnd_main;
     HWND ime_hwnd_current;
+    SDL_bool ime_suppress_endcomposition_event;
     HIMC ime_himc;
 
-    WCHAR ime_composition[SDL_TEXTEDITINGEVENT_TEXT_SIZE];
+    WCHAR* ime_composition;
+    int ime_composition_length;
     WCHAR ime_readingstring[16];
     int ime_cursor;
 
@@ -189,6 +191,7 @@ typedef struct SDL_VideoData
     DWORD ime_convmodesinkcookie;
     TSFSink *ime_uielemsink;
     TSFSink *ime_ippasink;
+    LONG ime_uicontext;
 
     BYTE pre_hook_key_state[256];
     UINT _SDL_WAKEUP;