From 4745aa33248dc629d8794402be39cbb9e71f5489 Mon Sep 17 00:00:00 2001
From: "Ryan C. Gordon" <[EMAIL REDACTED]>
Date: Mon, 7 Oct 2024 16:56:35 -0400
Subject: [PATCH] asyncio: Windows IoRing support, other tweaks and fixes.
---
VisualC-GDK/SDL/SDL.vcxproj | 1 +
VisualC-GDK/SDL/SDL.vcxproj.filters | 3 +
VisualC/SDL/SDL.vcxproj | 1 +
VisualC/SDL/SDL.vcxproj.filters | 6 +
Xcode/SDL/SDL.xcodeproj/project.pbxproj | 12 +
src/file/SDL_sysasyncio.h | 8 +-
src/file/io_uring/SDL_asyncio_liburing.c | 73 +--
src/file/windows/SDL_asyncio_windows_ioring.c | 551 ++++++++++++++++++
8 files changed, 621 insertions(+), 34 deletions(-)
create mode 100644 src/file/windows/SDL_asyncio_windows_ioring.c
diff --git a/VisualC-GDK/SDL/SDL.vcxproj b/VisualC-GDK/SDL/SDL.vcxproj
index e00f1705b7666..34a9ae48817d4 100644
--- a/VisualC-GDK/SDL/SDL.vcxproj
+++ b/VisualC-GDK/SDL/SDL.vcxproj
@@ -522,6 +522,7 @@
<ClCompile Include="..\..\src\filesystem\windows\SDL_sysfsops.c" />
<ClCompile Include="..\..\src\file\generic\SDL_asyncio_generic.c" />
<ClCompile Include="..\..\src\file\SDL_asyncio.c" />
+ <ClCompile Include="..\..\src\file\windows\SDL_asyncio_windows_ioring.c" />
<ClCompile Include="..\..\src\main\gdk\SDL_sysmain_runapp.cpp" />
<ClCompile Include="..\..\src\main\generic\SDL_sysmain_callbacks.c" />
<ClCompile Include="..\..\src\main\SDL_main_callbacks.c" />
diff --git a/VisualC-GDK/SDL/SDL.vcxproj.filters b/VisualC-GDK/SDL/SDL.vcxproj.filters
index b6ff2e12fa17e..64cacdc1f8f0b 100644
--- a/VisualC-GDK/SDL/SDL.vcxproj.filters
+++ b/VisualC-GDK/SDL/SDL.vcxproj.filters
@@ -19,6 +19,9 @@
<ClCompile Include="..\..\src\file\SDL_asyncio.c">
<Filter>file</Filter>
</ClCompile>
+ <ClCompile Include="..\..\src\file\windows\SDL_asyncio_windows_ioring.c">
+ <Filter>file\windows</Filter>
+ </ClCompile>
<ClCompile Include="..\..\src\render\direct3d12\SDL_render_d3d12_xbox.cpp" />
<ClCompile Include="..\..\src\render\direct3d12\SDL_shaders_d3d12_xboxone.cpp" />
<ClCompile Include="..\..\src\render\direct3d12\SDL_shaders_d3d12_xboxseries.cpp" />
diff --git a/VisualC/SDL/SDL.vcxproj b/VisualC/SDL/SDL.vcxproj
index 6aae9e2eb8c97..19da967fd3579 100644
--- a/VisualC/SDL/SDL.vcxproj
+++ b/VisualC/SDL/SDL.vcxproj
@@ -415,6 +415,7 @@
<ClCompile Include="..\..\src\dialog\SDL_dialog_utils.c" />
<ClCompile Include="..\..\src\filesystem\SDL_filesystem.c" />
<ClCompile Include="..\..\src\filesystem\windows\SDL_sysfsops.c" />
+ <ClCompile Include="..\..\src\file\windows\SDL_asyncio_windows_ioring.c" />
<ClCompile Include="..\..\src\gpu\SDL_gpu.c" />
<ClCompile Include="..\..\src\gpu\d3d12\SDL_gpu_d3d12.c" />
<ClCompile Include="..\..\src\gpu\vulkan\SDL_gpu_vulkan.c" />
diff --git a/VisualC/SDL/SDL.vcxproj.filters b/VisualC/SDL/SDL.vcxproj.filters
index bfbd815fc7b96..5e6bd30ff12ab 100644
--- a/VisualC/SDL/SDL.vcxproj.filters
+++ b/VisualC/SDL/SDL.vcxproj.filters
@@ -214,6 +214,9 @@
<Filter Include="file\generic">
<UniqueIdentifier>{00004d6806b6238cae0ed62db5440000}</UniqueIdentifier>
</Filter>
+ <Filter Include="file\windows">
+ <UniqueIdentifier>{000028b2ea36d7190d13777a4dc70000}</UniqueIdentifier>
+ </Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\include\SDL3\SDL_begin_code.h">
@@ -962,6 +965,9 @@
<ClCompile Include="..\..\src\file\SDL_asyncio.c">
<Filter>file</Filter>
</ClCompile>
+ <ClCompile Include="..\..\src\file\windows\SDL_asyncio_windows_ioring.c">
+ <Filter>file\windows</Filter>
+ </ClCompile>
<ClCompile Include="..\..\src\main\generic\SDL_sysmain_callbacks.c">
<Filter>main\generic</Filter>
</ClCompile>
diff --git a/Xcode/SDL/SDL.xcodeproj/project.pbxproj b/Xcode/SDL/SDL.xcodeproj/project.pbxproj
index e26d575c8ab92..aa4de73ec5b08 100644
--- a/Xcode/SDL/SDL.xcodeproj/project.pbxproj
+++ b/Xcode/SDL/SDL.xcodeproj/project.pbxproj
@@ -552,6 +552,7 @@
00005081394CCF8322BE0000 /* SDL_sysasyncio.h in Headers */ = {isa = PBXBuildFile; fileRef = 0000585B2CAB450B40540000 /* SDL_sysasyncio.h */; };
000018AF97C08F2DAFFD0000 /* SDL_asyncio.h in Headers */ = {isa = PBXBuildFile; fileRef = 00004945A946DF5B1AED0000 /* SDL_asyncio.h */; settings = {ATTRIBUTES = (Public, ); }; };
00004D0B73767647AD550000 /* SDL_asyncio_generic.c in Sources */ = {isa = PBXBuildFile; fileRef = 0000FB02CDE4BE34A87E0000 /* SDL_asyncio_generic.c */; };
+ 0000A03C0F32C43816F40000 /* SDL_asyncio_windows_ioring.c in Sources */ = {isa = PBXBuildFile; fileRef = 000030DD21496B5C0F210000 /* SDL_asyncio_windows_ioring.c */; };
/* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */
@@ -1134,6 +1135,7 @@
0000585B2CAB450B40540000 /* SDL_sysasyncio.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_sysasyncio.h; path = SDL_sysasyncio.h; sourceTree = "<group>"; };
00004945A946DF5B1AED0000 /* SDL_asyncio.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SDL_asyncio.h; path = SDL3/SDL_asyncio.h; sourceTree = "<group>"; };
0000FB02CDE4BE34A87E0000 /* SDL_asyncio_generic.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = SDL_asyncio_generic.c; path = SDL_asyncio_generic.c; sourceTree = "<group>"; };
+ 000030DD21496B5C0F210000 /* SDL_asyncio_windows_ioring.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = SDL_asyncio_windows_ioring.c; path = SDL_asyncio_windows_ioring.c; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@@ -1947,6 +1949,7 @@
0000919399B1A908267F0000 /* SDL_asyncio_c.h */,
0000585B2CAB450B40540000 /* SDL_sysasyncio.h */,
000013C0F2EADC24ADC10000 /* generic */,
+ 000064F9A2AAE947C1CD0000 /* windows */,
);
path = file;
sourceTree = "<group>";
@@ -2447,6 +2450,14 @@
path = generic;
sourceTree = "<group>";
};
+ 000064F9A2AAE947C1CD0000 /* windows */ = {
+ isa = PBXGroup;
+ children = (
+ 000030DD21496B5C0F210000 /* SDL_asyncio_windows_ioring.c */,
+ );
+ path = windows;
+ sourceTree = "<group>";
+ };
/* End PBXGroup section */
/* Begin PBXHeadersBuildPhase section */
@@ -3073,6 +3084,7 @@
6312C66D2B42341400A7BB00 /* SDL_murmur3.c in Sources */,
0000AEB9AE90228CA2D60000 /* SDL_asyncio.c in Sources */,
00004D0B73767647AD550000 /* SDL_asyncio_generic.c in Sources */,
+ 0000A03C0F32C43816F40000 /* SDL_asyncio_windows_ioring.c in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
diff --git a/src/file/SDL_sysasyncio.h b/src/file/SDL_sysasyncio.h
index 70824fc3c74bb..6fb84907e1e68 100644
--- a/src/file/SDL_sysasyncio.h
+++ b/src/file/SDL_sysasyncio.h
@@ -24,11 +24,17 @@
#ifndef SDL_sysasyncio_h_
#define SDL_sysasyncio_h_
+#if defined(SDL_PLATFORM_WINDOWS) && defined(NTDDI_WIN10_CO)
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && NTDDI_VERSION >= NTDDI_WIN10_CO
+#define HAVE_IORINGAPI_H
+#endif
+#endif
+
// If your platform has an option other than the "generic" code, make sure this
// is #defined to 0 instead and implement the SDL_SYS_* functions below in your
// backend (having them maybe call into the SDL_SYS_*_Generic versions as a
// fallback if the platform has functionality that isn't always available).
-#ifdef HAVE_LIBURING_H
+#if defined(HAVE_LIBURING_H) || defined(HAVE_IORINGAPI_H)
#define SDL_ASYNCIO_ONLY_HAVE_GENERIC 0
#else
#define SDL_ASYNCIO_ONLY_HAVE_GENERIC 1
diff --git a/src/file/io_uring/SDL_asyncio_liburing.c b/src/file/io_uring/SDL_asyncio_liburing.c
index eaae7d852a057..9d71dead159f1 100644
--- a/src/file/io_uring/SDL_asyncio_liburing.c
+++ b/src/file/io_uring/SDL_asyncio_liburing.c
@@ -130,6 +130,7 @@ static bool LoadLibUring(void)
result = LoadLibUringSyms();
if (result) {
static const int needed_ops[] = {
+ IORING_OP_NOP,
IORING_OP_FSYNC,
IORING_OP_TIMEOUT,
IORING_OP_CLOSE,
@@ -193,16 +194,21 @@ static void liburing_asyncioqueue_cancel_task(void *userdata, SDL_AsyncIOTask *t
}
LibUringAsyncIOQueueData *queuedata = (LibUringAsyncIOQueueData *) userdata;
+
+ // have to hold a lock because otherwise two threads could get_sqe and submit while one request isn't fully set up.
+ SDL_LockMutex(queuedata->sqe_lock);
struct io_uring_sqe *sqe = liburing.io_uring_get_sqe(&queuedata->ring);
if (!sqe) {
- SDL_free(cancel_task);
- return; // oh well, the task can just finish on its own.
+ SDL_UnlockMutex(queuedata->sqe_lock);
+ SDL_free(cancel_task); // oh well, the task can just finish on its own.
+ return;
}
cancel_task->app_userdata = task;
liburing.io_uring_prep_cancel(sqe, task, 0);
liburing.io_uring_sqe_set_data(sqe, cancel_task);
liburing_asyncioqueue_queue_task(userdata, task);
+ SDL_UnlockMutex(queuedata->sqe_lock);
}
static SDL_AsyncIOTask *ProcessCQE(LibUringAsyncIOQueueData *queuedata, struct io_uring_cqe *cqe)
@@ -375,15 +381,15 @@ static bool liburing_asyncio_read(void *userdata, SDL_AsyncIOTask *task)
// have to hold a lock because otherwise two threads could get_sqe and submit while one request isn't fully set up.
SDL_LockMutex(queuedata->sqe_lock);
+ bool retval;
struct io_uring_sqe *sqe = liburing.io_uring_get_sqe(&queuedata->ring);
if (!sqe) {
- return SDL_SetError("io_uring: submission queue is full");
+ retval = SDL_SetError("io_uring: submission queue is full");
+ } else {
+ liburing.io_uring_prep_read(sqe, fd, task->buffer, (unsigned) task->requested_size, task->offset);
+ liburing.io_uring_sqe_set_data(sqe, task);
+ retval = task->queue->iface.queue_task(task->queue->userdata, task);
}
-
- liburing.io_uring_prep_read(sqe, fd, task->buffer, (unsigned) task->requested_size, task->offset);
- liburing.io_uring_sqe_set_data(sqe, task);
-
- const bool retval = task->queue->iface.queue_task(task->queue->userdata, task);
SDL_UnlockMutex(queuedata->sqe_lock);
return retval;
}
@@ -401,15 +407,15 @@ static bool liburing_asyncio_write(void *userdata, SDL_AsyncIOTask *task)
// have to hold a lock because otherwise two threads could get_sqe and submit while one request isn't fully set up.
SDL_LockMutex(queuedata->sqe_lock);
+ bool retval;
struct io_uring_sqe *sqe = liburing.io_uring_get_sqe(&queuedata->ring);
if (!sqe) {
- return SDL_SetError("io_uring: submission queue is full");
+ retval = SDL_SetError("io_uring: submission queue is full");
+ } else {
+ liburing.io_uring_prep_write(sqe, fd, task->buffer, (unsigned) task->requested_size, task->offset);
+ liburing.io_uring_sqe_set_data(sqe, task);
+ retval = task->queue->iface.queue_task(task->queue->userdata, task);
}
-
- liburing.io_uring_prep_write(sqe, fd, task->buffer, (unsigned) task->requested_size, task->offset);
- liburing.io_uring_sqe_set_data(sqe, task);
-
- const bool retval = task->queue->iface.queue_task(task->queue->userdata, task);
SDL_UnlockMutex(queuedata->sqe_lock);
return retval;
}
@@ -421,30 +427,31 @@ static bool liburing_asyncio_close(void *userdata, SDL_AsyncIOTask *task)
// have to hold a lock because otherwise two threads could get_sqe and submit while one request isn't fully set up.
SDL_LockMutex(queuedata->sqe_lock);
+ bool retval;
struct io_uring_sqe *sqe = liburing.io_uring_get_sqe(&queuedata->ring);
if (!sqe) {
- return SDL_SetError("io_uring: submission queue is full");
- }
-
- if (task->flush) {
- struct io_uring_sqe *flush_sqe = sqe;
- sqe = liburing.io_uring_get_sqe(&queuedata->ring); // this will be our actual close task.
- if (!sqe) {
- liburing.io_uring_prep_nop(flush_sqe); // we already have the first sqe, just make it a NOP.
- liburing.io_uring_sqe_set_data(flush_sqe, NULL);
- task->queue->iface.queue_task(task->queue->userdata, task);
- return SDL_SetError("io_uring: submission queue is full");
+ retval = SDL_SetError("io_uring: submission queue is full");
+ } else {
+ if (task->flush) {
+ struct io_uring_sqe *flush_sqe = sqe;
+ sqe = liburing.io_uring_get_sqe(&queuedata->ring); // this will be our actual close task.
+ if (!sqe) {
+ liburing.io_uring_prep_nop(flush_sqe); // we already have the first sqe, just make it a NOP.
+ liburing.io_uring_sqe_set_data(flush_sqe, NULL);
+ task->queue->iface.queue_task(task->queue->userdata, task);
+ SDL_UnlockMutex(queuedata->sqe_lock);
+ return SDL_SetError("io_uring: submission queue is full");
+ }
+ liburing.io_uring_prep_fsync(flush_sqe, fd, IORING_FSYNC_DATASYNC);
+ liburing.io_uring_sqe_set_data(flush_sqe, task);
+ liburing.io_uring_sqe_set_flags(flush_sqe, IOSQE_IO_HARDLINK); // must complete before next sqe starts, and next sqe should run even if this fails.
}
- liburing.io_uring_prep_fsync(flush_sqe, fd, IORING_FSYNC_DATASYNC);
- liburing.io_uring_sqe_set_data(flush_sqe, task);
- liburing.io_uring_sqe_set_flags(flush_sqe, IOSQE_IO_HARDLINK); // must complete before next sqe starts, and next sqe should run even if this fails.
- }
-
- liburing.io_uring_prep_close(sqe, fd);
- liburing.io_uring_sqe_set_data(sqe, task);
+ liburing.io_uring_prep_close(sqe, fd);
+ liburing.io_uring_sqe_set_data(sqe, task);
- const bool retval = task->queue->iface.queue_task(task->queue->userdata, task);
+ retval = task->queue->iface.queue_task(task->queue->userdata, task);
+ }
SDL_UnlockMutex(queuedata->sqe_lock);
return retval;
}
diff --git a/src/file/windows/SDL_asyncio_windows_ioring.c b/src/file/windows/SDL_asyncio_windows_ioring.c
new file mode 100644
index 0000000000000..de22b2cdaf72b
--- /dev/null
+++ b/src/file/windows/SDL_asyncio_windows_ioring.c
@@ -0,0 +1,551 @@
+/*
+ Simple DirectMedia Layer
+ Copyright (C) 1997-2024 Sam Lantinga <slouken@libsdl.org>
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+*/
+
+// The Windows backend uses IoRing for asynchronous i/o, and falls back to
+// the "generic" threadpool implementation if it isn't available or
+// fails for some other reason. IoRing was introduced in Windows 11.
+
+#include "SDL_internal.h"
+#include "../SDL_sysasyncio.h"
+
+#ifdef HAVE_IORINGAPI_H
+
+#include "../../core/windows/SDL_windows.h"
+#include <ioringapi.h>
+
+// Don't know what the lowest usable version is, but this seems safe.
+#define SDL_REQUIRED_IORING_VERSION IORING_VERSION_3
+
+static SDL_InitState ioring_init;
+
+// We could add a whole bootstrap thing like the audio/video/etc subsystems use, but let's keep this simple for now.
+static bool (*CreateAsyncIOQueue)(SDL_AsyncIOQueue *queue);
+static void (*QuitAsyncIO)(void);
+static bool (*AsyncIOFromFile)(const char *file, const char *mode, SDL_AsyncIO *asyncio);
+
+// we never link directly to ioring.
+static const char *ioring_library = "KernelBase.dll";
+static void *ioring_handle = NULL;
+
+#define SDL_IORING_FUNCS \
+ SDL_IORING_FUNC(HRESULT, QueryIoRingCapabilities, (IORING_CAPABILITIES *capabilities)) \
+ SDL_IORING_FUNC(BOOL, IsIoRingOpSupported, (HIORING ioRing, IORING_OP_CODE op)) \
+ SDL_IORING_FUNC(HRESULT, CreateIoRing, (IORING_VERSION ioringVersion, IORING_CREATE_FLAGS flags, UINT32 submissionQueueSize, UINT32 completionQueueSize, HIORING* h)) \
+ SDL_IORING_FUNC(HRESULT, GetIoRingInfo, (HIORING ioRing, IORING_INFO* info)) \
+ SDL_IORING_FUNC(HRESULT, SubmitIoRing, (HIORING ioRing, UINT32 waitOperations, UINT32 milliseconds, UINT32* submittedEntries)) \
+ SDL_IORING_FUNC(HRESULT, CloseIoRing, (HIORING ioRing)) \
+ SDL_IORING_FUNC(HRESULT, PopIoRingCompletion, (HIORING ioRing, IORING_CQE* cqe)) \
+ SDL_IORING_FUNC(HRESULT, SetIoRingCompletionEvent, (HIORING ioRing, HANDLE hEvent)) \
+ SDL_IORING_FUNC(HRESULT, BuildIoRingCancelRequest, (HIORING ioRing, IORING_HANDLE_REF file, UINT_PTR opToCancel, UINT_PTR userData)) \
+ SDL_IORING_FUNC(HRESULT, BuildIoRingReadFile, (HIORING ioRing, IORING_HANDLE_REF fileRef, IORING_BUFFER_REF dataRef, UINT32 numberOfBytesToRead, UINT64 fileOffset, UINT_PTR userData, IORING_SQE_FLAGS sqeFlags)) \
+ SDL_IORING_FUNC(HRESULT, BuildIoRingWriteFile, (HIORING ioRing, IORING_HANDLE_REF fileRef, IORING_BUFFER_REF bufferRef, UINT32 numberOfBytesToWrite, UINT64 fileOffset, FILE_WRITE_FLAGS writeFlags, UINT_PTR userData, IORING_SQE_FLAGS sqeFlags)) \
+ SDL_IORING_FUNC(HRESULT, BuildIoRingFlushFile, (HIORING ioRing, IORING_HANDLE_REF fileRef, FILE_FLUSH_MODE flushMode, UINT_PTR userData, IORING_SQE_FLAGS sqeFlags)) \
+
+#define SDL_IORING_FUNC(ret, fn, args) typedef ret (WINAPI *SDL_fntype_##fn) args;
+SDL_IORING_FUNCS
+#undef SDL_IORING_FUNC
+
+typedef struct SDL_WinIoRingFunctions
+{
+ #define SDL_IORING_FUNC(ret, fn, args) SDL_fntype_##fn fn;
+ SDL_IORING_FUNCS
+ #undef SDL_IORING_FUNC
+} SDL_WinIoRingFunctions;
+
+static SDL_WinIoRingFunctions ioring;
+
+
+typedef struct WinIoRingAsyncIOQueueData
+{
+ SDL_Mutex *sqe_lock;
+ SDL_Mutex *cqe_lock;
+ HANDLE event;
+ HIORING ring;
+ SDL_AtomicInt num_waiting;
+} WinIoRingAsyncIOQueueData;
+
+
+static void UnloadWinIoRingLibrary(void)
+{
+ if (ioring_handle) {
+ SDL_UnloadObject(ioring_handle);
+ ioring_handle = NULL;
+ }
+ SDL_zero(ioring);
+}
+
+static bool LoadWinIoRingSyms(void)
+{
+ #define SDL_IORING_FUNC(ret, fn, args) { \
+ ioring.fn = (SDL_fntype_##fn) SDL_LoadFunction(ioring_handle, #fn); \
+ if (!ioring.fn) { \
+ return false; \
+ } \
+ }
+ SDL_IORING_FUNCS
+ #undef SDL_IORING_FUNC
+ return true;
+}
+
+static bool LoadWinIoRing(void)
+{
+ bool result = true;
+
+ if (!ioring_handle) {
+ ioring_handle = SDL_LoadObject(ioring_library);
+ if (!ioring_handle) {
+ result = false;
+ // Don't call SDL_SetError(): SDL_LoadObject already did.
+ } else {
+ result = LoadWinIoRingSyms();
+ if (result) {
+ IORING_CAPABILITIES caps;
+ HRESULT hr = ioring.QueryIoRingCapabilities(&caps);
+ if (FAILED(hr)) {
+ result = false;
+ } else if (caps.MaxVersion < SDL_REQUIRED_IORING_VERSION) {
+ result = false;
+ }
+ }
+
+ if (!result) {
+ UnloadWinIoRingLibrary();
+ }
+ }
+ }
+ return result;
+}
+
+static Sint64 ioring_asyncio_size(void *userdata)
+{
+ HANDLE handle = (HANDLE) userdata;
+ LARGE_INTEGER size;
+ if (!GetFileSizeEx(handle, &size)) {
+ WIN_SetError("GetFileSizeEx");
+ return -1;
+ }
+ return (Sint64) size.QuadPart;
+}
+
+// you must hold sqe_lock when calling this!
+static bool ioring_asyncioqueue_queue_task(void *userdata, SDL_AsyncIOTask *task)
+{
+ WinIoRingAsyncIOQueueData *queuedata = (WinIoRingAsyncIOQueueData *) userdata;
+ const HRESULT hr = ioring.SubmitIoRing(queuedata->ring, 0, 0, NULL);
+ return (FAILED(hr) ? WIN_SetErrorFromHRESULT("SubmitIoRing", hr) : true);
+}
+
+static void ioring_asyncioqueue_cancel_task(void *userdata, SDL_AsyncIOTask *task)
+{
+ if (!task->asyncio || !task->asyncio->userdata) {
+ return; // Windows IoRing needs the file handle in question, so we'll just have to let it complete if unknown.
+ }
+
+ SDL_AsyncIOTask *cancel_task = (SDL_AsyncIOTask *) SDL_calloc(1, sizeof (*cancel_task));
+ if (!cancel_task) {
+ return; // oh well, the task can just finish on its own.
+ }
+
+ WinIoRingAsyncIOQueueData *queuedata = (WinIoRingAsyncIOQueueData *) userdata;
+ HANDLE handle = (HANDLE) task->asyncio->userdata;
+ IORING_HANDLE_REF href = IoRingHandleRefFromHandle(handle);
+
+ // have to hold a lock because otherwise two threads could get_sqe and submit while one request isn't fully set up.
+ SDL_LockMutex(queuedata->sqe_lock);
+ const HRESULT hr = ioring.BuildIoRingCancelRequest(queuedata->ring, href, (UINT_PTR) task, (UINT_PTR) cancel_task);
+ if (FAILED(hr)) {
+ SDL_UnlockMutex(queuedata->sqe_lock);
+ SDL_free(cancel_task); // oh well, the task can just finish on its own.
+ return;
+ }
+
+ cancel_task->app_userdata = task;
+ ioring_asyncioqueue_queue_task(userdata, task);
+ SDL_UnlockMutex(queuedata->sqe_lock);
+}
+
+static SDL_AsyncIOTask *ProcessCQE(WinIoRingAsyncIOQueueData *queuedata, IORING_CQE *cqe)
+{
+ if (!cqe) {
+ return NULL;
+ }
+
+ SDL_AsyncIOTask *task = (SDL_AsyncIOTask *) cqe->UserData;
+ if (task) { // can be NULL if this was just a wakeup message, a NOP, etc.
+ if (!task->queue) { // We leave `queue` blank to signify this was a task cancellation.
+ SDL_AsyncIOTask *cancel_task = task;
+ task = (SDL_AsyncIOTask *) cancel_task->app_userdata;
+ SDL_free(cancel_task);
+ if (SUCCEEDED(cqe->ResultCode)) { // cancel was successful?
+ task->result = SDL_ASYNCIO_CANCELLED;
+ } else {
+ task = NULL; // it already finished or was too far along to cancel, so we'll pick up the actual results later.
+ }
+ } else if (FAILED(cqe->ResultCode)) {
+ task->result = SDL_ASYNCIO_FAILURE;
+ // !!! FIXME: fill in task->error.
+ } else {
+ if ((task->type == SDL_ASYNCIO_TASK_WRITE) && (((Uint64) cqe->Information) < task->requested_size)) {
+ task->result = SDL_ASYNCIO_FAILURE; // it's always a failure on short writes.
+ }
+
+ // don't explicitly mark it as COMPLETE; that's the default value and a linked task might have failed in an earlier operation and this would overwrite it.
+
+ if ((task->type == SDL_ASYNCIO_TASK_READ) || (task->type == SDL_ASYNCIO_TASK_WRITE)) {
+ task->result_size = (Uint64) cqe->Information;
+ }
+ }
+
+ // we currently send all close operations through as flushes, requested or not, so the actually closing is (in theory) fast. We do that here.
+ // if a later IoRing interface version offers an asynchronous close operation, revisit this to only flush if requested, like we do in the Linux io_uring code.
+ if (task->type == SDL_ASYNCIO_TASK_CLOSE) {
+ SDL_assert(task->asyncio != NULL);
+ SDL_assert(task->asyncio->userdata != NULL);
+ HANDLE handle = (HANDLE) task->asyncio->userdata;
+ if (!CloseHandle(handle)) {
+ task->result = SDL_ASYNCIO_FAILURE; // shrug.
+ }
+ }
+ }
+
+ return task;
+}
+
+static SDL_AsyncIOTask *ioring_asyncioqueue_get_results(void *userdata)
+{
+ WinIoRingAsyncIOQueueData *queuedata = (WinIoRingAsyncIOQueueData *) userdata;
+
+ // unlike liburing's io_uring_peek_cqe(), it's possible PopIoRingCompletion() is thread safe, but for now we wrap it in a mutex just in case.
+ SDL_LockMutex(queuedata->cqe_lock);
+ IORING_CQE cqe;
+ const HRESULT hr = ioring.PopIoRingCompletion(queuedata->ring, &cqe);
+ SDL_UnlockMutex(queuedata->cqe_lock);
+
+ if ((hr == S_FALSE) || FAILED(hr)) {
+ return NULL; // nothing available at the moment.
+ }
+
+ return ProcessCQE(queuedata, &cqe);
+}
+
+static SDL_AsyncIOTask *ioring_asyncioqueue_wait_results(void *userdata, Sint32 timeoutMS)
+{
+ WinIoRingAsyncIOQueueData *queuedata = (WinIoRingAsyncIOQueueData *) userdata;
+
+
+ // the event only signals when the IoRing moves from empty to non-empty, so you have to try a (non-blocking) get_results first or risk eternal hangs.
+ SDL_AsyncIOTask *task = ioring_asyncioqueue_get_results(userdata);
+ if (!task) {
+ SDL_AddAtomicInt(&queuedata->num_waiting, 1);
+ WaitForSingleObject(queuedata->event, (timeoutMS < 0) ? INFINITE : (DWORD) timeoutMS);
+ SDL_AddAtomicInt(&queuedata->num_waiting, -1);
+
+ // (we don't care if the wait failed for any reason, as the upcoming get_results will report valid information. We just wanted the wait operation to block.)
+ task = ioring_asyncioqueue_get_results(userdata);
+ }
+
+ return task;
+}
+
+static void ioring_asyncioqueue_signal(void *userdata)
+{
+ WinIoRingAsyncIOQueueData *queuedata = (WinIoRingAsyncIOQueueData *) userdata;
+ const int num_waiting = SDL_GetAtomicInt(&queuedata->num_waiting);
+ for (int i = 0; i < num_waiting; i++) {
+ SetEvent(queuedata->event);
+ }
+}
+
+static void ioring_asyncioqueue_destroy(void *userdata)
+{
+ WinIoRingAsyncIOQueueData *queuedata = (WinIoRingAsyncIOQueueData *) userdata;
+ ioring.CloseIoRing(queuedata->ring);
+ CloseHandle(queuedata->event);
+ SDL_DestroyMutex(queuedata->sqe_lock);
+ SDL_DestroyMutex(queuedata->cqe_lock);
+ SDL_free(queuedata);
+}
+
+static bool SDL_SYS_CreateAsyncIOQueue_ioring(SDL_AsyncIOQueue *queue)
+{
+ WinIoRingAsyncIOQueueData *queuedata = (WinIoRingAsyncIOQueueData *) SDL_calloc(1, sizeof (*queuedata));
+ if (!queuedata) {
+ return false;
+ }
+
+ HRESULT hr;
+ IORING_CREATE_FLAGS flags;
+
+ SDL_SetAtomicInt(&queuedata->num_waiting, 0);
+
+ queuedata->sqe_lock = SDL_CreateMutex();
+ if (!queuedata->sqe_lock) {
+ goto failed;
+ }
+
+ queuedata->cqe_lock = SDL_CreateMutex();
+ if (!queuedata->cqe_lock) {
+ goto failed;
+ }
+
+ queuedata->event = CreateEventW(NULL, FALSE, FALSE, NULL);
+ if (!queuedata->event) {
+ WIN_SetError("CreateEventW");
+ goto failed;
+ }
+
+ // !!! FIXME: no idea how large the queue should be. Is 128 overkill or too small?
+ flags.Required = IORING_CREATE_REQUIRED_FLAGS_NONE;
+ flags.Advisory = IORING_CREATE_ADVISORY_FLAGS_NONE;
+ hr = ioring.CreateIoRing(SDL_REQUIRED_IORING_VERSION, flags, 128, 128, &queuedata->ring);
+ if (FAILED(hr)) {
+ WIN_SetErrorFromHRESULT("CreateIoRing", hr);
+ goto failed;
+ }
+
+ hr = ioring.SetIoRingCompletionEvent(queuedata->ring, queuedata->event);
+ if (FAILED(hr)) {
+ WIN_SetErrorFromHRESULT("SetIoRingCompletionEvent", hr);
+ goto failed;
+ }
+
+ static const IORING_OP_CODE needed_ops[] = {
+ IORING_OP_NOP,
+ IORING_OP_FLUSH,
+ IORING_OP_READ,
+ IORING_OP_WRITE,
+ IORING_OP_CANCEL
+ };
+
+ for (int i = 0; i < SDL_arraysize(needed_ops); i++) {
+ if (!ioring.IsIoRingOpSupported(queuedata->ring, needed_ops[i])) {
+ SDL_SetError("Created IoRing doesn't support op %u", (unsigned int) needed_ops[i]);
+ goto failed;
+ }
+ }
+
+ static const SDL_AsyncIOQueueInterface SDL_AsyncIOQueue_ioring = {
+ ioring_asyncioqueue_queue_task,
+ ioring_asyncioqueue_cancel_task,
+ ioring_asyncioqueue_get_results,
+ ioring_asyncioqueue_wait_results,
+ ioring_asyncioqueue_signal,
+ ioring_asyncioqueue_destroy
+ };
+
+ SDL_copyp(&queue->iface, &SDL_AsyncIOQueue_ioring);
+ queue->userdata = queuedata;
+ return true;
+
+failed:
+ if (queuedata->ring) {
+ ioring.CloseIoRing(queuedata->ring);
+ }
+ if (queuedata->event) {
+ CloseHandle(queuedata->event);
+ }
+ if (queuedata->sqe_lock) {
+ SDL_DestroyMutex(queuedata->sqe_lock);
+ }
+ if (queuedata->cqe_lock) {
+ SDL_DestroyMutex(queuedata->cqe_lock);
+ }
+ SDL_free(queuedata);
+ return false;
+}
+
+static bool ioring_asyncio_read(void *userdata, SDL_AsyncIOTask *task)
+{
+ // !!! FIXME: UINT32 smaller than requested_size's Uint64. If we overflow it, we could try submitting multiple SQEs
+ // !!! FIXME: and make a note in the task that there are several in sequence.
+ if (task->requested_size > 0xFFFFFFFF) {
+ return SDL_SetError("ioring: i/o task is too large");
+ }
+
+ HANDLE handle = (HANDLE) userdata;
+ WinIoRingAsyncIOQueueData *queuedata = (WinIoRingAsyncIOQueueData *) task->queue->userdata;
+ IORING_HANDLE_REF href = IoRingHandleRefFromHandle(handle);
+ IORING_BUFFER_REF bref = IoRingBufferRefFromPointer(task->buffer);
+
+ // have to hold a lock because otherwise two threads could get_sqe and submit while one request isn't fully set up.
+ SDL_LockMutex(queuedata->sqe_lock);
+ bool retval;
+ const HRESULT hr = ioring.BuildIoRingReadFile(queuedata->ring, href, bref, (UINT32) task->requested_size, task->offset, (UINT_PTR) task, IOSQE_FLAGS_NONE);
+ if (FAILED(hr)) {
+ retval = WIN_SetErrorFromHRESULT("BuildIoRingReadFile", hr);
+ } else {
+ retval = task->queue->iface.queue_task(task->queue->userdata, task);
+ }
+ SDL_UnlockMutex(queuedata->sqe_lock);
+ return retval;
+}
+
+static bool ioring_asyncio_write(void *userdata, SDL_AsyncIOTask *task)
+{
+ // !!! FIXME: UINT32 smaller than requested_size's Uint64. If we overflow it, we could try submitting multiple SQEs
+ // !!! FIXME: and make a note in the task that there are several in sequence.
+ if (task->requested_size > 0xFFFFFFFF) {
+ return SDL_SetError("ioring: i/o task is too large");
+ }
+
+ HANDLE handle = (HANDLE) userdata;
+ WinIoRingAsyncIOQueueData *queuedata = (WinIoRingAsyncIOQueueData *) task->queue->userdata;
+ IORING_HANDLE_REF href = IoRingHandleRefFromHandle(handle);
+ IORING_BUFFER_REF bref = IoRingBufferRefFromPointer(task->buffer);
+
+ // have to hold a lock because otherwise two threads could get_sqe and submit while one request isn't fully set up.
+ SDL_LockMutex(queuedata->sqe_lock);
+ bool retval;
+ const HRESULT hr = ioring.BuildIoRingWriteFile(queuedata->ring, href, bref, (UINT32) task->requested_size, task->offset, FILE_WRITE_FLAGS_NONE, (UINT_PTR) task, IOSQE_FLAGS_NONE);
+ if (FAILED(hr)) {
+ retval = WIN_SetErrorFromHRESULT("BuildIoRingWriteFile", hr);
+ } else {
+ retval = task->queue->iface.queue_task(task->queue->userdata, task);
+ }
+ SDL_UnlockMutex(queuedata->sqe_lock);
+ return retval;
+}
+
+static bool ioring_asyncio_close(void *userdata, SDL_AsyncIOTask *task)
+{
+ // current IoRing operations don't offer asynchronous closing, but let's assume most of the potential work is flushing to disk, so just do it for eve
(Patch may be truncated, please check the link at the top of this post.)