From b4dff42dcd3864ebb6d4ce7a2a35d0140f5b2bd2 Mon Sep 17 00:00:00 2001
From: Evan Hemsley <[EMAIL REDACTED]>
Date: Tue, 29 Oct 2024 14:43:22 -0700
Subject: [PATCH] GPU: Add SDL_CancelGPUCommandBuffer (#11316)
---------
Co-authored-by: Caleb Cornett <caleb.cornett@outlook.com>
---
include/SDL3/SDL_gpu.h | 27 +++++
src/dynapi/SDL_dynapi.sym | 1 +
src/dynapi/SDL_dynapi_overrides.h | 1 +
src/dynapi/SDL_dynapi_procs.h | 1 +
src/gpu/SDL_gpu.c | 32 +++++-
src/gpu/SDL_sysgpu.h | 5 +
src/gpu/d3d11/SDL_gpu_d3d11.c | 77 ++++++++-----
src/gpu/d3d12/SDL_gpu_d3d12.c | 54 ++++++---
src/gpu/metal/SDL_gpu_metal.m | 166 ++++++++++++++++++++++------
src/gpu/vulkan/SDL_gpu_vulkan.c | 178 +++++++++++++++++-------------
10 files changed, 392 insertions(+), 150 deletions(-)
diff --git a/include/SDL3/SDL_gpu.h b/include/SDL3/SDL_gpu.h
index 91174e24d4b7d..899297ef07125 100644
--- a/include/SDL3/SDL_gpu.h
+++ b/include/SDL3/SDL_gpu.h
@@ -3529,6 +3529,11 @@ extern SDL_DECLSPEC SDL_GPUTextureFormat SDLCALL SDL_GetGPUSwapchainTextureForma
* freed by the user. You MUST NOT call this function from any thread other
* than the one that created the window.
*
+ * When using SDL_GPU_PRESENTMODE_VSYNC, this function will block if too many frames are in flight.
+ * Otherwise, this function will fill the swapchain texture handle with NULL if too many frames are in flight.
+ * The best practice is to call SDL_CancelGPUCommandBuffer if the swapchain texture handle is NULL
+ * to avoid enqueuing needless work on the GPU.
+ *
* \param command_buffer a command buffer.
* \param window a window that has been claimed.
* \param swapchain_texture a pointer filled in with a swapchain texture
@@ -3542,9 +3547,11 @@ extern SDL_DECLSPEC SDL_GPUTextureFormat SDLCALL SDL_GetGPUSwapchainTextureForma
*
* \since This function is available since SDL 3.1.3.
*
+ * \sa SDL_GPUPresentMode
* \sa SDL_ClaimWindowForGPUDevice
* \sa SDL_SubmitGPUCommandBuffer
* \sa SDL_SubmitGPUCommandBufferAndAcquireFence
+ * \sa SDL_CancelGPUCommandBuffer
* \sa SDL_GetWindowSizeInPixels
*/
extern SDL_DECLSPEC bool SDLCALL SDL_AcquireGPUSwapchainTexture(
@@ -3603,6 +3610,26 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SubmitGPUCommandBuffer(
extern SDL_DECLSPEC SDL_GPUFence *SDLCALL SDL_SubmitGPUCommandBufferAndAcquireFence(
SDL_GPUCommandBuffer *command_buffer);
+/**
+ * Cancels a command buffer. None of the enqueued commands are executed.
+ *
+ * This must be called from the thread the command buffer was acquired on.
+ *
+ * You must not reference the command buffer after calling this function.
+ * It is an error to call this function after a swapchain texture has been acquired.
+ *
+ * \param command_buffer a command buffer.
+ * \returns true on success, false on error; call SDL_GetError() for more
+ * information.
+ *
+ * \since This function is available since SDL 3.2.0.
+ *
+ * \sa SDL_AcquireGPUCommandBuffer
+ * \sa SDL_AcquireGPUSwapchainTexture
+ */
+extern SDL_DECLSPEC bool SDLCALL SDL_CancelGPUCommandBuffer(
+ SDL_GPUCommandBuffer *command_buffer);
+
/**
* Blocks the thread until the GPU is completely idle.
*
diff --git a/src/dynapi/SDL_dynapi.sym b/src/dynapi/SDL_dynapi.sym
index e0efccc2b014f..fcdb93b05fec7 100644
--- a/src/dynapi/SDL_dynapi.sym
+++ b/src/dynapi/SDL_dynapi.sym
@@ -1183,6 +1183,7 @@ SDL3_0.0.0 {
SDL_GetDefaultLogOutputFunction;
SDL_RenderDebugText;
SDL_GetSandbox;
+ SDL_CancelGPUCommandBuffer;
# extra symbols go here (don't modify this line)
local: *;
};
diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h
index 4f5f16d889ecf..c33efb30b8c40 100644
--- a/src/dynapi/SDL_dynapi_overrides.h
+++ b/src/dynapi/SDL_dynapi_overrides.h
@@ -1208,3 +1208,4 @@
#define SDL_GetDefaultLogOutputFunction SDL_GetDefaultLogOutputFunction_REAL
#define SDL_RenderDebugText SDL_RenderDebugText_REAL
#define SDL_GetSandbox SDL_GetSandbox_REAL
+#define SDL_CancelGPUCommandBuffer SDL_CancelGPUCommandBuffer_REAL
diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h
index dcfcef6d65c76..cebdf0206ea87 100644
--- a/src/dynapi/SDL_dynapi_procs.h
+++ b/src/dynapi/SDL_dynapi_procs.h
@@ -1214,3 +1214,4 @@ SDL_DYNAPI_PROC(bool,SDL_SetErrorV,(SDL_PRINTF_FORMAT_STRING const char *a,va_li
SDL_DYNAPI_PROC(SDL_LogOutputFunction,SDL_GetDefaultLogOutputFunction,(void),(),return)
SDL_DYNAPI_PROC(bool,SDL_RenderDebugText,(SDL_Renderer *a,float b,float c,const char *d),(a,b,c,d),return)
SDL_DYNAPI_PROC(SDL_Sandbox,SDL_GetSandbox,(void),(),return)
+SDL_DYNAPI_PROC(bool,SDL_CancelGPUCommandBuffer,(SDL_GPUCommandBuffer *a),(a),return)
diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c
index 1957ac5deb03e..6f1e8975ddfd0 100644
--- a/src/gpu/SDL_gpu.c
+++ b/src/gpu/SDL_gpu.c
@@ -1301,6 +1301,7 @@ SDL_GPUCommandBuffer *SDL_AcquireGPUCommandBuffer(
commandBufferHeader->compute_pipeline_bound = false;
commandBufferHeader->copy_pass.command_buffer = command_buffer;
commandBufferHeader->copy_pass.in_progress = false;
+ commandBufferHeader->swapchain_texture_acquired = false;
commandBufferHeader->submitted = false;
return command_buffer;
@@ -2666,6 +2667,8 @@ bool SDL_AcquireGPUSwapchainTexture(
Uint32 *swapchain_texture_width,
Uint32 *swapchain_texture_height)
{
+ CommandBufferCommonHeader *commandBufferHeader = (CommandBufferCommonHeader *)command_buffer;
+
if (command_buffer == NULL) {
SDL_InvalidParamError("command_buffer");
return false;
@@ -2684,12 +2687,18 @@ bool SDL_AcquireGPUSwapchainTexture(
CHECK_ANY_PASS_IN_PROGRESS("Cannot acquire a swapchain texture during a pass!", false)
}
- return COMMAND_BUFFER_DEVICE->AcquireSwapchainTexture(
+ bool result = COMMAND_BUFFER_DEVICE->AcquireSwapchainTexture(
command_buffer,
window,
swapchain_texture,
swapchain_texture_width,
swapchain_texture_height);
+
+ if (*swapchain_texture != NULL){
+ commandBufferHeader->swapchain_texture_acquired = true;
+ }
+
+ return result;
}
bool SDL_SubmitGPUCommandBuffer(
@@ -2746,6 +2755,27 @@ SDL_GPUFence *SDL_SubmitGPUCommandBufferAndAcquireFence(
command_buffer);
}
+bool SDL_CancelGPUCommandBuffer(
+ SDL_GPUCommandBuffer *command_buffer)
+{
+ CommandBufferCommonHeader *commandBufferHeader = (CommandBufferCommonHeader *)command_buffer;
+
+ if (command_buffer == NULL) {
+ SDL_InvalidParamError("command_buffer");
+ return false;
+ }
+
+ if (COMMAND_BUFFER_DEVICE->debug_mode) {
+ if (commandBufferHeader->swapchain_texture_acquired) {
+ SDL_assert_release(!"Cannot cancel command buffer after a swapchain texture has been acquired!");
+ return false;
+ }
+ }
+
+ return COMMAND_BUFFER_DEVICE->Cancel(
+ command_buffer);
+}
+
bool SDL_WaitForGPUIdle(
SDL_GPUDevice *device)
{
diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h
index 7f9ef6ca43324..c20531f99b9bf 100644
--- a/src/gpu/SDL_sysgpu.h
+++ b/src/gpu/SDL_sysgpu.h
@@ -40,6 +40,7 @@ typedef struct CommandBufferCommonHeader
Pass compute_pass;
bool compute_pipeline_bound;
Pass copy_pass;
+ bool swapchain_texture_acquired;
bool submitted;
} CommandBufferCommonHeader;
@@ -810,6 +811,9 @@ struct SDL_GPUDevice
SDL_GPUFence *(*SubmitAndAcquireFence)(
SDL_GPUCommandBuffer *commandBuffer);
+ bool (*Cancel)(
+ SDL_GPUCommandBuffer *commandBuffer);
+
bool (*Wait)(
SDL_GPURenderer *driverData);
@@ -928,6 +932,7 @@ struct SDL_GPUDevice
ASSIGN_DRIVER_FUNC(AcquireSwapchainTexture, name) \
ASSIGN_DRIVER_FUNC(Submit, name) \
ASSIGN_DRIVER_FUNC(SubmitAndAcquireFence, name) \
+ ASSIGN_DRIVER_FUNC(Cancel, name) \
ASSIGN_DRIVER_FUNC(Wait, name) \
ASSIGN_DRIVER_FUNC(WaitForFences, name) \
ASSIGN_DRIVER_FUNC(QueryFence, name) \
diff --git a/src/gpu/d3d11/SDL_gpu_d3d11.c b/src/gpu/d3d11/SDL_gpu_d3d11.c
index d667efddf5ba7..f2a864bac984b 100644
--- a/src/gpu/d3d11/SDL_gpu_d3d11.c
+++ b/src/gpu/d3d11/SDL_gpu_d3d11.c
@@ -748,7 +748,7 @@ typedef struct D3D11CommandBuffer
// Fences
D3D11Fence *fence;
- Uint8 autoReleaseFence;
+ bool autoReleaseFence;
// Reference Counting
D3D11Buffer **usedBuffers;
@@ -3280,15 +3280,10 @@ static SDL_GPUCommandBuffer *D3D11_AcquireCommandBuffer(
SDL_zeroa(commandBuffer->computeReadWriteStorageTextureSubresources);
SDL_zeroa(commandBuffer->computeReadWriteStorageBuffers);
- bool acquireFenceResult = D3D11_INTERNAL_AcquireFence(commandBuffer);
- commandBuffer->autoReleaseFence = 1;
+ commandBuffer->autoReleaseFence = true;
SDL_UnlockMutex(renderer->acquireCommandBufferLock);
- if (!acquireFenceResult) {
- return NULL;
- }
-
return (SDL_GPUCommandBuffer *)commandBuffer;
}
@@ -4806,7 +4801,8 @@ static bool D3D11_INTERNAL_MapAndCopyTextureDownload(
static bool D3D11_INTERNAL_CleanCommandBuffer(
D3D11Renderer *renderer,
- D3D11CommandBuffer *commandBuffer)
+ D3D11CommandBuffer *commandBuffer,
+ bool cancel)
{
Uint32 i, j;
bool result = true;
@@ -4817,17 +4813,21 @@ static bool D3D11_INTERNAL_CleanCommandBuffer(
D3D11TransferBuffer *transferBuffer = commandBuffer->usedTransferBuffers[i];
for (j = 0; j < transferBuffer->bufferDownloadCount; j += 1) {
- result &= D3D11_INTERNAL_MapAndCopyBufferDownload(
- renderer,
- transferBuffer,
- &transferBuffer->bufferDownloads[j]);
+ if (!cancel) {
+ result &= D3D11_INTERNAL_MapAndCopyBufferDownload(
+ renderer,
+ transferBuffer,
+ &transferBuffer->bufferDownloads[j]);
+ }
}
for (j = 0; j < transferBuffer->textureDownloadCount; j += 1) {
- result &= D3D11_INTERNAL_MapAndCopyTextureDownload(
- renderer,
- transferBuffer,
- &transferBuffer->textureDownloads[j]);
+ if (!cancel) {
+ result &= D3D11_INTERNAL_MapAndCopyTextureDownload(
+ renderer,
+ transferBuffer,
+ &transferBuffer->textureDownloads[j]);
+ }
}
transferBuffer->bufferDownloadCount = 0;
@@ -4887,10 +4887,12 @@ static bool D3D11_INTERNAL_CleanCommandBuffer(
SDL_UnlockMutex(renderer->acquireCommandBufferLock);
// Remove this command buffer from the submitted list
- for (i = 0; i < renderer->submittedCommandBufferCount; i += 1) {
- if (renderer->submittedCommandBuffers[i] == commandBuffer) {
- renderer->submittedCommandBuffers[i] = renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount - 1];
- renderer->submittedCommandBufferCount -= 1;
+ if (!cancel) {
+ for (i = 0; i < renderer->submittedCommandBufferCount; i += 1) {
+ if (renderer->submittedCommandBuffers[i] == commandBuffer) {
+ renderer->submittedCommandBuffers[i] = renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount - 1];
+ renderer->submittedCommandBufferCount -= 1;
+ }
}
}
@@ -5024,7 +5026,8 @@ static bool D3D11_WaitForFences(
if (res == S_OK) {
result &= D3D11_INTERNAL_CleanCommandBuffer(
renderer,
- renderer->submittedCommandBuffers[i]);
+ renderer->submittedCommandBuffers[i],
+ false);
}
}
@@ -5696,6 +5699,11 @@ static bool D3D11_Submit(
SDL_LockMutex(renderer->contextLock);
+ if (!D3D11_INTERNAL_AcquireFence(d3d11CommandBuffer)) {
+ SDL_UnlockMutex(renderer->contextLock);
+ return false;
+ }
+
// Notify the command buffer completion query that we have completed recording
ID3D11DeviceContext_End(
renderer->immediateContext,
@@ -5778,7 +5786,8 @@ static bool D3D11_Submit(
if (res == S_OK) {
result &= D3D11_INTERNAL_CleanCommandBuffer(
renderer,
- renderer->submittedCommandBuffers[i]);
+ renderer->submittedCommandBuffers[i],
+ false);
}
}
@@ -5793,12 +5802,26 @@ static SDL_GPUFence *D3D11_SubmitAndAcquireFence(
SDL_GPUCommandBuffer *commandBuffer)
{
D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer;
- D3D11Fence *fence = d3d11CommandBuffer->fence;
+ d3d11CommandBuffer->autoReleaseFence = false;
+ if (!D3D11_Submit(commandBuffer)) {
+ return NULL;
+ }
+ return (SDL_GPUFence *)d3d11CommandBuffer->fence;
+}
+
+static bool D3D11_Cancel(
+ SDL_GPUCommandBuffer *commandBuffer)
+{
+ D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer;
+ D3D11Renderer *renderer = d3d11CommandBuffer->renderer;
+ bool result;
- d3d11CommandBuffer->autoReleaseFence = 0;
- D3D11_Submit(commandBuffer);
+ d3d11CommandBuffer->autoReleaseFence = false;
+ SDL_LockMutex(renderer->contextLock);
+ result = D3D11_INTERNAL_CleanCommandBuffer(renderer, d3d11CommandBuffer, true);
+ SDL_UnlockMutex(renderer->contextLock);
- return (SDL_GPUFence *)fence;
+ return result;
}
static bool D3D11_Wait(
@@ -5822,7 +5845,7 @@ static bool D3D11_Wait(
for (Sint32 i = renderer->submittedCommandBufferCount - 1; i >= 0; i -= 1) {
commandBuffer = renderer->submittedCommandBuffers[i];
- result &= D3D11_INTERNAL_CleanCommandBuffer(renderer, commandBuffer);
+ result &= D3D11_INTERNAL_CleanCommandBuffer(renderer, commandBuffer, false);
}
D3D11_INTERNAL_PerformPendingDestroys(renderer);
diff --git a/src/gpu/d3d12/SDL_gpu_d3d12.c b/src/gpu/d3d12/SDL_gpu_d3d12.c
index a489f204b81f6..00c329312a84e 100644
--- a/src/gpu/d3d12/SDL_gpu_d3d12.c
+++ b/src/gpu/d3d12/SDL_gpu_d3d12.c
@@ -7297,18 +7297,20 @@ static bool D3D12_INTERNAL_CopyTextureDownload(
static bool D3D12_INTERNAL_CleanCommandBuffer(
D3D12Renderer *renderer,
- D3D12CommandBuffer *commandBuffer)
+ D3D12CommandBuffer *commandBuffer,
+ bool cancel)
{
Uint32 i;
HRESULT res;
bool result = true;
// Perform deferred texture data copies
-
for (i = 0; i < commandBuffer->textureDownloadCount; i += 1) {
- result &= D3D12_INTERNAL_CopyTextureDownload(
- commandBuffer,
- commandBuffer->textureDownloads[i]);
+ if (!cancel) {
+ result &= D3D12_INTERNAL_CopyTextureDownload(
+ commandBuffer,
+ commandBuffer->textureDownloads[i]);
+ }
SDL_free(commandBuffer->textureDownloads[i]);
}
commandBuffer->textureDownloadCount = 0;
@@ -7401,10 +7403,12 @@ static bool D3D12_INTERNAL_CleanCommandBuffer(
SDL_UnlockMutex(renderer->acquireCommandBufferLock);
// Remove this command buffer from the submitted list
- for (i = 0; i < renderer->submittedCommandBufferCount; i += 1) {
- if (renderer->submittedCommandBuffers[i] == commandBuffer) {
- renderer->submittedCommandBuffers[i] = renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount - 1];
- renderer->submittedCommandBufferCount -= 1;
+ if (!cancel) {
+ for (i = 0; i < renderer->submittedCommandBufferCount; i += 1) {
+ if (renderer->submittedCommandBuffers[i] == commandBuffer) {
+ renderer->submittedCommandBuffers[i] = renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount - 1];
+ renderer->submittedCommandBufferCount -= 1;
+ }
}
}
@@ -7573,7 +7577,8 @@ static bool D3D12_Submit(
if (fenceValue == D3D12_FENCE_SIGNAL_VALUE) {
result &= D3D12_INTERNAL_CleanCommandBuffer(
renderer,
- renderer->submittedCommandBuffers[i]);
+ renderer->submittedCommandBuffers[i],
+ false);
}
}
@@ -7589,10 +7594,32 @@ static SDL_GPUFence *D3D12_SubmitAndAcquireFence(
{
D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
d3d12CommandBuffer->autoReleaseFence = false;
- D3D12_Submit(commandBuffer);
+ if (!D3D12_Submit(commandBuffer)) {
+ return NULL;
+ }
return (SDL_GPUFence *)d3d12CommandBuffer->inFlightFence;
}
+static bool D3D12_Cancel(
+ SDL_GPUCommandBuffer *commandBuffer)
+{
+ D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
+ D3D12Renderer *renderer = d3d12CommandBuffer->renderer;
+ bool result;
+ HRESULT res;
+
+ // Notify the command buffer that we have completed recording
+ res = ID3D12GraphicsCommandList_Close(d3d12CommandBuffer->graphicsCommandList);
+ CHECK_D3D12_ERROR_AND_RETURN("Failed to close command list!", false);
+
+ d3d12CommandBuffer->autoReleaseFence = false;
+ SDL_LockMutex(renderer->submitLock);
+ result = D3D12_INTERNAL_CleanCommandBuffer(renderer, d3d12CommandBuffer, true);
+ SDL_UnlockMutex(renderer->submitLock);
+
+ return result;
+}
+
static bool D3D12_Wait(
SDL_GPURenderer *driverData)
{
@@ -7636,7 +7663,7 @@ static bool D3D12_Wait(
// Clean up
for (Sint32 i = renderer->submittedCommandBufferCount - 1; i >= 0; i -= 1) {
- result &= D3D12_INTERNAL_CleanCommandBuffer(renderer, renderer->submittedCommandBuffers[i]);
+ result &= D3D12_INTERNAL_CleanCommandBuffer(renderer, renderer->submittedCommandBuffers[i], false);
}
D3D12_INTERNAL_PerformPendingDestroys(renderer);
@@ -7692,7 +7719,8 @@ static bool D3D12_WaitForFences(
if (fenceValue == D3D12_FENCE_SIGNAL_VALUE) {
result &= D3D12_INTERNAL_CleanCommandBuffer(
renderer,
- renderer->submittedCommandBuffers[i]);
+ renderer->submittedCommandBuffers[i],
+ false);
}
}
diff --git a/src/gpu/metal/SDL_gpu_metal.m b/src/gpu/metal/SDL_gpu_metal.m
index 04a5bff59692e..6c496a5c11402 100644
--- a/src/gpu/metal/SDL_gpu_metal.m
+++ b/src/gpu/metal/SDL_gpu_metal.m
@@ -446,6 +446,7 @@ static MTLDepthClipMode SDLToMetal_DepthClipMode(
typedef struct MetalFence
{
SDL_AtomicInt complete;
+ SDL_AtomicInt referenceCount;
} MetalFence;
typedef struct MetalWindowData
@@ -453,9 +454,12 @@ static MTLDepthClipMode SDLToMetal_DepthClipMode(
SDL_Window *window;
SDL_MetalView view;
CAMetalLayer *layer;
+ SDL_GPUPresentMode presentMode;
id<CAMetalDrawable> drawable;
MetalTexture texture;
MetalTextureContainer textureContainer;
+ SDL_GPUFence *inFlightFences[MAX_FRAMES_IN_FLIGHT];
+ Uint32 frameCounter;
} MetalWindowData;
typedef struct MetalShader
@@ -605,7 +609,7 @@ static MTLDepthClipMode SDLToMetal_DepthClipMode(
// Fences
MetalFence *fence;
- Uint8 autoReleaseFence;
+ bool autoReleaseFence;
// Reference Counting
MetalBuffer **usedBuffers;
@@ -2019,6 +2023,7 @@ static Uint8 METAL_INTERNAL_CreateFence(
fence = SDL_calloc(1, sizeof(MetalFence));
SDL_SetAtomicInt(&fence->complete, 0);
+ SDL_SetAtomicInt(&fence->referenceCount, 0);
// Add it to the available pool
// FIXME: Should this be EXPAND_IF_NEEDED?
@@ -2036,7 +2041,7 @@ static Uint8 METAL_INTERNAL_CreateFence(
return 1;
}
-static Uint8 METAL_INTERNAL_AcquireFence(
+static bool METAL_INTERNAL_AcquireFence(
MetalRenderer *renderer,
MetalCommandBuffer *commandBuffer)
{
@@ -2049,7 +2054,7 @@ static Uint8 METAL_INTERNAL_AcquireFence(
if (!METAL_INTERNAL_CreateFence(renderer)) {
SDL_UnlockMutex(renderer->fenceLock);
SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to create fence!");
- return 0;
+ return false;
}
}
@@ -2061,8 +2066,9 @@ static Uint8 METAL_INTERNAL_AcquireFence(
// Associate the fence with the command buffer
commandBuffer->fence = fence;
SDL_SetAtomicInt(&fence->complete, 0); // FIXME: Is this right?
+ (void)SDL_AtomicIncRef(&commandBuffer->fence->referenceCount);
- return 1;
+ return true;
}
static SDL_GPUCommandBuffer *METAL_AcquireCommandBuffer(
@@ -2099,8 +2105,7 @@ static Uint8 METAL_INTERNAL_AcquireFence(
commandBuffer->needComputeTextureBind = true;
commandBuffer->needComputeUniformBind = true;
- METAL_INTERNAL_AcquireFence(renderer, commandBuffer);
- commandBuffer->autoReleaseFence = 1;
+ commandBuffer->autoReleaseFence = true;
SDL_UnlockMutex(renderer->acquireCommandBufferLock);
@@ -3266,29 +3271,36 @@ static void METAL_ReleaseFence(
SDL_GPURenderer *driverData,
SDL_GPUFence *fence)
{
- METAL_INTERNAL_ReleaseFenceToPool(
- (MetalRenderer *)driverData,
- (MetalFence *)fence);
+ MetalFence *metalFence = (MetalFence *)fence;
+ if (SDL_AtomicDecRef(&metalFence->referenceCount)) {
+ METAL_INTERNAL_ReleaseFenceToPool(
+ (MetalRenderer *)driverData,
+ (MetalFence *)fence);
+ }
}
// Cleanup
static void METAL_INTERNAL_CleanCommandBuffer(
MetalRenderer *renderer,
- MetalCommandBuffer *commandBuffer)
+ MetalCommandBuffer *commandBuffer,
+ bool cancel)
{
Uint32 i;
- // Reference Counting
- for (i = 0; i < commandBuffer->usedBufferCount; i += 1) {
- (void)SDL_AtomicDecRef(&commandBuffer->usedBuffers[i]->referenceCount);
+ // End any active passes
+ if (commandBuffer->renderEncoder) {
+ [commandBuffer->renderEncoder endEncoding];
+ commandBuffer->renderEncoder = nil;
}
- commandBuffer->usedBufferCount = 0;
-
- for (i = 0; i < commandBuffer->usedTextureCount; i += 1) {
- (void)SDL_AtomicDecRef(&commandBuffer->usedTextures[i]->referenceCount);
+ if (commandBuffer->computeEncoder) {
+ [commandBuffer->computeEncoder endEncoding];
+ commandBuffer->computeEncoder = nil;
+ }
+ if (commandBuffer->blitEncoder) {
+ [commandBuffer->blitEncoder endEncoding];
+ commandBuffer->blitEncoder = nil;
}
- commandBuffer->usedTextureCount = 0;
// Uniform buffers are now available
@@ -3303,6 +3315,18 @@ static void METAL_INTERNAL_CleanCommandBuffer(
SDL_UnlockMutex(renderer->acquireUniformBufferLock);
+ // Reference Counting
+
+ for (i = 0; i < commandBuffer->usedBufferCount; i += 1) {
+ (void)SDL_AtomicDecRef(&commandBuffer->usedBuffers[i]->referenceCount);
+ }
+ commandBuffer->usedBufferCount = 0;
+
+ for (i = 0; i < commandBuffer->usedTextureCount; i += 1) {
+ (void)SDL_AtomicDecRef(&commandBuffer->usedTextures[i]->referenceCount);
+ }
+ commandBuffer->usedTextureCount = 0;
+
// Reset presentation
commandBuffer->windowDataCount = 0;
@@ -3354,10 +3378,12 @@ static void METAL_INTERNAL_CleanCommandBuffer(
SDL_UnlockMutex(renderer->acquireCommandBufferLock);
// Remove this command buffer from the submitted list
- for (i = 0; i < renderer->submittedCommandBufferCount; i += 1) {
- if (renderer->submittedCommandBuffers[i] == commandBuffer) {
- renderer->submittedCommandBuffers[i] = renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount - 1];
- renderer->submittedCommandBufferCount -= 1;
+ if (!cancel) {
+ for (i = 0; i < renderer->submittedCommandBufferCount; i += 1) {
+ if (renderer->submittedCommandBuffers[i] == commandBuffer) {
+ renderer->submittedCommandBuffers[i] = renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount - 1];
+ renderer->submittedCommandBufferCount -= 1;
+ }
}
}
}
@@ -3483,12 +3509,19 @@ static Uint8 METAL_INTERNAL_CreateSwapchain(
windowData->view = SDL_Metal_CreateView(windowData->window);
windowData->drawable = nil;
+ windowData->presentMode = SDL_GPU_PRESENTMODE_VSYNC;
+ windowData->frameCounter = 0;
+
+ for (int i = 0; i < MAX_FRAMES_IN_FLIGHT; i += 1) {
+ windowData->inFlightFences[i] = NULL;
+ }
windowData->layer = (__bridge CAMetalLayer *)(SDL_Metal_GetLayer(windowData->view));
windowData->layer.device = renderer->device;
#ifdef SDL_PLATFORM_MACOS
if (@available(macOS 10.13, *)) {
windowData->layer.displaySyncEnabled = (presentMode != SDL_GPU_PRESENTMODE_IMMEDIATE);
+ windowData->presentMode = presentMode;
}
#endif
windowData->layer.pixelFormat = SDLToMetal_TextureFormat(SwapchainCompositionToFormat[swapchainComposition]);
@@ -3610,6 +3643,13 @@ static void METAL_ReleaseWindow(
METAL_Wait(driverData);
SDL_Metal_DestroyView(windowData->view);
+ for (int i = 0; i < MAX_FRAMES_IN_FLIGHT; i += 1) {
+ if (windowData->inFlightFences[i] != NULL) {
+ METAL_ReleaseFence(
+ (SDL_GPURenderer *)renderer,
+ windowData->inFlightFences[i]);
+ }
+ }
SDL_LockMutex(renderer->windowLock);
for (Uint32 i = 0; i < renderer->claimedWindowCount; i += 1) {
@@ -3653,10 +3693,6 @@ static bool METAL_AcquireSwapchainTexture(
SET_STRING_ERROR_AND_RETURN("Window is not claimed by this SDL_GpuDevice", false);
}
- // Get the drawable and its underlying texture
- windowData->drawable = [windowData->layer nextDrawable];
- windowData->texture.handle = [windowData->drawable texture];
-
// Update the window size
drawableSize = windowData->layer.drawableSize;
windowData->textureContainer.header.info.width = (Uint32)drawableSize.width;
@@ -3668,6 +3704,39 @@ static bool METAL_AcquireSwapchainTexture(
*swapchainTextureHeight = (Uint32)drawableSize.height;
}
+ if (windowData->inFlightFences[windowData->frameCounter] != NULL) {
+ if (windowData->presentMode == SDL_GPU_PRESENTMODE_VSYNC) {
+ // In VSYNC mode, block until the least recent presented frame is done
+ if (!METAL_WaitForFences(
+ (SDL_GPURenderer *)renderer,
+ true,
+ &windowData->inFlightFences[windowData->frameCounter],
+ 1)) {
+ return false;
+ }
+ } else {
+ if (!METAL_QueryFence(
+ (SDL_GPURenderer *)metalCommandBuffer->renderer,
+ windowData->inFlightFences[windowData->frameCounter])) {
+ /*
+ * In any non-VSYNC present mode, if the least recent fence is not signaled,
+ * return true to indicate that there is no error but rendering should be skipped
+ */
+ return true;
+ }
+ }
+
+ METAL_ReleaseFence(
+ (SDL_GPURenderer *)metalCommandBuffer->renderer,
+ windowData->inFlightFences[windowData->frameCounter]);
+
+ windowData->inFlightFences[windowData->frameCounter] = NULL;
+ }
+
+ // Get the drawable and its underlying texture
+ windowData->drawable = [windowData->layer nextDrawable];
+ windowData->texture.handle = [windowData->drawable texture];
+
// Set up presentation
if (metalCommandBuffer->windowDataCount == metalCommandBuffer->windowDataCapacity) {
metalCommandBuffer->windowDataCapacity += 1;
@@ -3723,9 +3792,12 @@ static bool METAL_SetSwapchainParameters(
METAL_Wait(driverData);
+ windowData->presentMode = SDL_GPU_PRESENTMODE_VSYNC;
+
#ifdef SDL_PLATFORM_MACOS
if (@available(macOS 10.13, *)) {
windowData->layer.displaySyncEnabled = (presentMode != SDL_GPU_PRESENTMODE_IMMEDIATE);
+ windowData->presentMode = presentMode;
}
#endif
windowData->layer.pixelFormat = SDLToMetal_TextureFormat(SwapchainCompositionToFormat[swapchainComposition]);
@@ -3756,10 +3828,22 @@ static bool METAL_Submit(
SDL_LockMutex(renderer->submitLock);
+ if (!METAL_INTERNAL_AcquireFence(renderer, metalCommandBuffer)) {
+ SDL_UnlockMutex(renderer->submitLock);
+ return false;
+ }
+
// Enqueue present requests, if applicable
for (Uint32 i = 0; i < metalCommandBuffer->windowDataCount; i += 1) {
- [metalCommandBuffer->handle presentDrawable:metalCommandBuffer->windowDatas[i]->drawable];
- metalCommandBuffer->windowDatas[i]->drawable = nil;
+ MetalWindowData *windowData = metalCommandBuffer->windowDatas[i];
+ [metalCommandBuffer->handle presentDrawable:windowData->drawable];
+ windowData->drawable = nil;
+
+ windowData->inFlightFences[windowData->frameCounter] = (SDL_GPUFence *)metalCommandBuffer->fence;
+
+ (void)SDL_AtomicIncRef(&metalCommandBuffer->fence->referenceCount);
+
+ windowData->frameCounter = (windowData->frameCounter + 1) % MAX_FRAMES_IN_FLIGHT;
}
// Notify the fence when the command buffer has completed
@@ -3787,7 +3871,8 @@ static bool METAL_Submit(
if (SDL_GetAtomicInt(&renderer->submittedCommandBuffers[i]->fence->complete)) {
METAL_INTERNAL_CleanCommandBuffer(
renderer,
- renderer->submittedCommandBuffers[i]);
+ renderer->submittedCommandBuffers[i],
+ false);
}
}
@@ -3803,12 +3888,25 @@ static bool METAL_Submit(
SDL_GPUCommandBuffer *commandBuffer)
{
MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer;
- MetalFence *fence = metalCommandBuffer->fence;
+ metalCommandBuffer->autoReleaseFence = false;
+ if (!METAL_Submit(commandBuffer)) {
+ return NULL;
+ }
+ return (SDL_GPUFence *)metalCommandBuffer->fence;
+}
+
+static bool METAL_Cancel(
+ SDL_GPUCommandBuffer *commandBuffer)
+{
+ MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer;
+ MetalRenderer *renderer = metalCommandBuffer->renderer;
- metalCommandBuffer->autoReleaseFence = 0;
- METAL_Submit(commandBuffer);
+ metalCommandBuffer->autoReleaseFence = false;
+ SDL_LockMutex(renderer->submitLock);
+ METAL_INTERNAL_CleanCommandBuffer(renderer, metalCommandBuffer, true);
+ SDL_UnlockMutex(renderer->submitLock);
- return (SDL_GPUFence *)fence;
+ return true;
}
static bool METAL_Wait(
@@ -3832,7 +3930,7 @@ static bool METAL_Wait(
for (Sint32 i = renderer->submittedCommandBufferCount - 1; i >= 0; i -= 1) {
commandBuffer = renderer->submittedCommandBuffers[i];
- METAL_INTERNAL_CleanCommandBuffer(renderer, commandBuffer);
+ METAL_INTERNAL_CleanCommandBuffer(renderer, commandBuffer, false);
}
METAL_INTERNAL_PerformPendingDestroys(renderer);
diff --git a/src/gpu/vulkan/SDL_gpu_vulkan.c b/src/gpu/vulkan/SDL_gpu_vulkan.c
index ce3abd60d17b3..22983a52022b8 100644
--- a/src/gpu/vulkan/SDL_gpu_vulkan.c
+++ b/src/gpu/vulkan/SDL_gpu_vulkan.c
@@ -630,8 +630,6 @@ typedef struct VulkanTextureSubresource
VkImageView *renderTargetViews;
(Patch may be truncated, please check the link at the top of this post.)