From 514b26e4c459db2a006cae479946fcb903cac3c3 Mon Sep 17 00:00:00 2001
From: Alex Tselousov <[EMAIL REDACTED]>
Date: Tue, 17 Mar 2026 18:48:52 +0300
Subject: [PATCH] Removed busyloop from SDL_GPUFence on MacOS
Previously, waiting on a MetalFence in the Metal backend was implemented
as a busy loop on an atomic int, so the CPU would spin until the GPU
finished, wasting power and reducing battery life. This was the only kind
of CPU-GPU synchronization available (apart from requesting a swapchain).
---
src/gpu/metal/SDL_gpu_metal.m | 61 +++++++++++++++++++++--------------
1 file changed, 37 insertions(+), 24 deletions(-)
diff --git a/src/gpu/metal/SDL_gpu_metal.m b/src/gpu/metal/SDL_gpu_metal.m
index de6c7b7fc8c7c..0b06c6014f436 100644
--- a/src/gpu/metal/SDL_gpu_metal.m
+++ b/src/gpu/metal/SDL_gpu_metal.m
@@ -430,6 +430,7 @@ static MTLDepthClipMode SDLToMetal_DepthClipMode(
// Structs
typedef struct MetalRenderer MetalRenderer;
+typedef struct MetalCommandBuffer MetalCommandBuffer;
typedef struct MetalTexture
{
@@ -453,7 +454,8 @@ static MTLDepthClipMode SDLToMetal_DepthClipMode(
typedef struct MetalFence
{
- SDL_AtomicInt complete;
+ // can be NULL if the command buffer was recycled
+ MetalCommandBuffer *commandBuffer;
SDL_AtomicInt referenceCount;
} MetalFence;
@@ -2093,7 +2095,6 @@ static Uint8 METAL_INTERNAL_CreateFence(
MetalFence *fence;
fence = SDL_calloc(1, sizeof(MetalFence));
- SDL_SetAtomicInt(&fence->complete, 0);
SDL_SetAtomicInt(&fence->referenceCount, 0);
// Add it to the available pool
@@ -2136,7 +2137,7 @@ static bool METAL_INTERNAL_AcquireFence(
// Associate the fence with the command buffer
commandBuffer->fence = fence;
- SDL_SetAtomicInt(&fence->complete, 0); // FIXME: Is this right?
+ fence->commandBuffer = commandBuffer;
(void)SDL_AtomicIncRef(&commandBuffer->fence->referenceCount);
return true;
@@ -3517,6 +3518,8 @@ static void METAL_INTERNAL_CleanCommandBuffer(
METAL_ReleaseFence(
(SDL_GPURenderer *)renderer,
(SDL_GPUFence *)commandBuffer->fence);
+ } else {
+ commandBuffer->fence->commandBuffer = NULL;
}
// Return command buffer to pool
@@ -3587,6 +3590,16 @@ static void METAL_INTERNAL_PerformPendingDestroys(
}
// Fences
+// Reports whether the command buffer tied to `fence` still has work in flight.
+// Returns false when the fence no longer points at a command buffer.
+static bool METAL_INTERNAL_IsFenceBusy(
+    MetalFence *fence
+) {
+    // A NULL back-pointer means the command buffer was recycled
+    if (fence->commandBuffer == NULL) {
+        return false;
+    }
+
+    // Only the committed and scheduled states count as busy;
+    // every other status is reported as not busy.
+    switch (fence->commandBuffer->handle.status) {
+    case MTLCommandBufferStatusCommitted:
+    case MTLCommandBufferStatusScheduled:
+        return true;
+    default:
+        return false;
+    }
+}
static bool METAL_WaitForFences(
SDL_GPURenderer *driverData,
@@ -3596,24 +3609,29 @@ static bool METAL_WaitForFences(
{
@autoreleasepool {
MetalRenderer *renderer = (MetalRenderer *)driverData;
- bool waiting;
if (waitAll) {
for (Uint32 i = 0; i < numFences; i += 1) {
- while (!SDL_GetAtomicInt(&((MetalFence *)fences[i])->complete)) {
- // Spin!
+ MetalFence *fence = (MetalFence *)fences[i];
+ if (METAL_INTERNAL_IsFenceBusy(fence)) {
+ [fence->commandBuffer->handle waitUntilCompleted];
}
}
} else {
- waiting = 1;
- while (waiting) {
- for (Uint32 i = 0; i < numFences; i += 1) {
- if (SDL_GetAtomicInt(&((MetalFence *)fences[i])->complete) > 0) {
- waiting = 0;
- break;
- }
- }
+ dispatch_semaphore_t semaphore = dispatch_semaphore_create(0);
+ for (Uint32 i = 0; i < numFences; i += 1) {
+ MetalFence *fence = (MetalFence *)fences[i];
+ // command buffer has completed and been recycled
+ if(!fence->commandBuffer)
+ return true;
+
+ // even if it's completed, the handle will call back straight away
+ [fence->commandBuffer->handle addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
+ dispatch_semaphore_signal(semaphore);
+ }];
}
+
+ dispatch_semaphore_wait(semaphore, DISPATCH_TIME_FOREVER);
}
METAL_INTERNAL_PerformPendingDestroys(renderer);
@@ -3627,7 +3645,7 @@ static bool METAL_QueryFence(
SDL_GPUFence *fence)
{
MetalFence *metalFence = (MetalFence *)fence;
- return SDL_GetAtomicInt(&metalFence->complete) == 1;
+ // The previous implementation returned true once the fence had completed,
+ // so the result must be the negation of "busy" to keep that contract.
+ return !METAL_INTERNAL_IsFenceBusy(metalFence);
}
// Window and Swapchain Management
@@ -4090,11 +4108,6 @@ static bool METAL_Submit(
windowData->frameCounter = (windowData->frameCounter + 1) % renderer->allowedFramesInFlight;
}
- // Notify the fence when the command buffer has completed
- [metalCommandBuffer->handle addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
- SDL_AtomicIncRef(&metalCommandBuffer->fence->complete);
- }];
-
// Submit the command buffer
[metalCommandBuffer->handle commit];
metalCommandBuffer->handle = nil;
@@ -4112,7 +4125,8 @@ static bool METAL_Submit(
// Check if we can perform any cleanups
for (Sint32 i = renderer->submittedCommandBufferCount - 1; i >= 0; i -= 1) {
- if (SDL_GetAtomicInt(&renderer->submittedCommandBuffers[i]->fence->complete)) {
+
+ if (!METAL_INTERNAL_IsFenceBusy(renderer->submittedCommandBuffers[i]->fence)) {
METAL_INTERNAL_CleanCommandBuffer(
renderer,
renderer->submittedCommandBuffers[i],
@@ -4165,9 +4179,8 @@ static bool METAL_Wait(
* Sort of equivalent to vkDeviceWaitIdle.
*/
for (Uint32 i = 0; i < renderer->submittedCommandBufferCount; i += 1) {
- while (!SDL_GetAtomicInt(&renderer->submittedCommandBuffers[i]->fence->complete)) {
- // Spin!
- }
+ SDL_GPUFence *opaqueFence = (SDL_GPUFence *)renderer->submittedCommandBuffers[i]->fence;
+ METAL_WaitForFences(driverData, true, &opaqueFence, 1);
}
SDL_LockMutex(renderer->submitLock);