SDL: GPU: Resource binding state shadowing (#12138)

From 8e766c925229af922630ec72812c034b16b8c974 Mon Sep 17 00:00:00 2001
From: Evan Hemsley <[EMAIL REDACTED]>
Date: Fri, 31 Jan 2025 08:34:10 -0800
Subject: [PATCH] GPU: Resource binding state shadowing (#12138)

---------

Co-authored-by: Caleb Cornett <caleb.cornett@outlook.com>
---
 src/gpu/d3d12/SDL_gpu_d3d12.c   | 207 ++++++++------
 src/gpu/metal/SDL_gpu_metal.m   | 479 ++++++++++++++++++--------------
 src/gpu/vulkan/SDL_gpu_vulkan.c | 281 +++++++++++--------
 3 files changed, 556 insertions(+), 411 deletions(-)

diff --git a/src/gpu/d3d12/SDL_gpu_d3d12.c b/src/gpu/d3d12/SDL_gpu_d3d12.c
index c142bae4486f2..aa5dc6ca77ae4 100644
--- a/src/gpu/d3d12/SDL_gpu_d3d12.c
+++ b/src/gpu/d3d12/SDL_gpu_d3d12.c
@@ -4552,15 +4552,18 @@ static void D3D12_BindVertexBuffers(
 
     for (Uint32 i = 0; i < numBindings; i += 1) {
         D3D12Buffer *currentBuffer = ((D3D12BufferContainer *)bindings[i].buffer)->activeBuffer;
-        d3d12CommandBuffer->vertexBuffers[firstSlot + i] = currentBuffer;
-        d3d12CommandBuffer->vertexBufferOffsets[firstSlot + i] = bindings[i].offset;
-        D3D12_INTERNAL_TrackBuffer(d3d12CommandBuffer, currentBuffer);
+
+        if (d3d12CommandBuffer->vertexBuffers[firstSlot + i] != currentBuffer || d3d12CommandBuffer->vertexBufferOffsets[firstSlot + i] != bindings[i].offset) {
+            D3D12_INTERNAL_TrackBuffer(d3d12CommandBuffer, currentBuffer);
+
+            d3d12CommandBuffer->vertexBuffers[firstSlot + i] = currentBuffer;
+            d3d12CommandBuffer->vertexBufferOffsets[firstSlot + i] = bindings[i].offset;
+            d3d12CommandBuffer->needVertexBufferBind = true;
+        }
     }
 
     d3d12CommandBuffer->vertexBufferCount =
         SDL_max(d3d12CommandBuffer->vertexBufferCount, firstSlot + numBindings);
-
-    d3d12CommandBuffer->needVertexBufferBind = true;
 }
 
 static void D3D12_BindIndexBuffer(
@@ -4596,19 +4599,24 @@ static void D3D12_BindVertexSamplers(
         D3D12TextureContainer *container = (D3D12TextureContainer *)textureSamplerBindings[i].texture;
         D3D12Sampler *sampler = (D3D12Sampler *)textureSamplerBindings[i].sampler;
 
-        D3D12_INTERNAL_TrackTexture(
-            d3d12CommandBuffer,
-            container->activeTexture);
+        if (d3d12CommandBuffer->vertexSamplers[firstSlot + i] != sampler) {
+            D3D12_INTERNAL_TrackSampler(
+                d3d12CommandBuffer,
+                sampler);
 
-        D3D12_INTERNAL_TrackSampler(
-            d3d12CommandBuffer,
-            sampler);
+            d3d12CommandBuffer->vertexSamplers[firstSlot + i] = sampler;
+            d3d12CommandBuffer->needVertexSamplerBind = true;
+        }
 
-        d3d12CommandBuffer->vertexSamplers[firstSlot + i] = sampler;
-        d3d12CommandBuffer->vertexSamplerTextures[firstSlot + i] = container->activeTexture;
-    }
+        if (d3d12CommandBuffer->vertexSamplerTextures[firstSlot + i] != container->activeTexture) {
+            D3D12_INTERNAL_TrackTexture(
+                d3d12CommandBuffer,
+                container->activeTexture);
 
-    d3d12CommandBuffer->needVertexSamplerBind = true;
+            d3d12CommandBuffer->vertexSamplerTextures[firstSlot + i] = container->activeTexture;
+            d3d12CommandBuffer->needVertexSamplerBind = true;
+        }
+    }
 }
 
 static void D3D12_BindVertexStorageTextures(
@@ -4623,12 +4631,13 @@ static void D3D12_BindVertexStorageTextures(
         D3D12TextureContainer *container = (D3D12TextureContainer *)storageTextures[i];
         D3D12Texture *texture = container->activeTexture;
 
-        D3D12_INTERNAL_TrackTexture(d3d12CommandBuffer, texture);
+        if (d3d12CommandBuffer->vertexStorageTextures[firstSlot + i] != texture) {
+            D3D12_INTERNAL_TrackTexture(d3d12CommandBuffer, texture);
 
-        d3d12CommandBuffer->vertexStorageTextures[firstSlot + i] = texture;
+            d3d12CommandBuffer->vertexStorageTextures[firstSlot + i] = texture;
+            d3d12CommandBuffer->needVertexStorageTextureBind = true;
+        }
     }
-
-    d3d12CommandBuffer->needVertexStorageTextureBind = true;
 }
 
 static void D3D12_BindVertexStorageBuffers(
@@ -4641,15 +4650,15 @@ static void D3D12_BindVertexStorageBuffers(
 
     for (Uint32 i = 0; i < numBindings; i += 1) {
         D3D12BufferContainer *container = (D3D12BufferContainer *)storageBuffers[i];
+        if (d3d12CommandBuffer->vertexStorageBuffers[firstSlot + i] != container->activeBuffer) {
+            D3D12_INTERNAL_TrackBuffer(
+                d3d12CommandBuffer,
+                container->activeBuffer);
 
-        D3D12_INTERNAL_TrackBuffer(
-            d3d12CommandBuffer,
-            container->activeBuffer);
-
-        d3d12CommandBuffer->vertexStorageBuffers[firstSlot + i] = container->activeBuffer;
+            d3d12CommandBuffer->vertexStorageBuffers[firstSlot + i] = container->activeBuffer;
+            d3d12CommandBuffer->needVertexStorageBufferBind = true;
+        }
     }
-
-    d3d12CommandBuffer->needVertexStorageBufferBind = true;
 }
 
 static void D3D12_BindFragmentSamplers(
@@ -4664,19 +4673,24 @@ static void D3D12_BindFragmentSamplers(
         D3D12TextureContainer *container = (D3D12TextureContainer *)textureSamplerBindings[i].texture;
         D3D12Sampler *sampler = (D3D12Sampler *)textureSamplerBindings[i].sampler;
 
-        D3D12_INTERNAL_TrackTexture(
-            d3d12CommandBuffer,
-            container->activeTexture);
+        if (d3d12CommandBuffer->fragmentSamplers[firstSlot + i] != sampler) {
+            D3D12_INTERNAL_TrackSampler(
+                d3d12CommandBuffer,
+                sampler);
 
-        D3D12_INTERNAL_TrackSampler(
-            d3d12CommandBuffer,
-            sampler);
+            d3d12CommandBuffer->fragmentSamplers[firstSlot + i] = sampler;
+            d3d12CommandBuffer->needFragmentSamplerBind = true;
+        }
 
-        d3d12CommandBuffer->fragmentSamplers[firstSlot + i] = sampler;
-        d3d12CommandBuffer->fragmentSamplerTextures[firstSlot + i] = container->activeTexture;
-    }
+        if (d3d12CommandBuffer->fragmentSamplerTextures[firstSlot + i] != container->activeTexture) {
+            D3D12_INTERNAL_TrackTexture(
+                d3d12CommandBuffer,
+                container->activeTexture);
 
-    d3d12CommandBuffer->needFragmentSamplerBind = true;
+            d3d12CommandBuffer->fragmentSamplerTextures[firstSlot + i] = container->activeTexture;
+            d3d12CommandBuffer->needFragmentSamplerBind = true;
+        }
+    }
 }
 
 static void D3D12_BindFragmentStorageTextures(
@@ -4691,12 +4705,13 @@ static void D3D12_BindFragmentStorageTextures(
         D3D12TextureContainer *container = (D3D12TextureContainer *)storageTextures[i];
         D3D12Texture *texture = container->activeTexture;
 
-        D3D12_INTERNAL_TrackTexture(d3d12CommandBuffer, texture);
+        if (d3d12CommandBuffer->fragmentStorageTextures[firstSlot + i] != texture) {
+            D3D12_INTERNAL_TrackTexture(d3d12CommandBuffer, texture);
 
-        d3d12CommandBuffer->fragmentStorageTextures[firstSlot + i] = texture;
+            d3d12CommandBuffer->fragmentStorageTextures[firstSlot + i] = texture;
+            d3d12CommandBuffer->needFragmentStorageTextureBind = true;
+        }
     }
-
-    d3d12CommandBuffer->needFragmentStorageTextureBind = true;
 }
 
 static void D3D12_BindFragmentStorageBuffers(
@@ -4710,14 +4725,15 @@ static void D3D12_BindFragmentStorageBuffers(
     for (Uint32 i = 0; i < numBindings; i += 1) {
         D3D12BufferContainer *container = (D3D12BufferContainer *)storageBuffers[i];
 
-        D3D12_INTERNAL_TrackBuffer(
-            d3d12CommandBuffer,
-            container->activeBuffer);
+        if (d3d12CommandBuffer->fragmentStorageBuffers[firstSlot + i] != container->activeBuffer) {
+            D3D12_INTERNAL_TrackBuffer(
+                d3d12CommandBuffer,
+                container->activeBuffer);
 
-        d3d12CommandBuffer->fragmentStorageBuffers[firstSlot + i] = container->activeBuffer;
+            d3d12CommandBuffer->fragmentStorageBuffers[firstSlot + i] = container->activeBuffer;
+            d3d12CommandBuffer->needFragmentStorageBufferBind = true;
+        }
     }
-
-    d3d12CommandBuffer->needFragmentStorageBufferBind = true;
 }
 
 static void D3D12_PushVertexUniformData(
@@ -5330,20 +5346,26 @@ static void D3D12_BindComputeSamplers(
 
     for (Uint32 i = 0; i < numBindings; i += 1) {
         D3D12TextureContainer *container = (D3D12TextureContainer *)textureSamplerBindings[i].texture;
+        D3D12Sampler *sampler = (D3D12Sampler *)textureSamplerBindings[i].sampler;
 
-        D3D12_INTERNAL_TrackSampler(
-            d3d12CommandBuffer,
-            (D3D12Sampler *)textureSamplerBindings[i].sampler);
+        if (d3d12CommandBuffer->computeSamplers[firstSlot + i] != sampler) {
+            D3D12_INTERNAL_TrackSampler(
+                d3d12CommandBuffer,
+                (D3D12Sampler *)textureSamplerBindings[i].sampler);
 
-        D3D12_INTERNAL_TrackTexture(
-            d3d12CommandBuffer,
-            container->activeTexture);
+            d3d12CommandBuffer->computeSamplers[firstSlot + i] = (D3D12Sampler *)textureSamplerBindings[i].sampler;
+            d3d12CommandBuffer->needComputeSamplerBind = true;
+        }
 
-        d3d12CommandBuffer->computeSamplerTextures[firstSlot + i] = container->activeTexture;
-        d3d12CommandBuffer->computeSamplers[firstSlot + i] = (D3D12Sampler *)textureSamplerBindings[i].sampler;
-    }
+        if (d3d12CommandBuffer->computeSamplerTextures[firstSlot + i] != container->activeTexture) {
+            D3D12_INTERNAL_TrackTexture(
+                d3d12CommandBuffer,
+                container->activeTexture);
 
-    d3d12CommandBuffer->needComputeSamplerBind = true;
+            d3d12CommandBuffer->computeSamplerTextures[firstSlot + i] = container->activeTexture;
+            d3d12CommandBuffer->needComputeSamplerBind = true;
+        }
+    }
 }
 
 static void D3D12_BindComputeStorageTextures(
@@ -5355,27 +5377,31 @@ static void D3D12_BindComputeStorageTextures(
     D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
 
     for (Uint32 i = 0; i < numBindings; i += 1) {
-        if (d3d12CommandBuffer->computeReadOnlyStorageTextures[firstSlot + i] != NULL) {
-            D3D12_INTERNAL_TextureTransitionToDefaultUsage(
+        D3D12TextureContainer *container = (D3D12TextureContainer *)storageTextures[i];
+
+        if (d3d12CommandBuffer->computeReadOnlyStorageTextures[firstSlot + i] != container->activeTexture) {
+            /* If a different texture was in this slot, transition it back to its default usage */
+            if (d3d12CommandBuffer->computeReadOnlyStorageTextures[firstSlot + i] != NULL) {
+                D3D12_INTERNAL_TextureTransitionToDefaultUsage(
+                    d3d12CommandBuffer,
+                    D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
+                    d3d12CommandBuffer->computeReadOnlyStorageTextures[firstSlot + i]);
+            }
+
+            /* Then transition the new texture and prepare it for binding */
+            D3D12_INTERNAL_TextureTransitionFromDefaultUsage(
                 d3d12CommandBuffer,
                 D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
-                d3d12CommandBuffer->computeReadOnlyStorageTextures[firstSlot + i]);
-        }
+                container->activeTexture);
 
-        D3D12TextureContainer *container = (D3D12TextureContainer *)storageTextures[i];
-        d3d12CommandBuffer->computeReadOnlyStorageTextures[firstSlot + i] = container->activeTexture;
-
-        D3D12_INTERNAL_TextureTransitionFromDefaultUsage(
-            d3d12CommandBuffer,
-            D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
-            container->activeTexture);
+            D3D12_INTERNAL_TrackTexture(
+                d3d12CommandBuffer,
+                container->activeTexture);
 
-        D3D12_INTERNAL_TrackTexture(
-            d3d12CommandBuffer,
-            container->activeTexture);
+            d3d12CommandBuffer->computeReadOnlyStorageTextures[firstSlot + i] = container->activeTexture;
+            d3d12CommandBuffer->needComputeReadOnlyStorageTextureBind = true;
+        }
     }
-
-    d3d12CommandBuffer->needComputeReadOnlyStorageTextureBind = true;
 }
 
 static void D3D12_BindComputeStorageBuffers(
@@ -5387,29 +5413,32 @@ static void D3D12_BindComputeStorageBuffers(
     D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
 
     for (Uint32 i = 0; i < numBindings; i += 1) {
-        if (d3d12CommandBuffer->computeReadOnlyStorageBuffers[firstSlot + i] != NULL) {
-            D3D12_INTERNAL_BufferTransitionToDefaultUsage(
-                d3d12CommandBuffer,
-                D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
-                d3d12CommandBuffer->computeReadOnlyStorageBuffers[firstSlot + i]);
-        }
-
         D3D12BufferContainer *container = (D3D12BufferContainer *)storageBuffers[i];
         D3D12Buffer *buffer = container->activeBuffer;
 
-        d3d12CommandBuffer->computeReadOnlyStorageBuffers[firstSlot + i] = buffer;
+        if (d3d12CommandBuffer->computeReadOnlyStorageBuffers[firstSlot + i] != buffer) {
+            /* If a different buffer was in this slot, transition it back to its default usage */
+            if (d3d12CommandBuffer->computeReadOnlyStorageBuffers[firstSlot + i] != NULL) {
+                D3D12_INTERNAL_BufferTransitionToDefaultUsage(
+                    d3d12CommandBuffer,
+                    D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
+                    d3d12CommandBuffer->computeReadOnlyStorageBuffers[firstSlot + i]);
+            }
 
-        D3D12_INTERNAL_BufferTransitionFromDefaultUsage(
-            d3d12CommandBuffer,
-            D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
-            buffer);
+            /* Then transition the new buffer and prepare it for binding */
+            D3D12_INTERNAL_BufferTransitionFromDefaultUsage(
+                d3d12CommandBuffer,
+                D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
+                buffer);
 
-        D3D12_INTERNAL_TrackBuffer(
-            d3d12CommandBuffer,
-            buffer);
-    }
+            D3D12_INTERNAL_TrackBuffer(
+                d3d12CommandBuffer,
+                buffer);
 
-    d3d12CommandBuffer->needComputeReadOnlyStorageBufferBind = true;
+            d3d12CommandBuffer->computeReadOnlyStorageBuffers[firstSlot + i] = buffer;
+            d3d12CommandBuffer->needComputeReadOnlyStorageBufferBind = true;
+        }
+    }
 }
 
 static void D3D12_PushComputeUniformData(
diff --git a/src/gpu/metal/SDL_gpu_metal.m b/src/gpu/metal/SDL_gpu_metal.m
index ac712d52fa90b..3020492d91334 100644
--- a/src/gpu/metal/SDL_gpu_metal.m
+++ b/src/gpu/metal/SDL_gpu_metal.m
@@ -567,30 +567,37 @@ static MTLDepthClipMode SDLToMetal_DepthClipMode(
     MetalComputePipeline *compute_pipeline;
 
     // Resource slot state
+    bool needVertexBufferBind;
     bool needVertexSamplerBind;
     bool needVertexStorageTextureBind;
     bool needVertexStorageBufferBind;
-    bool needVertexUniformBind;
+    bool needVertexUniformBufferBind[MAX_UNIFORM_BUFFERS_PER_STAGE];
 
     bool needFragmentSamplerBind;
     bool needFragmentStorageTextureBind;
     bool needFragmentStorageBufferBind;
-    bool needFragmentUniformBind;
+    bool needFragmentUniformBufferBind[MAX_UNIFORM_BUFFERS_PER_STAGE];
 
     bool needComputeSamplerBind;
-    bool needComputeTextureBind;
-    bool needComputeBufferBind;
-    bool needComputeUniformBind;
+    bool needComputeReadOnlyStorageTextureBind;
+    bool needComputeReadOnlyStorageBufferBind;
+    bool needComputeUniformBufferBind[MAX_UNIFORM_BUFFERS_PER_STAGE];
+
+    id<MTLBuffer> vertexBuffers[MAX_VERTEX_BUFFERS];
+    Uint32 vertexBufferOffsets[MAX_VERTEX_BUFFERS];
+    Uint32 vertexBufferCount;
 
     id<MTLSamplerState> vertexSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
     id<MTLTexture> vertexTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
     id<MTLTexture> vertexStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
     id<MTLBuffer> vertexStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
+    MetalUniformBuffer *vertexUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
 
     id<MTLSamplerState> fragmentSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
     id<MTLTexture> fragmentTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
     id<MTLTexture> fragmentStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
     id<MTLBuffer> fragmentStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
+    MetalUniformBuffer *fragmentUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
 
     id<MTLTexture> computeSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
     id<MTLSamplerState> computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
@@ -598,10 +605,6 @@ static MTLDepthClipMode SDLToMetal_DepthClipMode(
     id<MTLBuffer> computeReadOnlyBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
     id<MTLTexture> computeReadWriteTextures[MAX_COMPUTE_WRITE_TEXTURES];
     id<MTLBuffer> computeReadWriteBuffers[MAX_COMPUTE_WRITE_BUFFERS];
-
-    // Uniform buffers
-    MetalUniformBuffer *vertexUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
-    MetalUniformBuffer *fragmentUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
     MetalUniformBuffer *computeUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
 
     MetalUniformBuffer **usedUniformBuffers;
@@ -2130,20 +2133,6 @@ static bool METAL_INTERNAL_AcquireFence(
             commandBuffer->computeUniformBuffers[i] = NULL;
         }
 
-        // FIXME: Do we actually need to set this?
-        commandBuffer->needVertexSamplerBind = true;
-        commandBuffer->needVertexStorageTextureBind = true;
-        commandBuffer->needVertexStorageBufferBind = true;
-        commandBuffer->needVertexUniformBind = true;
-        commandBuffer->needFragmentSamplerBind = true;
-        commandBuffer->needFragmentStorageTextureBind = true;
-        commandBuffer->needFragmentStorageBufferBind = true;
-        commandBuffer->needFragmentUniformBind = true;
-        commandBuffer->needComputeSamplerBind = true;
-        commandBuffer->needComputeBufferBind = true;
-        commandBuffer->needComputeTextureBind = true;
-        commandBuffer->needComputeUniformBind = true;
-
         commandBuffer->autoReleaseFence = true;
 
         SDL_UnlockMutex(renderer->acquireCommandBufferLock);
@@ -2397,73 +2386,71 @@ static void METAL_BindGraphicsPipeline(
 {
     @autoreleasepool {
         MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer;
-        MetalGraphicsPipeline *metalGraphicsPipeline = (MetalGraphicsPipeline *)graphicsPipeline;
-        SDL_GPURasterizerState *rast = &metalGraphicsPipeline->rasterizerState;
+        MetalGraphicsPipeline *pipeline = (MetalGraphicsPipeline *)graphicsPipeline;
+        SDL_GPURasterizerState *rast = &pipeline->rasterizerState;
+        Uint32 i;
 
-        metalCommandBuffer->graphics_pipeline = metalGraphicsPipeline;
+        metalCommandBuffer->graphics_pipeline = pipeline;
 
-        [metalCommandBuffer->renderEncoder setRenderPipelineState:metalGraphicsPipeline->handle];
+        [metalCommandBuffer->renderEncoder setRenderPipelineState:pipeline->handle];
 
         // Apply rasterizer state
-        [metalCommandBuffer->renderEncoder setTriangleFillMode:SDLToMetal_PolygonMode[metalGraphicsPipeline->rasterizerState.fill_mode]];
-        [metalCommandBuffer->renderEncoder setCullMode:SDLToMetal_CullMode[metalGraphicsPipeline->rasterizerState.cull_mode]];
-        [metalCommandBuffer->renderEncoder setFrontFacingWinding:SDLToMetal_FrontFace[metalGraphicsPipeline->rasterizerState.front_face]];
-        [metalCommandBuffer->renderEncoder setDepthClipMode:SDLToMetal_DepthClipMode(metalGraphicsPipeline->rasterizerState.enable_depth_clip)];
+        [metalCommandBuffer->renderEncoder setTriangleFillMode:SDLToMetal_PolygonMode[pipeline->rasterizerState.fill_mode]];
+        [metalCommandBuffer->renderEncoder setCullMode:SDLToMetal_CullMode[pipeline->rasterizerState.cull_mode]];
+        [metalCommandBuffer->renderEncoder setFrontFacingWinding:SDLToMetal_FrontFace[pipeline->rasterizerState.front_face]];
+        [metalCommandBuffer->renderEncoder setDepthClipMode:SDLToMetal_DepthClipMode(pipeline->rasterizerState.enable_depth_clip)];
         [metalCommandBuffer->renderEncoder
             setDepthBias:((rast->enable_depth_bias) ? rast->depth_bias_constant_factor : 0)
               slopeScale:((rast->enable_depth_bias) ? rast->depth_bias_slope_factor : 0)
               clamp:((rast->enable_depth_bias) ? rast->depth_bias_clamp : 0)];
 
         // Apply depth-stencil state
-        if (metalGraphicsPipeline->depth_stencil_state != NULL) {
+        if (pipeline->depth_stencil_state != NULL) {
             [metalCommandBuffer->renderEncoder
-                setDepthStencilState:metalGraphicsPipeline->depth_stencil_state];
+                setDepthStencilState:pipeline->depth_stencil_state];
+        }
+
+        for (i = 0; i < MAX_UNIFORM_BUFFERS_PER_STAGE; i += 1) {
+            metalCommandBuffer->needVertexUniformBufferBind[i] = true;
+            metalCommandBuffer->needFragmentUniformBufferBind[i] = true;
         }
 
-        for (Uint32 i = 0; i < metalGraphicsPipeline->vertexUniformBufferCount; i += 1) {
+        for (i = 0; i < pipeline->vertexUniformBufferCount; i += 1) {
             if (metalCommandBuffer->vertexUniformBuffers[i] == NULL) {
                 metalCommandBuffer->vertexUniformBuffers[i] = METAL_INTERNAL_AcquireUniformBufferFromPool(
                     metalCommandBuffer);
             }
         }
 
-        for (Uint32 i = 0; i < metalGraphicsPipeline->fragmentUniformBufferCount; i += 1) {
+        for (i = 0; i < pipeline->fragmentUniformBufferCount; i += 1) {
             if (metalCommandBuffer->fragmentUniformBuffers[i] == NULL) {
                 metalCommandBuffer->fragmentUniformBuffers[i] = METAL_INTERNAL_AcquireUniformBufferFromPool(
                     metalCommandBuffer);
             }
         }
-
-        metalCommandBuffer->needVertexUniformBind = true;
-        metalCommandBuffer->needFragmentUniformBind = true;
     }
 }
 
 static void METAL_BindVertexBuffers(
     SDL_GPUCommandBuffer *commandBuffer,
-    Uint32 firstBinding,
+    Uint32 firstSlot,
     const SDL_GPUBufferBinding *bindings,
     Uint32 numBindings)
 {
-    @autoreleasepool {
-        MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer;
-        id<MTLBuffer> metalBuffers[MAX_VERTEX_BUFFERS];
-        NSUInteger bufferOffsets[MAX_VERTEX_BUFFERS];
-        NSRange range = NSMakeRange(METAL_FIRST_VERTEX_BUFFER_SLOT + firstBinding, numBindings);
-
-        if (range.length == 0) {
-            return;
-        }
+    MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer;
 
-        for (Uint32 i = 0; i < numBindings; i += 1) {
-            MetalBuffer *currentBuffer = ((MetalBufferContainer *)bindings[i].buffer)->activeBuffer;
-            metalBuffers[firstBinding + i] = currentBuffer->handle;
-            bufferOffsets[firstBinding + i] = bindings[i].offset;
+    for (Uint32 i = 0; i < numBindings; i += 1) {
+        MetalBuffer *currentBuffer = ((MetalBufferContainer *)bindings[i].buffer)->activeBuffer;
+        if (metalCommandBuffer->vertexBuffers[firstSlot + i] != currentBuffer->handle || metalCommandBuffer->vertexBufferOffsets[firstSlot + i] != bindings[i].offset) {
+            metalCommandBuffer->vertexBuffers[firstSlot + i] = currentBuffer->handle;
+            metalCommandBuffer->vertexBufferOffsets[firstSlot + i] = bindings[i].offset;
+            metalCommandBuffer->needVertexBufferBind = true;
             METAL_INTERNAL_TrackBuffer(metalCommandBuffer, currentBuffer);
         }
-
-        [metalCommandBuffer->renderEncoder setVertexBuffers:metalBuffers offsets:bufferOffsets withRange:range];
     }
+
+    metalCommandBuffer->vertexBufferCount =
+        SDL_max(metalCommandBuffer->vertexBufferCount, firstSlot + numBindings);
 }
 
 static void METAL_BindIndexBuffer(
@@ -2487,22 +2474,28 @@ static void METAL_BindVertexSamplers(
 {
     MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer;
     MetalTextureContainer *textureContainer;
+    MetalSampler *sampler;
 
     for (Uint32 i = 0; i < numBindings; i += 1) {
         textureContainer = (MetalTextureContainer *)textureSamplerBindings[i].texture;
+        sampler = (MetalSampler *)textureSamplerBindings[i].sampler;
 
-        METAL_INTERNAL_TrackTexture(
-            metalCommandBuffer,
-            textureContainer->activeTexture);
+        if (metalCommandBuffer->vertexSamplers[firstSlot + i] != sampler->handle) {
+            metalCommandBuffer->vertexSamplers[firstSlot + i] = sampler->handle;
+            metalCommandBuffer->needVertexSamplerBind  = true;
+        }
 
-        metalCommandBuffer->vertexSamplers[firstSlot + i] =
-            ((MetalSampler *)textureSamplerBindings[i].sampler)->handle;
+        if (metalCommandBuffer->vertexTextures[firstSlot + i] != textureContainer->activeTexture->handle) {
+            METAL_INTERNAL_TrackTexture(
+                metalCommandBuffer,
+                textureContainer->activeTexture);
 
-        metalCommandBuffer->vertexTextures[firstSlot + i] =
-            textureContainer->activeTexture->handle;
-    }
+            metalCommandBuffer->vertexTextures[firstSlot + i] =
+                textureContainer->activeTexture->handle;
 
-    metalCommandBuffer->needVertexSamplerBind = true;
+            metalCommandBuffer->needVertexSamplerBind  = true;
+        }
+    }
 }
 
 static void METAL_BindVertexStorageTextures(
@@ -2517,15 +2510,17 @@ static void METAL_BindVertexStorageTextures(
     for (Uint32 i = 0; i < numBindings; i += 1) {
         textureContainer = (MetalTextureContainer *)storageTextures[i];
 
-        METAL_INTERNAL_TrackTexture(
-            metalCommandBuffer,
-            textureContainer->activeTexture);
+        if (metalCommandBuffer->vertexStorageTextures[firstSlot + i] != textureContainer->activeTexture->handle) {
+            METAL_INTERNAL_TrackTexture(
+                metalCommandBuffer,
+                textureContainer->activeTexture);
 
-        metalCommandBuffer->vertexStorageTextures[firstSlot + i] =
-            textureContainer->activeTexture->handle;
-    }
+            metalCommandBuffer->vertexStorageTextures[firstSlot + i] =
+                textureContainer->activeTexture->handle;
 
-    metalCommandBuffer->needVertexStorageTextureBind = true;
+            metalCommandBuffer->needVertexStorageTextureBind = true;
+        }
+    }
 }
 
 static void METAL_BindVertexStorageBuffers(
@@ -2540,15 +2535,17 @@ static void METAL_BindVertexStorageBuffers(
     for (Uint32 i = 0; i < numBindings; i += 1) {
         bufferContainer = (MetalBufferContainer *)storageBuffers[i];
 
-        METAL_INTERNAL_TrackBuffer(
-            metalCommandBuffer,
-            bufferContainer->activeBuffer);
+        if (metalCommandBuffer->vertexStorageBuffers[firstSlot + i] != bufferContainer->activeBuffer->handle) {
+            METAL_INTERNAL_TrackBuffer(
+                metalCommandBuffer,
+                bufferContainer->activeBuffer);
+
+            metalCommandBuffer->vertexStorageBuffers[firstSlot + i] =
+                bufferContainer->activeBuffer->handle;
 
-        metalCommandBuffer->vertexStorageBuffers[firstSlot + i] =
-            bufferContainer->activeBuffer->handle;
+            metalCommandBuffer->needVertexStorageBufferBind = true;
+        }
     }
-
-    metalCommandBuffer->needVertexStorageBufferBind = true;
 }
 
 static void METAL_BindFragmentSamplers(
@@ -2559,22 +2556,28 @@ static void METAL_BindFragmentSamplers(
 {
     MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer;
     MetalTextureContainer *textureContainer;
+    MetalSampler *sampler;
 
     for (Uint32 i = 0; i < numBindings; i += 1) {
         textureContainer = (MetalTextureContainer *)textureSamplerBindings[i].texture;
+        sampler = (MetalSampler *)textureSamplerBindings[i].sampler;
 
-        METAL_INTERNAL_TrackTexture(
-            metalCommandBuffer,
-            textureContainer->activeTexture);
+        if (metalCommandBuffer->fragmentSamplers[firstSlot + i] != sampler->handle) {
+            metalCommandBuffer->fragmentSamplers[firstSlot + i] = sampler->handle;
+            metalCommandBuffer->needFragmentSamplerBind  = true;
+        }
 
-        metalCommandBuffer->fragmentSamplers[firstSlot + i] =
-            ((MetalSampler *)textureSamplerBindings[i].sampler)->handle;
+        if (metalCommandBuffer->fragmentTextures[firstSlot + i] != textureContainer->activeTexture->handle) {
+            METAL_INTERNAL_TrackTexture(
+                metalCommandBuffer,
+                textureContainer->activeTexture);
 
-        metalCommandBuffer->fragmentTextures[firstSlot + i] =
-            textureContainer->activeTexture->handle;
-    }
+            metalCommandBuffer->fragmentTextures[firstSlot + i] =
+                textureContainer->activeTexture->handle;
 
-    metalCommandBuffer->needFragmentSamplerBind = true;
+            metalCommandBuffer->needFragmentSamplerBind  = true;
+        }
+    }
 }
 
 static void METAL_BindFragmentStorageTextures(
@@ -2589,15 +2592,17 @@ static void METAL_BindFragmentStorageTextures(
     for (Uint32 i = 0; i < numBindings; i += 1) {
         textureContainer = (MetalTextureContainer *)storageTextures[i];
 
-        METAL_INTERNAL_TrackTexture(
-            metalCommandBuffer,
-            textureContainer->activeTexture);
+        if (metalCommandBuffer->fragmentStorageTextures[firstSlot + i] != textureContainer->activeTexture->handle) {
+            METAL_INTERNAL_TrackTexture(
+                metalCommandBuffer,
+                textureContainer->activeTexture);
+
+            metalCommandBuffer->fragmentStorageTextures[firstSlot + i] =
+                textureContainer->activeTexture->handle;
 
-        metalCommandBuffer->fragmentStorageTextures[firstSlot + i] =
-            textureContainer->activeTexture->handle;
+            metalCommandBuffer->needFragmentStorageTextureBind = true;
+        }
     }
-
-    metalCommandBuffer->needFragmentStorageTextureBind = true;
 }
 
 static void METAL_BindFragmentStorageBuffers(
@@ -2612,15 +2617,17 @@ static void METAL_BindFragmentStorageBuffers(
     for (Uint32 i = 0; i < numBindings; i += 1) {
         bufferContainer = (MetalBufferContainer *)storageBuffers[i];
 
-        METAL_INTERNAL_TrackBuffer(
-            metalCommandBuffer,
-            bufferContainer->activeBuffer);
+        if (metalCommandBuffer->fragmentStorageBuffers[firstSlot + i] != bufferContainer->activeBuffer->handle) {
+            METAL_INTERNAL_TrackBuffer(
+                metalCommandBuffer,
+                bufferContainer->activeBuffer);
 
-        metalCommandBuffer->fragmentStorageBuffers[firstSlot + i] =
-            bufferContainer->activeBuffer->handle;
-    }
+            metalCommandBuffer->fragmentStorageBuffers[firstSlot + i] =
+                bufferContainer->activeBuffer->handle;
 
-    metalCommandBuffer->needFragmentStorageBufferBind = true;
+            metalCommandBuffer->needFragmentStorageBufferBind = true;
+        }
+    }
 }
 
 // This function assumes that it's called from within an autorelease pool
@@ -2630,85 +2637,115 @@ static void METAL_INTERNAL_BindGraphicsResources(
     MetalGraphicsPipeline *graphicsPipeline = commandBuffer->graphics_pipeline;
     NSUInteger offsets[MAX_STORAGE_BUFFERS_PER_STAGE] = { 0 };
 
+    // Ver

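(This patch is truncated here: the remaining src/gpu/metal/SDL_gpu_metal.m changes and the entire src/gpu/vulkan/SDL_gpu_vulkan.c diff listed in the diffstat above are not shown. See SDL commit 8e766c925229af922630ec72812c034b16b8c974 for the complete change.)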