SDL: Add SDL_BindGPUComputeSamplers (#10778)

From 2b8a349b2622f566645a90226ed3d7af7d17801f Mon Sep 17 00:00:00 2001
From: Evan Hemsley <[EMAIL REDACTED]>
Date: Tue, 10 Sep 2024 19:20:14 -0700
Subject: [PATCH] Add SDL_BindGPUComputeSamplers (#10778)

---------

Co-authored-by: Caleb Cornett <caleb.cornett@outlook.com>
---
 include/SDL3/SDL_gpu.h            |  27 +-
 src/dynapi/SDL_dynapi.sym         |   1 +
 src/dynapi/SDL_dynapi_overrides.h |   1 +
 src/dynapi/SDL_dynapi_procs.h     |   1 +
 src/gpu/SDL_gpu.c                 |  26 ++
 src/gpu/SDL_sysgpu.h              |   7 +
 src/gpu/d3d11/SDL_gpu_d3d11.c     | 478 +++++++++++++++++++++---------
 src/gpu/d3d12/SDL_gpu_d3d12.c     | 152 ++++++++--
 src/gpu/metal/SDL_gpu_metal.m     | 121 +++++---
 src/gpu/vulkan/SDL_gpu_vulkan.c   |  85 +++++-
 10 files changed, 691 insertions(+), 208 deletions(-)

diff --git a/include/SDL3/SDL_gpu.h b/include/SDL3/SDL_gpu.h
index e6dbdf52c7487..df69c0f0d68f7 100644
--- a/include/SDL3/SDL_gpu.h
+++ b/include/SDL3/SDL_gpu.h
@@ -1491,6 +1491,7 @@ typedef struct SDL_GPUComputePipelineCreateInfo
     const Uint8 *code;                      /**< A pointer to compute shader code. */
     const char *entrypoint;                 /**< A pointer to a null-terminated UTF-8 string specifying the entry point function name for the shader. */
     SDL_GPUShaderFormat format;             /**< The format of the compute shader code. */
+    Uint32 num_samplers;                    /**< The number of samplers defined in the shader. */
     Uint32 num_readonly_storage_textures;   /**< The number of readonly storage textures defined in the shader. */
     Uint32 num_readonly_storage_buffers;    /**< The number of readonly storage buffers defined in the shader. */
     Uint32 num_writeonly_storage_textures;  /**< The number of writeonly storage textures defined in the shader. */
@@ -1791,13 +1792,13 @@ extern SDL_DECLSPEC SDL_GPUDriver SDLCALL SDL_GetGPUDriver(SDL_GPUDevice *device
  *
  * For SPIR-V shaders, use the following resource sets:
  *
- * - 0: Read-only storage textures, followed by read-only storage buffers
+ * - 0: Sampled textures, followed by read-only storage textures, followed by read-only storage buffers
  * - 1: Write-only storage textures, followed by write-only storage buffers
  * - 2: Uniform buffers
  *
  * For DXBC Shader Model 5_0 shaders, use the following register order:
  *
- * - t registers: Read-only storage textures, followed by read-only storage
+ * - t registers: Sampled textures, followed by read-only storage textures, followed by read-only storage
  *   buffers
  * - u registers: Write-only storage textures, followed by write-only storage
  *   buffers
@@ -1805,7 +1806,7 @@ extern SDL_DECLSPEC SDL_GPUDriver SDLCALL SDL_GetGPUDriver(SDL_GPUDevice *device
  *
  * For DXIL shaders, use the following register order:
  *
- * - (t[n], space0): Read-only storage textures, followed by read-only storage
+ * - (t[n], space0): Sampled textures, followed by read-only storage textures, followed by read-only storage
  *   buffers
  * - (u[n], space1): Write-only storage textures, followed by write-only
  *   storage buffers
@@ -1815,7 +1816,7 @@ extern SDL_DECLSPEC SDL_GPUDriver SDLCALL SDL_GetGPUDriver(SDL_GPUDevice *device
  *
  * - [[buffer]]: Uniform buffers, followed by write-only storage buffers,
  *   followed by write-only storage buffers
- * - [[texture]]: Read-only storage textures, followed by write-only storage
+ * - [[texture]]: Sampled textures, followed by read-only storage textures, followed by write-only storage
  *   textures
  *
  * \param device a GPU Context.
@@ -2757,6 +2758,24 @@ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUComputePipeline(
     SDL_GPUComputePass *compute_pass,
     SDL_GPUComputePipeline *compute_pipeline);
 
+/**
+ * Binds texture-sampler pairs for use on the compute shader.
+ *
+ * The textures must have been created with SDL_GPU_TEXTUREUSAGE_SAMPLER.
+ *
+ * \param compute_pass a compute pass handle.
+ * \param first_slot the compute sampler slot to begin binding from.
+ * \param texture_sampler_bindings an array of texture-sampler binding structs.
+ * \param num_bindings the number of texture-sampler bindings to bind from the array.
+ *
+ * \since This function is available since SDL 3.0.0
+ */
+extern SDL_DECLSPEC void SDLCALL SDL_BindGPUComputeSamplers(
+    SDL_GPUComputePass *compute_pass,
+    Uint32 first_slot,
+    const SDL_GPUTextureSamplerBinding *texture_sampler_bindings,
+    Uint32 num_bindings);
+
 /**
  * Binds storage textures as readonly for use on the compute pipeline.
  *
diff --git a/src/dynapi/SDL_dynapi.sym b/src/dynapi/SDL_dynapi.sym
index 1b9d1b280fea4..8b60ab7b75afb 100644
--- a/src/dynapi/SDL_dynapi.sym
+++ b/src/dynapi/SDL_dynapi.sym
@@ -29,6 +29,7 @@ SDL3_0.0.0 {
     SDL_BindAudioStream;
     SDL_BindAudioStreams;
     SDL_BindGPUComputePipeline;
+    SDL_BindGPUComputeSamplers;
     SDL_BindGPUComputeStorageBuffers;
     SDL_BindGPUComputeStorageTextures;
     SDL_BindGPUFragmentSamplers;
diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h
index 99d95a43b2661..2a25777781f42 100644
--- a/src/dynapi/SDL_dynapi_overrides.h
+++ b/src/dynapi/SDL_dynapi_overrides.h
@@ -54,6 +54,7 @@
 #define SDL_BindAudioStream SDL_BindAudioStream_REAL
 #define SDL_BindAudioStreams SDL_BindAudioStreams_REAL
 #define SDL_BindGPUComputePipeline SDL_BindGPUComputePipeline_REAL
+#define SDL_BindGPUComputeSamplers SDL_BindGPUComputeSamplers_REAL
 #define SDL_BindGPUComputeStorageBuffers SDL_BindGPUComputeStorageBuffers_REAL
 #define SDL_BindGPUComputeStorageTextures SDL_BindGPUComputeStorageTextures_REAL
 #define SDL_BindGPUFragmentSamplers SDL_BindGPUFragmentSamplers_REAL
diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h
index 3a60c9f34c806..e66887bfe753d 100644
--- a/src/dynapi/SDL_dynapi_procs.h
+++ b/src/dynapi/SDL_dynapi_procs.h
@@ -74,6 +74,7 @@ SDL_DYNAPI_PROC(SDL_GPURenderPass*,SDL_BeginGPURenderPass,(SDL_GPUCommandBuffer
 SDL_DYNAPI_PROC(SDL_bool,SDL_BindAudioStream,(SDL_AudioDeviceID a, SDL_AudioStream *b),(a,b),return)
 SDL_DYNAPI_PROC(SDL_bool,SDL_BindAudioStreams,(SDL_AudioDeviceID a, SDL_AudioStream **b, int c),(a,b,c),return)
 SDL_DYNAPI_PROC(void,SDL_BindGPUComputePipeline,(SDL_GPUComputePass *a, SDL_GPUComputePipeline *b),(a,b),)
+SDL_DYNAPI_PROC(void,SDL_BindGPUComputeSamplers,(SDL_GPUComputePass *a, Uint32 b, const SDL_GPUTextureSamplerBinding *c, Uint32 d),(a,b,c,d),)
 SDL_DYNAPI_PROC(void,SDL_BindGPUComputeStorageBuffers,(SDL_GPUComputePass *a, Uint32 b, SDL_GPUBuffer *const *c, Uint32 d),(a,b,c,d),)
 SDL_DYNAPI_PROC(void,SDL_BindGPUComputeStorageTextures,(SDL_GPUComputePass *a, Uint32 b, SDL_GPUTexture *const *c, Uint32 d),(a,b,c,d),)
 SDL_DYNAPI_PROC(void,SDL_BindGPUFragmentSamplers,(SDL_GPURenderPass *a, Uint32 b, const SDL_GPUTextureSamplerBinding *c, Uint32 d),(a,b,c,d),)
diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c
index 7e2396d92bab0..344794a260922 100644
--- a/src/gpu/SDL_gpu.c
+++ b/src/gpu/SDL_gpu.c
@@ -1782,6 +1782,32 @@ void SDL_BindGPUComputePipeline(
     commandBufferHeader->compute_pipeline_bound = true;
 }
 
+void SDL_BindGPUComputeSamplers(
+    SDL_GPUComputePass *compute_pass,
+    Uint32 first_slot,
+    const SDL_GPUTextureSamplerBinding *texture_sampler_bindings,
+    Uint32 num_bindings)
+{
+    if (compute_pass == NULL) {
+        SDL_InvalidParamError("compute_pass");
+        return;
+    }
+    if (texture_sampler_bindings == NULL && num_bindings > 0) {
+        SDL_InvalidParamError("texture_sampler_bindings");
+        return;
+    }
+
+    if (COMPUTEPASS_DEVICE->debug_mode) {
+        CHECK_COMPUTEPASS
+    }
+
+    COMPUTEPASS_DEVICE->BindComputeSamplers(
+        COMPUTEPASS_COMMAND_BUFFER,
+        first_slot,
+        texture_sampler_bindings,
+        num_bindings);
+}
+
 void SDL_BindGPUComputeStorageTextures(
     SDL_GPUComputePass *compute_pass,
     Uint32 first_slot,
diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h
index 1706071dc2082..93a904cd352c8 100644
--- a/src/gpu/SDL_sysgpu.h
+++ b/src/gpu/SDL_sysgpu.h
@@ -511,6 +511,12 @@ struct SDL_GPUDevice
         SDL_GPUCommandBuffer *commandBuffer,
         SDL_GPUComputePipeline *computePipeline);
 
+    void (*BindComputeSamplers)(
+        SDL_GPUCommandBuffer *commandBuffer,
+        Uint32 firstSlot,
+        const SDL_GPUTextureSamplerBinding *textureSamplerBindings,
+        Uint32 numBindings);
+
     void (*BindComputeStorageTextures)(
         SDL_GPUCommandBuffer *commandBuffer,
         Uint32 firstSlot,
@@ -740,6 +746,7 @@ struct SDL_GPUDevice
     ASSIGN_DRIVER_FUNC(EndRenderPass, name)                 \
     ASSIGN_DRIVER_FUNC(BeginComputePass, name)              \
     ASSIGN_DRIVER_FUNC(BindComputePipeline, name)           \
+    ASSIGN_DRIVER_FUNC(BindComputeSamplers, name)           \
     ASSIGN_DRIVER_FUNC(BindComputeStorageTextures, name)    \
     ASSIGN_DRIVER_FUNC(BindComputeStorageBuffers, name)     \
     ASSIGN_DRIVER_FUNC(PushComputeUniformData, name)        \
diff --git a/src/gpu/d3d11/SDL_gpu_d3d11.c b/src/gpu/d3d11/SDL_gpu_d3d11.c
index 711688ae3b635..c0cfede639478 100644
--- a/src/gpu/d3d11/SDL_gpu_d3d11.c
+++ b/src/gpu/d3d11/SDL_gpu_d3d11.c
@@ -414,13 +414,13 @@ typedef struct D3D11TextureContainer
     TextureCommonHeader header;
 
     D3D11Texture *activeTexture;
-    bool canBeCycled;
 
     Uint32 textureCapacity;
     Uint32 textureCount;
     D3D11Texture **textures;
 
     char *debugName;
+    bool canBeCycled;
 } D3D11TextureContainer;
 
 typedef struct D3D11TextureSubresource
@@ -523,6 +523,7 @@ typedef struct D3D11ComputePipeline
 {
     ID3D11ComputeShader *computeShader;
 
+    Uint32 numSamplers;
     Uint32 numReadonlyStorageTextures;
     Uint32 numWriteonlyStorageTextures;
     Uint32 numReadonlyStorageBuffers;
@@ -609,6 +610,11 @@ typedef struct D3D11UniformBuffer
     Uint32 currentBlockSize;
 } D3D11UniformBuffer;
 
+typedef struct D3D11Sampler
+{
+    ID3D11SamplerState *handle;
+} D3D11Sampler;
+
 typedef struct D3D11Renderer D3D11Renderer;
 
 typedef struct D3D11CommandBuffer
@@ -646,35 +652,40 @@ typedef struct D3D11CommandBuffer
     bool needVertexBufferBind;
 
     bool needVertexSamplerBind;
-    bool needVertexResourceBind;
+    bool needVertexStorageTextureBind;
+    bool needVertexStorageBufferBind;
     bool needVertexUniformBufferBind;
 
     bool needFragmentSamplerBind;
-    bool needFragmentResourceBind;
+    bool needFragmentStorageTextureBind;
+    bool needFragmentStorageBufferBind;
     bool needFragmentUniformBufferBind;
 
-    bool needComputeUAVBind;
-    bool needComputeSRVBind;
+    bool needComputeSamplerBind;
+    bool needComputeReadOnlyTextureBind;
+    bool needComputeReadOnlyBufferBind;
     bool needComputeUniformBufferBind;
 
     ID3D11Buffer *vertexBuffers[MAX_BUFFER_BINDINGS];
     Uint32 vertexBufferOffsets[MAX_BUFFER_BINDINGS];
     Uint32 vertexBufferCount;
 
-    ID3D11SamplerState *vertexSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
-    ID3D11ShaderResourceView *vertexShaderResourceViews[MAX_TEXTURE_SAMPLERS_PER_STAGE +
-                                                        MAX_STORAGE_BUFFERS_PER_STAGE +
-                                                        MAX_STORAGE_TEXTURES_PER_STAGE];
+    D3D11Texture *vertexSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    D3D11Sampler *vertexSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    D3D11Texture *vertexStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
+    D3D11Buffer *vertexStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
 
-    ID3D11SamplerState *fragmentSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
-    ID3D11ShaderResourceView *fragmentShaderResourceViews[MAX_TEXTURE_SAMPLERS_PER_STAGE +
-                                                          MAX_STORAGE_BUFFERS_PER_STAGE +
-                                                          MAX_STORAGE_TEXTURES_PER_STAGE];
+    D3D11Texture *fragmentSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    D3D11Sampler *fragmentSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    D3D11Texture *fragmentStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
+    D3D11Buffer *fragmentStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
 
-    ID3D11ShaderResourceView *computeShaderResourceViews[MAX_STORAGE_TEXTURES_PER_STAGE +
-                                                         MAX_STORAGE_BUFFERS_PER_STAGE];
-    ID3D11UnorderedAccessView *computeUnorderedAccessViews[MAX_COMPUTE_WRITE_TEXTURES +
-                                                           MAX_COMPUTE_WRITE_BUFFERS];
+    D3D11Texture *computeSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    D3D11Sampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    D3D11Texture *computeReadOnlyStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
+    D3D11Buffer *computeReadOnlyStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
+    D3D11TextureSubresource *computeWriteOnlyStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
+    D3D11Buffer *computeWriteOnlyStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
 
     // Uniform buffers
     D3D11UniformBuffer *vertexUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
@@ -703,11 +714,6 @@ typedef struct D3D11CommandBuffer
     Uint32 usedUniformBufferCapacity;
 } D3D11CommandBuffer;
 
-typedef struct D3D11Sampler
-{
-    ID3D11SamplerState *handle;
-} D3D11Sampler;
-
 struct D3D11Renderer
 {
     ID3D11Device1 *device;
@@ -772,20 +778,19 @@ struct D3D11Renderer
     SDL_Mutex *acquireUniformBufferLock;
     SDL_Mutex *fenceLock;
     SDL_Mutex *windowLock;
-};
 
-// Null arrays for resetting shader resource slots
+    // Null arrays for resetting resource slots
+    ID3D11RenderTargetView *nullRTVs[MAX_COLOR_TARGET_BINDINGS];
 
-ID3D11RenderTargetView *nullRTVs[MAX_COLOR_TARGET_BINDINGS];
-
-ID3D11ShaderResourceView *nullSRVs[MAX_TEXTURE_SAMPLERS_PER_STAGE +
+    ID3D11ShaderResourceView *nullSRVs[MAX_TEXTURE_SAMPLERS_PER_STAGE * 2 +
                                    MAX_STORAGE_TEXTURES_PER_STAGE +
                                    MAX_STORAGE_BUFFERS_PER_STAGE];
 
-ID3D11SamplerState *nullSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    ID3D11SamplerState *nullSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE * 2];
 
-ID3D11UnorderedAccessView *nullUAVs[MAX_COMPUTE_WRITE_TEXTURES +
+    ID3D11UnorderedAccessView *nullUAVs[MAX_COMPUTE_WRITE_TEXTURES +
                                     MAX_COMPUTE_WRITE_BUFFERS];
+};
 
 // Logging
 
@@ -1527,6 +1532,7 @@ static SDL_GPUComputePipeline *D3D11_CreateComputePipeline(
 
     pipeline = SDL_malloc(sizeof(D3D11ComputePipeline));
     pipeline->computeShader = shader;
+    pipeline->numSamplers = createinfo->num_samplers;
     pipeline->numReadonlyStorageTextures = createinfo->num_readonly_storage_textures;
     pipeline->numWriteonlyStorageTextures = createinfo->num_writeonly_storage_textures;
     pipeline->numReadonlyStorageBuffers = createinfo->num_readonly_storage_buffers;
@@ -3235,21 +3241,31 @@ static SDL_GPUCommandBuffer *D3D11_AcquireCommandBuffer(
     }
 
     commandBuffer->needVertexSamplerBind = true;
-    commandBuffer->needVertexResourceBind = true;
+    commandBuffer->needVertexStorageTextureBind = true;
+    commandBuffer->needVertexStorageBufferBind = true;
     commandBuffer->needVertexUniformBufferBind = true;
     commandBuffer->needFragmentSamplerBind = true;
-    commandBuffer->needFragmentResourceBind = true;
+    commandBuffer->needFragmentStorageTextureBind = true;
+    commandBuffer->needFragmentStorageBufferBind = true;
     commandBuffer->needFragmentUniformBufferBind = true;
-    commandBuffer->needComputeUAVBind = true;
-    commandBuffer->needComputeSRVBind = true;
     commandBuffer->needComputeUniformBufferBind = true;
 
     SDL_zeroa(commandBuffer->vertexSamplers);
-    SDL_zeroa(commandBuffer->vertexShaderResourceViews);
+    SDL_zeroa(commandBuffer->vertexSamplerTextures);
+    SDL_zeroa(commandBuffer->vertexStorageTextures);
+    SDL_zeroa(commandBuffer->vertexStorageBuffers);
+
     SDL_zeroa(commandBuffer->fragmentSamplers);
-    SDL_zeroa(commandBuffer->fragmentShaderResourceViews);
-    SDL_zeroa(commandBuffer->computeShaderResourceViews);
-    SDL_zeroa(commandBuffer->computeUnorderedAccessViews);
+    SDL_zeroa(commandBuffer->fragmentSamplerTextures);
+    SDL_zeroa(commandBuffer->fragmentStorageTextures);
+    SDL_zeroa(commandBuffer->fragmentStorageBuffers);
+
+    SDL_zeroa(commandBuffer->computeSamplers);
+    SDL_zeroa(commandBuffer->computeSamplerTextures);
+    SDL_zeroa(commandBuffer->computeReadOnlyStorageTextures);
+    SDL_zeroa(commandBuffer->computeReadOnlyStorageBuffers);
+    SDL_zeroa(commandBuffer->computeWriteOnlyStorageTextureSubresources);
+    SDL_zeroa(commandBuffer->computeWriteOnlyStorageBuffers);
 
     D3D11_INTERNAL_AcquireFence(commandBuffer);
     commandBuffer->autoReleaseFence = 1;
@@ -3489,11 +3505,6 @@ static void D3D11_BeginRenderPass(
     SDL_GPUViewport viewport;
     SDL_Rect scissorRect;
 
-    d3d11CommandBuffer->needVertexSamplerBind = true;
-    d3d11CommandBuffer->needVertexResourceBind = true;
-    d3d11CommandBuffer->needFragmentSamplerBind = true;
-    d3d11CommandBuffer->needFragmentResourceBind = true;
-
     // Clear the bound targets for the current command buffer
     for (Uint32 i = 0; i < MAX_COLOR_TARGET_BINDINGS; i += 1) {
         d3d11CommandBuffer->colorTargetResolveTexture[i] = NULL;
@@ -3717,8 +3728,14 @@ static void D3D11_BindGraphicsPipeline(
         }
     }
 
-    // Mark that uniform bindings are needed
+    // Mark that bindings are needed
+    d3d11CommandBuffer->needVertexSamplerBind = true;
+    d3d11CommandBuffer->needVertexStorageTextureBind = true;
+    d3d11CommandBuffer->needVertexStorageBufferBind = true;
     d3d11CommandBuffer->needVertexUniformBufferBind = true;
+    d3d11CommandBuffer->needFragmentSamplerBind = true;
+    d3d11CommandBuffer->needFragmentStorageTextureBind = true;
+    d3d11CommandBuffer->needFragmentStorageBufferBind = true;
     d3d11CommandBuffer->needFragmentUniformBufferBind = true;
 }
 
@@ -3776,14 +3793,13 @@ static void D3D11_BindVertexSamplers(
             textureContainer->activeTexture);
 
         d3d11CommandBuffer->vertexSamplers[firstSlot + i] =
-            ((D3D11Sampler *)textureSamplerBindings[i].sampler)->handle;
+            (D3D11Sampler *)textureSamplerBindings[i].sampler;
 
-        d3d11CommandBuffer->vertexShaderResourceViews[firstSlot + i] =
-            textureContainer->activeTexture->shaderView;
+        d3d11CommandBuffer->vertexSamplerTextures[firstSlot + i] =
+            textureContainer->activeTexture;
     }
 
     d3d11CommandBuffer->needVertexSamplerBind = true;
-    d3d11CommandBuffer->needVertexResourceBind = true;
 }
 
 static void D3D11_BindVertexStorageTextures(
@@ -3801,11 +3817,11 @@ static void D3D11_BindVertexStorageTextures(
             d3d11CommandBuffer,
             textureContainer->activeTexture);
 
-        d3d11CommandBuffer->vertexShaderResourceViews[firstSlot + i +
-                                                      d3d11CommandBuffer->graphicsPipeline->vertexSamplerCount] = textureContainer->activeTexture->shaderView;
+        d3d11CommandBuffer->vertexStorageTextures[firstSlot + i] =
+            textureContainer->activeTexture;
     }
 
-    d3d11CommandBuffer->needVertexResourceBind = true;
+    d3d11CommandBuffer->needVertexStorageTextureBind = true;
 }
 
 static void D3D11_BindVertexStorageBuffers(
@@ -3825,12 +3841,11 @@ static void D3D11_BindVertexStorageBuffers(
             d3d11CommandBuffer,
             bufferContainer->activeBuffer);
 
-        d3d11CommandBuffer->vertexShaderResourceViews[firstSlot + i +
-                                                      d3d11CommandBuffer->graphicsPipeline->vertexSamplerCount +
-                                                      d3d11CommandBuffer->graphicsPipeline->vertexStorageTextureCount] = bufferContainer->activeBuffer->srv;
+        d3d11CommandBuffer->vertexStorageBuffers[firstSlot + i] =
+            bufferContainer->activeBuffer;
     }
 
-    d3d11CommandBuffer->needVertexResourceBind = true;
+    d3d11CommandBuffer->needVertexStorageBufferBind = true;
 }
 
 static void D3D11_BindFragmentSamplers(
@@ -3849,14 +3864,13 @@ static void D3D11_BindFragmentSamplers(
             textureContainer->activeTexture);
 
         d3d11CommandBuffer->fragmentSamplers[firstSlot + i] =
-            ((D3D11Sampler *)textureSamplerBindings[i].sampler)->handle;
+            (D3D11Sampler *)textureSamplerBindings[i].sampler;
 
-        d3d11CommandBuffer->fragmentShaderResourceViews[firstSlot + i] =
-            textureContainer->activeTexture->shaderView;
+        d3d11CommandBuffer->fragmentSamplerTextures[firstSlot + i] =
+            (D3D11Texture *)textureContainer->activeTexture;
     }
 
     d3d11CommandBuffer->needFragmentSamplerBind = true;
-    d3d11CommandBuffer->needFragmentResourceBind = true;
 }
 
 static void D3D11_BindFragmentStorageTextures(
@@ -3874,11 +3888,11 @@ static void D3D11_BindFragmentStorageTextures(
             d3d11CommandBuffer,
             textureContainer->activeTexture);
 
-        d3d11CommandBuffer->fragmentShaderResourceViews[firstSlot + i +
-                                                        d3d11CommandBuffer->graphicsPipeline->fragmentSamplerCount] = textureContainer->activeTexture->shaderView;
+        d3d11CommandBuffer->fragmentStorageTextures[firstSlot + i] =
+            textureContainer->activeTexture;
     }
 
-    d3d11CommandBuffer->needFragmentResourceBind = true;
+    d3d11CommandBuffer->needFragmentStorageTextureBind = true;
 }
 
 static void D3D11_BindFragmentStorageBuffers(
@@ -3898,12 +3912,11 @@ static void D3D11_BindFragmentStorageBuffers(
             d3d11CommandBuffer,
             bufferContainer->activeBuffer);
 
-        d3d11CommandBuffer->fragmentShaderResourceViews[firstSlot + i +
-                                                        d3d11CommandBuffer->graphicsPipeline->fragmentSamplerCount +
-                                                        d3d11CommandBuffer->graphicsPipeline->fragmentStorageTextureCount] = bufferContainer->activeBuffer->srv;
+        d3d11CommandBuffer->fragmentStorageBuffers[firstSlot + i] =
+            bufferContainer->activeBuffer;
     }
 
-    d3d11CommandBuffer->needFragmentResourceBind = true;
+    d3d11CommandBuffer->needFragmentStorageBufferBind = true;
 }
 
 static void D3D11_INTERNAL_BindGraphicsResources(
@@ -3911,18 +3924,8 @@ static void D3D11_INTERNAL_BindGraphicsResources(
 {
     D3D11GraphicsPipeline *graphicsPipeline = commandBuffer->graphicsPipeline;
 
-    Uint32 vertexResourceCount =
-        graphicsPipeline->vertexSamplerCount +
-        graphicsPipeline->vertexStorageTextureCount +
-        graphicsPipeline->vertexStorageBufferCount;
-
-    Uint32 fragmentResourceCount =
-        graphicsPipeline->fragmentSamplerCount +
-        graphicsPipeline->fragmentStorageTextureCount +
-        graphicsPipeline->fragmentStorageBufferCount;
-
     ID3D11Buffer *nullBuf = NULL;
-    Uint32 offsetInConstants, blockSizeInConstants, i;
+    Uint32 offsetInConstants, blockSizeInConstants;
 
     if (commandBuffer->needVertexBufferBind) {
         ID3D11DeviceContext_IASetVertexBuffers(
@@ -3936,30 +3939,68 @@ static void D3D11_INTERNAL_BindGraphicsResources(
 
     if (commandBuffer->needVertexSamplerBind) {
         if (graphicsPipeline->vertexSamplerCount > 0) {
+            ID3D11SamplerState *samplerStates[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+            ID3D11ShaderResourceView *srvs[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+
+            for (Uint32 i = 0; i < graphicsPipeline->vertexSamplerCount; i += 1) {
+                samplerStates[i] = commandBuffer->vertexSamplers[i]->handle;
+                srvs[i] = commandBuffer->vertexSamplerTextures[i]->shaderView;
+            }
+
             ID3D11DeviceContext_VSSetSamplers(
                 commandBuffer->context,
                 0,
                 graphicsPipeline->vertexSamplerCount,
-                commandBuffer->vertexSamplers);
+                samplerStates);
+
+            ID3D11DeviceContext_VSSetShaderResources(
+                commandBuffer->context,
+                0,
+                graphicsPipeline->vertexSamplerCount,
+                srvs);
         }
 
         commandBuffer->needVertexSamplerBind = false;
     }
 
-    if (commandBuffer->needVertexResourceBind) {
-        if (vertexResourceCount > 0) {
+    if (commandBuffer->needVertexStorageTextureBind) {
+        if (graphicsPipeline->vertexStorageTextureCount > 0) {
+            ID3D11ShaderResourceView *srvs[MAX_STORAGE_TEXTURES_PER_STAGE];
+
+            for (Uint32 i = 0; i < graphicsPipeline->vertexStorageTextureCount; i += 1) {
+                srvs[i] = commandBuffer->vertexStorageTextures[i]->shaderView;
+            }
+
+            ID3D11DeviceContext_VSSetShaderResources(
+                commandBuffer->context,
+                graphicsPipeline->vertexSamplerCount,
+                graphicsPipeline->vertexStorageTextureCount,
+                srvs);
+        }
+
+        commandBuffer->needVertexStorageTextureBind = false;
+    }
+
+    if (commandBuffer->needVertexStorageBufferBind) {
+        if (graphicsPipeline->vertexStorageBufferCount > 0) {
+            ID3D11ShaderResourceView *srvs[MAX_STORAGE_BUFFERS_PER_STAGE];
+
+            for (Uint32 i = 0; i < graphicsPipeline->vertexStorageBufferCount; i += 1) {
+                srvs[i] = commandBuffer->vertexStorageBuffers[i]->srv;
+            }
+
             ID3D11DeviceContext_VSSetShaderResources(
                 commandBuffer->context,
-                0,
-                vertexResourceCount,
-                commandBuffer->vertexShaderResourceViews);
+                graphicsPipeline->vertexSamplerCount + graphicsPipeline->vertexStorageTextureCount,
+                graphicsPipeline->vertexStorageBufferCount,
+                srvs);
         }
 
-        commandBuffer->needVertexResourceBind = false;
+        commandBuffer->needVertexStorageBufferBind = false;
     }
 
     if (commandBuffer->needVertexUniformBufferBind) {
-        for (i = 0; i < graphicsPipeline->vertexUniformBufferCount; i += 1) {
+        for (Uint32 i = 0; i < graphicsPipeline->vertexUniformBufferCount; i += 1) {
             /* stupid workaround for god awful D3D11 drivers
              * see: https://learn.microsoft.com/en-us/windows/win32/api/d3d11_1/nf-d3d11_1-id3d11devicecontext1-vssetconstantbuffers1#calling-vssetconstantbuffers1-with-command-list-emulation
              */
@@ -3986,30 +4027,68 @@ static void D3D11_INTERNAL_BindGraphicsResources(
 
     if (commandBuffer->needFragmentSamplerBind) {
         if (graphicsPipeline->fragmentSamplerCount > 0) {
+            ID3D11SamplerState *samplerStates[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+            ID3D11ShaderResourceView *srvs[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+
+            for (Uint32 i = 0; i < graphicsPipeline->fragmentSamplerCount; i += 1) {
+                samplerStates[i] = commandBuffer->fragmentSamplers[i]->handle;
+                srvs[i] = commandBuffer->fragmentSamplerTextures[i]->shaderView;
+            }
+
             ID3D11DeviceContext_PSSetSamplers(
                 commandBuffer->context,
                 0,
                 graphicsPipeline->fragmentSamplerCount,
-                commandBuffer->fragmentSamplers);
+                samplerStates);
+
+            ID3D11DeviceContext_PSSetShaderResources(
+                commandBuffer->context,
+                0,
+                graphicsPipeline->fragmentSamplerCount,
+                srvs);
         }
 
         commandBuffer->needFragmentSamplerBind = false;
     }
 
-    if (commandBuffer->needFragmentResourceBind) {
-        if (fragmentResourceCount > 0) {
+    if (commandBuffer->needFragmentStorageTextureBind) {
+        if (graphicsPipeline->fragmentStorageTextureCount > 0) {
+            ID3D11ShaderResourceView *srvs[MAX_STORAGE_TEXTURES_PER_STAGE];
+
+            for (Uint32 i = 0; i < graphicsPipeline->fragmentStorageTextureCount; i += 1) {
+                srvs[i] = commandBuffer->fragmentStorageTextures[i]->shaderView;
+            }
+
             ID3D11DeviceContext_PSSetShaderResources(
                 commandBuffer->context,
-                0,
-                fragmentResourceCount,
-                commandBuffer->fragmentShaderResourceViews);
+                graphicsPipeline->fragmentSamplerCount,
+                graphicsPipeline->fragmentStorageTextureCount,
+                srvs);
+        }
+
+        commandBuffer->needFragmentStorageTextureBind = false;
+    }
+
+    if (commandBuffer->needFragmentStorageBufferBind) {
+        if (graphicsPipeline->fragmentStorageBufferCount > 0) {
+            ID3D11ShaderResourceView *srvs[MAX_STORAGE_BUFFERS_PER_STAGE];
+
+            for (Uint32 i = 0; i < graphicsPipeline->fragmentStorageBufferCount; i += 1) {
+                srvs[i] = commandBuffer->fragmentStorageBuffers[i]->srv;
+            }
+
+            ID3D11DeviceContext_PSSetShaderResources(
+                commandBuffer->context,
+                graphicsPipeline->fragmentSamplerCount + graphicsPipeline->fragmentStorageTextureCount,
+                graphicsPipeline->fragmentStorageBufferCount,
+                srvs);
         }
 
-        commandBuffer->needFragmentResourceBind = false;
+        commandBuffer->needFragmentStorageBufferBind = false;
     }
 
     if (commandBuffer->needFragmentUniformBufferBind) {
-        for (i = 0; i < graphicsPipeline->fragmentUniformBufferCount; i += 1) {
+        for (Uint32 i = 0; i < graphicsPipeline->fragmentUniformBufferCount; i += 1) {
             /* stupid workaround for god awful D3D11 drivers
              * see: https://learn.microsoft.com/en-us/windows/win32/api/d3d11_1/nf-d3d11_1-id3d11devicecontext1-pssetconstantbuffers1#calling-pssetconstantbuffers1-with-command-list-emulation
              */
@@ -4127,6 +4206,7 @@ static void D3D11_EndRenderPass(
     SDL_GPUCommandBuffer *commandBuffer)
 {
     D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer;
+    D3D11Renderer *renderer = d3d11CommandBuffer->renderer;
     Uint32 i;
 
     // Set render target slots to NULL to avoid NULL set behavior
@@ -4134,7 +4214,7 @@ static void D3D11_EndRenderPass(
     ID3D11DeviceContext_OMSetRenderTargets(
         d3d11CommandBuffer->context,
         MAX_COLOR_TARGET_BINDINGS,
-        nullRTVs,
+        renderer->nullRTVs,
         NULL);
 
     // Resolve MSAA color render targets
@@ -4150,16 +4230,44 @@ static void D3D11_EndRenderPass(
         }
     }
 
+    ID3D11DeviceContext_VSSetSamplers(
+        d3d11CommandBuffer->context,
+        0,
+        MAX_TEXTURE_SAMPLERS_PER_STAGE,
+        renderer->nullSamplers);
+
+    ID3D11DeviceContext_VSSetShaderResources(
+        d3d11CommandBuffer->context,
+        0,
+        MAX_TEXTURE_SAMPLERS_PER_STAGE * 2 + MAX_STORAGE_TEXTURES_PER_STAGE + MAX_STORAGE_BUFFERS_PER_STAGE,
+        renderer->nullSRVs);
+
+    ID3D11DeviceContext_PSSetSamplers(
+        d3d11CommandBuffer->context,
+        0,
+        MAX_TEXTURE_SAMPLERS_PER_STAGE,
+        renderer->nullSamplers);
+
+    ID3D11DeviceContext_PSSetShaderResources(
+        d3d11CommandBuffer->context,
+        0,
+        MAX_TEXTURE_SAMPLERS_PER_STAGE * 2 + MAX_STORAGE_TEXTURES_PER_STAGE + MAX_STORAGE_BUFFERS_PER_STAGE,
+        renderer->nullSRVs);
+
     // Reset bind state
     SDL_zeroa(d3d11CommandBuffer->vertexBuffers);
     SDL_zeroa(d3d11CommandBuffer->vertexBufferOffsets);
     d3d11CommandBuffer->vertexBufferCount = 0;
 
     SDL_zeroa(d3d11CommandBuffer->vertexSamplers);
-    SDL_z

(Patch may be truncated; please check the link at the top of this post.)