SDL: GPU: Remove pitch parameters from indirect draw calls (#10803)

From ddd5723e2eee20fb2bef108a7bc80cc3a5afb90f Mon Sep 17 00:00:00 2001
From: Caleb Cornett <[EMAIL REDACTED]>
Date: Thu, 12 Sep 2024 01:30:14 -0500
Subject: [PATCH] GPU: Remove pitch parameters from indirect draw calls
 (#10803)

---
 include/SDL3/SDL_gpu.h          | 22 ++++------
 src/dynapi/SDL_dynapi_procs.h   |  4 +-
 src/gpu/SDL_gpu.c               | 12 ++----
 src/gpu/SDL_sysgpu.h            |  6 +--
 src/gpu/d3d11/SDL_gpu_d3d11.c   | 10 ++---
 src/gpu/d3d12/SDL_gpu_d3d12.c   | 74 ++++++++-------------------------
 src/gpu/metal/SDL_gpu_metal.m   | 10 ++---
 src/gpu/vulkan/SDL_gpu_vulkan.c |  8 ++--
 8 files changed, 47 insertions(+), 99 deletions(-)

diff --git a/include/SDL3/SDL_gpu.h b/include/SDL3/SDL_gpu.h
index d4b56af16db0e..a8ed931e4308f 100644
--- a/include/SDL3/SDL_gpu.h
+++ b/include/SDL3/SDL_gpu.h
@@ -2651,7 +2651,8 @@ extern SDL_DECLSPEC void SDLCALL SDL_DrawGPUPrimitives(
  * Draws data using bound graphics state and with draw parameters set from a
  * buffer.
  *
- * The buffer layout should match the layout of SDL_GPUIndirectDrawCommand.
+ * The buffer must consist of tightly-packed draw parameter sets that
+ * each match the layout of SDL_GPUIndirectDrawCommand.
  * You must not call this function before binding a graphics pipeline.
  *
  * \param render_pass a render pass handle.
@@ -2659,7 +2660,6 @@ extern SDL_DECLSPEC void SDLCALL SDL_DrawGPUPrimitives(
  * \param offset the offset to start reading from the draw buffer.
  * \param draw_count the number of draw parameter sets that should be read
  *                   from the draw buffer.
- * \param pitch the byte pitch between sets of draw parameters.
  *
  * \since This function is available since SDL 3.0.0.
  */
@@ -2667,23 +2667,21 @@ extern SDL_DECLSPEC void SDLCALL SDL_DrawGPUPrimitivesIndirect(
     SDL_GPURenderPass *render_pass,
     SDL_GPUBuffer *buffer,
     Uint32 offset,
-    Uint32 draw_count,
-    Uint32 pitch);
+    Uint32 draw_count);
 
 /**
  * Draws data using bound graphics state with an index buffer enabled and with
  * draw parameters set from a buffer.
  *
- * The buffer layout should match the layout of
- * SDL_GPUIndexedIndirectDrawCommand. You must not call this function before
- * binding a graphics pipeline.
+ * The buffer must consist of tightly-packed draw parameter sets that
+ * each match the layout of SDL_GPUIndexedIndirectDrawCommand.
+ * You must not call this function before binding a graphics pipeline.
  *
  * \param render_pass a render pass handle.
  * \param buffer a buffer containing draw parameters.
  * \param offset the offset to start reading from the draw buffer.
  * \param draw_count the number of draw parameter sets that should be read
  *                   from the draw buffer.
- * \param pitch the byte pitch between sets of draw parameters.
  *
  * \since This function is available since SDL 3.0.0.
  */
@@ -2691,8 +2689,7 @@ extern SDL_DECLSPEC void SDLCALL SDL_DrawGPUIndexedPrimitivesIndirect(
     SDL_GPURenderPass *render_pass,
     SDL_GPUBuffer *buffer,
     Uint32 offset,
-    Uint32 draw_count,
-    Uint32 pitch);
+    Uint32 draw_count);
 
 /**
  * Ends the given render pass.
@@ -2846,9 +2843,8 @@ extern SDL_DECLSPEC void SDLCALL SDL_DispatchGPUCompute(
 /**
  * Dispatches compute work with parameters set from a buffer.
  *
- * The buffer layout should match the layout of
- * SDL_GPUIndirectDispatchCommand. You must not call this function before
- * binding a compute pipeline.
+ * The buffer layout should match the layout of SDL_GPUIndirectDispatchCommand.
+ * You must not call this function before binding a compute pipeline.
  *
  * A VERY IMPORTANT NOTE If you dispatch multiple times in a compute pass, and
  * the dispatches write to the same resource region as each other, there is no
diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h
index e66887bfe753d..b30fe83f70722 100644
--- a/src/dynapi/SDL_dynapi_procs.h
+++ b/src/dynapi/SDL_dynapi_procs.h
@@ -188,9 +188,9 @@ SDL_DYNAPI_PROC(void,SDL_DispatchGPUComputeIndirect,(SDL_GPUComputePass *a, SDL_
 SDL_DYNAPI_PROC(void,SDL_DownloadFromGPUBuffer,(SDL_GPUCopyPass *a, const SDL_GPUBufferRegion *b, const SDL_GPUTransferBufferLocation *c),(a,b,c),)
 SDL_DYNAPI_PROC(void,SDL_DownloadFromGPUTexture,(SDL_GPUCopyPass *a, const SDL_GPUTextureRegion *b, const SDL_GPUTextureTransferInfo *c),(a,b,c),)
 SDL_DYNAPI_PROC(void,SDL_DrawGPUIndexedPrimitives,(SDL_GPURenderPass *a, Uint32 b, Uint32 c, Uint32 d, Sint32 e, Uint32 f),(a,b,c,d,e,f),)
-SDL_DYNAPI_PROC(void,SDL_DrawGPUIndexedPrimitivesIndirect,(SDL_GPURenderPass *a, SDL_GPUBuffer *b, Uint32 c, Uint32 d, Uint32 e),(a,b,c,d,e),)
+SDL_DYNAPI_PROC(void,SDL_DrawGPUIndexedPrimitivesIndirect,(SDL_GPURenderPass *a, SDL_GPUBuffer *b, Uint32 c, Uint32 d),(a,b,c,d),)
 SDL_DYNAPI_PROC(void,SDL_DrawGPUPrimitives,(SDL_GPURenderPass *a, Uint32 b, Uint32 c, Uint32 d, Uint32 e),(a,b,c,d,e),)
-SDL_DYNAPI_PROC(void,SDL_DrawGPUPrimitivesIndirect,(SDL_GPURenderPass *a, SDL_GPUBuffer *b, Uint32 c, Uint32 d, Uint32 e),(a,b,c,d,e),)
+SDL_DYNAPI_PROC(void,SDL_DrawGPUPrimitivesIndirect,(SDL_GPURenderPass *a, SDL_GPUBuffer *b, Uint32 c, Uint32 d),(a,b,c,d),)
 SDL_DYNAPI_PROC(SDL_Surface*,SDL_DuplicateSurface,(SDL_Surface *a),(a),return)
 SDL_DYNAPI_PROC(SDL_EGLConfig,SDL_EGL_GetCurrentConfig,(void),(),return)
 SDL_DYNAPI_PROC(SDL_EGLDisplay,SDL_EGL_GetCurrentDisplay,(void),(),return)
diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c
index 344794a260922..398344b3640d5 100644
--- a/src/gpu/SDL_gpu.c
+++ b/src/gpu/SDL_gpu.c
@@ -1631,8 +1631,7 @@ void SDL_DrawGPUPrimitivesIndirect(
     SDL_GPURenderPass *render_pass,
     SDL_GPUBuffer *buffer,
     Uint32 offset,
-    Uint32 draw_count,
-    Uint32 pitch)
+    Uint32 draw_count)
 {
     if (render_pass == NULL) {
         SDL_InvalidParamError("render_pass");
@@ -1652,16 +1651,14 @@ void SDL_DrawGPUPrimitivesIndirect(
         RENDERPASS_COMMAND_BUFFER,
         buffer,
         offset,
-        draw_count,
-        pitch);
+        draw_count);
 }
 
 void SDL_DrawGPUIndexedPrimitivesIndirect(
     SDL_GPURenderPass *render_pass,
     SDL_GPUBuffer *buffer,
     Uint32 offset,
-    Uint32 draw_count,
-    Uint32 pitch)
+    Uint32 draw_count)
 {
     if (render_pass == NULL) {
         SDL_InvalidParamError("render_pass");
@@ -1681,8 +1678,7 @@ void SDL_DrawGPUIndexedPrimitivesIndirect(
         RENDERPASS_COMMAND_BUFFER,
         buffer,
         offset,
-        draw_count,
-        pitch);
+        draw_count);
 }
 
 void SDL_EndGPURenderPass(
diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h
index 93a904cd352c8..260f36346c42b 100644
--- a/src/gpu/SDL_sysgpu.h
+++ b/src/gpu/SDL_sysgpu.h
@@ -485,15 +485,13 @@ struct SDL_GPUDevice
         SDL_GPUCommandBuffer *commandBuffer,
         SDL_GPUBuffer *buffer,
         Uint32 offset,
-        Uint32 drawCount,
-        Uint32 pitch);
+        Uint32 drawCount);
 
     void (*DrawIndexedPrimitivesIndirect)(
         SDL_GPUCommandBuffer *commandBuffer,
         SDL_GPUBuffer *buffer,
         Uint32 offset,
-        Uint32 drawCount,
-        Uint32 pitch);
+        Uint32 drawCount);
 
     void (*EndRenderPass)(
         SDL_GPUCommandBuffer *commandBuffer);
diff --git a/src/gpu/d3d11/SDL_gpu_d3d11.c b/src/gpu/d3d11/SDL_gpu_d3d11.c
index 826fe41a3ad8e..519dd44789fc7 100644
--- a/src/gpu/d3d11/SDL_gpu_d3d11.c
+++ b/src/gpu/d3d11/SDL_gpu_d3d11.c
@@ -4156,8 +4156,7 @@ static void D3D11_DrawPrimitivesIndirect(
     SDL_GPUCommandBuffer *commandBuffer,
     SDL_GPUBuffer *buffer,
     Uint32 offset,
-    Uint32 drawCount,
-    Uint32 pitch)
+    Uint32 drawCount)
 {
     D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer;
     D3D11_INTERNAL_BindGraphicsResources(d3d11CommandBuffer);
@@ -4171,7 +4170,7 @@ static void D3D11_DrawPrimitivesIndirect(
         ID3D11DeviceContext_DrawInstancedIndirect(
             d3d11CommandBuffer->context,
             d3d11Buffer->handle,
-            offset + (pitch * i));
+            offset + (sizeof(SDL_GPUIndirectDrawCommand) * i));
     }
 
     D3D11_INTERNAL_TrackBuffer(d3d11CommandBuffer, d3d11Buffer);
@@ -4181,8 +4180,7 @@ static void D3D11_DrawIndexedPrimitivesIndirect(
     SDL_GPUCommandBuffer *commandBuffer,
     SDL_GPUBuffer *buffer,
     Uint32 offset,
-    Uint32 drawCount,
-    Uint32 pitch)
+    Uint32 drawCount)
 {
     D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer;
     D3D11_INTERNAL_BindGraphicsResources(d3d11CommandBuffer);
@@ -4196,7 +4194,7 @@ static void D3D11_DrawIndexedPrimitivesIndirect(
         ID3D11DeviceContext_DrawIndexedInstancedIndirect(
             d3d11CommandBuffer->context,
             d3d11Buffer->handle,
-            offset + (pitch * i));
+            offset + (sizeof(SDL_GPUIndexedIndirectDrawCommand) * i));
     }
 
     D3D11_INTERNAL_TrackBuffer(d3d11CommandBuffer, d3d11Buffer);
diff --git a/src/gpu/d3d12/SDL_gpu_d3d12.c b/src/gpu/d3d12/SDL_gpu_d3d12.c
index df8e74a8d7dc7..28cd5f8b5ac73 100644
--- a/src/gpu/d3d12/SDL_gpu_d3d12.c
+++ b/src/gpu/d3d12/SDL_gpu_d3d12.c
@@ -4722,80 +4722,42 @@ static void D3D12_DrawPrimitivesIndirect(
     SDL_GPUCommandBuffer *commandBuffer,
     SDL_GPUBuffer *buffer,
     Uint32 offset,
-    Uint32 drawCount,
-    Uint32 pitch)
+    Uint32 drawCount)
 {
     D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
     D3D12Buffer *d3d12Buffer = ((D3D12BufferContainer *)buffer)->activeBuffer;
 
     D3D12_INTERNAL_BindGraphicsResources(d3d12CommandBuffer);
 
-    if (pitch == sizeof(SDL_GPUIndirectDrawCommand)) {
-        // Real multi-draw!
-        ID3D12GraphicsCommandList_ExecuteIndirect(
-            d3d12CommandBuffer->graphicsCommandList,
-            d3d12CommandBuffer->renderer->indirectDrawCommandSignature,
-            drawCount,
-            d3d12Buffer->handle,
-            offset,
-            NULL,
-            0);
-    } else {
-        /* Fake multi-draw...
-         * FIXME: we could make this real multi-draw
-         * if we have a lookup to get command signature per pitch value
-         */
-        for (Uint32 i = 0; i < drawCount; i += 1) {
-            ID3D12GraphicsCommandList_ExecuteIndirect(
-                d3d12CommandBuffer->graphicsCommandList,
-                d3d12CommandBuffer->renderer->indirectDrawCommandSignature,
-                1,
-                d3d12Buffer->handle,
-                offset + (pitch * i),
-                NULL,
-                0);
-        }
-    }
+    ID3D12GraphicsCommandList_ExecuteIndirect(
+        d3d12CommandBuffer->graphicsCommandList,
+        d3d12CommandBuffer->renderer->indirectDrawCommandSignature,
+        drawCount,
+        d3d12Buffer->handle,
+        offset,
+        NULL,
+        0);
 }
 
 static void D3D12_DrawIndexedPrimitivesIndirect(
     SDL_GPUCommandBuffer *commandBuffer,
     SDL_GPUBuffer *buffer,
     Uint32 offset,
-    Uint32 drawCount,
-    Uint32 pitch)
+    Uint32 drawCount)
 {
     D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
     D3D12Buffer *d3d12Buffer = ((D3D12BufferContainer *)buffer)->activeBuffer;
 
     D3D12_INTERNAL_BindGraphicsResources(d3d12CommandBuffer);
 
-    if (pitch == sizeof(SDL_GPUIndexedIndirectDrawCommand)) {
-        // Real multi-draw!
-        ID3D12GraphicsCommandList_ExecuteIndirect(
-            d3d12CommandBuffer->graphicsCommandList,
-            d3d12CommandBuffer->renderer->indirectIndexedDrawCommandSignature,
-            drawCount,
-            d3d12Buffer->handle,
-            offset,
-            NULL,
-            0);
-    } else {
-        /* Fake multi-draw...
-         * FIXME: we could make this real multi-draw
-         * if we have a lookup to get command signature per pitch value
-         */
-        for (Uint32 i = 0; i < drawCount; i += 1) {
-            ID3D12GraphicsCommandList_ExecuteIndirect(
-                d3d12CommandBuffer->graphicsCommandList,
-                d3d12CommandBuffer->renderer->indirectIndexedDrawCommandSignature,
-                1,
-                d3d12Buffer->handle,
-                offset + (pitch * i),
-                NULL,
-                0);
-        }
-    }
+    ID3D12GraphicsCommandList_ExecuteIndirect(
+        d3d12CommandBuffer->graphicsCommandList,
+        d3d12CommandBuffer->renderer->indirectIndexedDrawCommandSignature,
+        drawCount,
+        d3d12Buffer->handle,
+        offset,
+        NULL,
+        0);
 }
 
 static void D3D12_EndRenderPass(
diff --git a/src/gpu/metal/SDL_gpu_metal.m b/src/gpu/metal/SDL_gpu_metal.m
index 149a0c05be004..efe2d214ffafc 100644
--- a/src/gpu/metal/SDL_gpu_metal.m
+++ b/src/gpu/metal/SDL_gpu_metal.m
@@ -2750,8 +2750,7 @@ static void METAL_DrawPrimitivesIndirect(
     SDL_GPUCommandBuffer *commandBuffer,
     SDL_GPUBuffer *buffer,
     Uint32 offset,
-    Uint32 drawCount,
-    Uint32 pitch)
+    Uint32 drawCount)
 {
     @autoreleasepool {
         MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer;
@@ -2767,7 +2766,7 @@ static void METAL_DrawPrimitivesIndirect(
             [metalCommandBuffer->renderEncoder
                       drawPrimitives:SDLToMetal_PrimitiveType[primitiveType]
                       indirectBuffer:metalBuffer->handle
-                indirectBufferOffset:offset + (pitch * i)];
+                indirectBufferOffset:offset + (sizeof(SDL_GPUIndirectDrawCommand) * i)];
         }
 
         METAL_INTERNAL_TrackBuffer(metalCommandBuffer, metalBuffer);
@@ -2778,8 +2777,7 @@ static void METAL_DrawIndexedPrimitivesIndirect(
     SDL_GPUCommandBuffer *commandBuffer,
     SDL_GPUBuffer *buffer,
     Uint32 offset,
-    Uint32 drawCount,
-    Uint32 pitch)
+    Uint32 drawCount)
 {
     @autoreleasepool {
         MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer;
@@ -2795,7 +2793,7 @@ static void METAL_DrawIndexedPrimitivesIndirect(
                           indexBuffer:metalCommandBuffer->indexBuffer->handle
                     indexBufferOffset:metalCommandBuffer->indexBufferOffset
                        indirectBuffer:metalBuffer->handle
-                 indirectBufferOffset:offset + (pitch * i)];
+                 indirectBufferOffset:offset + (sizeof(SDL_GPUIndexedIndirectDrawCommand) * i)];
         }
 
         METAL_INTERNAL_TrackBuffer(metalCommandBuffer, metalBuffer);
diff --git a/src/gpu/vulkan/SDL_gpu_vulkan.c b/src/gpu/vulkan/SDL_gpu_vulkan.c
index 5668bbe4bdfa5..5ea9a42d9d71c 100644
--- a/src/gpu/vulkan/SDL_gpu_vulkan.c
+++ b/src/gpu/vulkan/SDL_gpu_vulkan.c
@@ -5357,12 +5357,12 @@ static void VULKAN_DrawPrimitivesIndirect(
     SDL_GPUCommandBuffer *commandBuffer,
     SDL_GPUBuffer *buffer,
     Uint32 offset,
-    Uint32 drawCount,
-    Uint32 pitch)
+    Uint32 drawCount)
 {
     VulkanCommandBuffer *vulkanCommandBuffer = (VulkanCommandBuffer *)commandBuffer;
     VulkanRenderer *renderer = (VulkanRenderer *)vulkanCommandBuffer->renderer;
     VulkanBuffer *vulkanBuffer = ((VulkanBufferContainer *)buffer)->activeBufferHandle->vulkanBuffer;
+    Uint32 pitch = sizeof(SDL_GPUIndirectDrawCommand);
     Uint32 i;
 
     VULKAN_INTERNAL_BindGraphicsDescriptorSets(renderer, vulkanCommandBuffer);
@@ -5394,12 +5394,12 @@ static void VULKAN_DrawIndexedPrimitivesIndirect(
     SDL_GPUCommandBuffer *commandBuffer,
     SDL_GPUBuffer *buffer,
     Uint32 offset,
-    Uint32 drawCount,
-    Uint32 pitch)
+    Uint32 drawCount)
 {
     VulkanCommandBuffer *vulkanCommandBuffer = (VulkanCommandBuffer *)commandBuffer;
     VulkanRenderer *renderer = (VulkanRenderer *)vulkanCommandBuffer->renderer;
     VulkanBuffer *vulkanBuffer = ((VulkanBufferContainer *)buffer)->activeBufferHandle->vulkanBuffer;
+    Uint32 pitch = sizeof(SDL_GPUIndexedIndirectDrawCommand);
     Uint32 i;
 
     VULKAN_INTERNAL_BindGraphicsDescriptorSets(renderer, vulkanCommandBuffer);