From 05d0656bd673ec9d39e16b2e63699e0e510015e4 Mon Sep 17 00:00:00 2001
From: Evan Hemsley <[EMAIL REDACTED]>
Date: Fri, 27 Sep 2024 12:49:37 -0700
Subject: [PATCH] GPU: Simultaneous compute pass read-write (#10965)
---
include/SDL3/SDL_gpu.h | 78 +++++++++++---------
src/dynapi/SDL_dynapi_procs.h | 2 +-
src/gpu/SDL_gpu.c | 18 +++--
src/gpu/SDL_sysgpu.h | 4 +-
src/gpu/d3d11/SDL_gpu_d3d11.c | 52 +++++++++-----
src/gpu/d3d12/SDL_gpu_d3d12.c | 121 +++++++++++++++++---------------
src/gpu/metal/SDL_gpu_metal.m | 44 ++++++------
src/gpu/vulkan/SDL_gpu_vulkan.c | 108 ++++++++++++++--------------
8 files changed, 241 insertions(+), 186 deletions(-)
diff --git a/include/SDL3/SDL_gpu.h b/include/SDL3/SDL_gpu.h
index f3cf36b27a025..9b7149801a25e 100644
--- a/include/SDL3/SDL_gpu.h
+++ b/include/SDL3/SDL_gpu.h
@@ -464,18 +464,24 @@ typedef enum SDL_GPUTextureFormat
* A texture must have at least one usage flag. Note that some usage flag
* combinations are invalid.
*
- * \since This enum is available since SDL 3.0.0
+ * With regard to compute storage usage, READ | WRITE means that you can have shader A that only writes into the texture and shader B that only reads from the texture, and bind the same texture to either shader.
+ * SIMULTANEOUS means that you can do reads and writes within the same shader or compute pass. It also implies that atomic ops can be used, since those are read-modify-write operations.
+ * If you use SIMULTANEOUS, you are responsible for avoiding data races, as there is no data synchronization within a compute pass.
+ * Note that SIMULTANEOUS usage is only supported by a limited number of texture formats.
+ *
+ * \since This datatype is available since SDL 3.0.0
*
* \sa SDL_CreateGPUTexture
*/
typedef Uint32 SDL_GPUTextureUsageFlags;
-#define SDL_GPU_TEXTUREUSAGE_SAMPLER (1u << 0) /**< Texture supports sampling. */
-#define SDL_GPU_TEXTUREUSAGE_COLOR_TARGET (1u << 1) /**< Texture is a color render target. */
-#define SDL_GPU_TEXTUREUSAGE_DEPTH_STENCIL_TARGET (1u << 2) /**< Texture is a depth stencil target. */
-#define SDL_GPU_TEXTUREUSAGE_GRAPHICS_STORAGE_READ (1u << 3) /**< Texture supports storage reads in graphics stages. */
-#define SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_READ (1u << 4) /**< Texture supports storage reads in the compute stage. */
-#define SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE (1u << 5) /**< Texture supports storage writes in the compute stage. */
+#define SDL_GPU_TEXTUREUSAGE_SAMPLER (1u << 0) /**< Texture supports sampling. */
+#define SDL_GPU_TEXTUREUSAGE_COLOR_TARGET (1u << 1) /**< Texture is a color render target. */
+#define SDL_GPU_TEXTUREUSAGE_DEPTH_STENCIL_TARGET (1u << 2) /**< Texture is a depth stencil target. */
+#define SDL_GPU_TEXTUREUSAGE_GRAPHICS_STORAGE_READ (1u << 3) /**< Texture supports storage reads in graphics stages. */
+#define SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_READ (1u << 4) /**< Texture supports storage reads in the compute stage. */
+#define SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE (1u << 5) /**< Texture supports storage writes in the compute stage. */
+#define SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE (1u << 6) /**< Texture supports reads and writes in the same compute shader. This is NOT equivalent to READ | WRITE. */
/**
* Specifies the type of a texture.
@@ -536,18 +542,21 @@ typedef enum SDL_GPUCubeMapFace
* A buffer must have at least one usage flag. Note that some usage flag
* combinations are invalid.
*
- * \since This enum is available since SDL 3.0.0
+ * Unlike textures, buffers created with READ | WRITE can be used for simultaneous read-write usage.
+ * The same data synchronization concerns as textures apply.
+ *
+ * \since This datatype is available since SDL 3.0.0
*
* \sa SDL_CreateGPUBuffer
*/
typedef Uint32 SDL_GPUBufferUsageFlags;
-#define SDL_GPU_BUFFERUSAGE_VERTEX (1u << 0) /**< Buffer is a vertex buffer. */
-#define SDL_GPU_BUFFERUSAGE_INDEX (1u << 1) /**< Buffer is an index buffer. */
-#define SDL_GPU_BUFFERUSAGE_INDIRECT (1u << 2) /**< Buffer is an indirect buffer. */
-#define SDL_GPU_BUFFERUSAGE_GRAPHICS_STORAGE_READ (1u << 3) /**< Buffer supports storage reads in graphics stages. */
-#define SDL_GPU_BUFFERUSAGE_COMPUTE_STORAGE_READ (1u << 4) /**< Buffer supports storage reads in the compute stage. */
-#define SDL_GPU_BUFFERUSAGE_COMPUTE_STORAGE_WRITE (1u << 5) /**< Buffer supports storage writes in the compute stage. */
+#define SDL_GPU_BUFFERUSAGE_VERTEX (1u << 0) /**< Buffer is a vertex buffer. */
+#define SDL_GPU_BUFFERUSAGE_INDEX (1u << 1) /**< Buffer is an index buffer. */
+#define SDL_GPU_BUFFERUSAGE_INDIRECT (1u << 2) /**< Buffer is an indirect buffer. */
+#define SDL_GPU_BUFFERUSAGE_GRAPHICS_STORAGE_READ (1u << 3) /**< Buffer supports storage reads in graphics stages. */
+#define SDL_GPU_BUFFERUSAGE_COMPUTE_STORAGE_READ (1u << 4) /**< Buffer supports storage reads in the compute stage. */
+#define SDL_GPU_BUFFERUSAGE_COMPUTE_STORAGE_WRITE (1u << 5) /**< Buffer supports storage writes in the compute stage. */
/**
* Specifies how a transfer buffer is intended to be used by the client.
@@ -811,7 +820,7 @@ typedef enum SDL_GPUBlendFactor
/**
* Specifies which color components are written in a graphics pipeline.
*
- * \since This enum is available since SDL 3.0.0
+ * \since This datatype is available since SDL 3.0.0
*
* \sa SDL_CreateGPUGraphicsPipeline
*/
@@ -1489,8 +1498,8 @@ typedef struct SDL_GPUComputePipelineCreateInfo
Uint32 num_samplers; /**< The number of samplers defined in the shader. */
Uint32 num_readonly_storage_textures; /**< The number of readonly storage textures defined in the shader. */
Uint32 num_readonly_storage_buffers; /**< The number of readonly storage buffers defined in the shader. */
- Uint32 num_writeonly_storage_textures; /**< The number of writeonly storage textures defined in the shader. */
- Uint32 num_writeonly_storage_buffers; /**< The number of writeonly storage buffers defined in the shader. */
+ Uint32 num_readwrite_storage_textures; /**< The number of read-write storage textures defined in the shader. */
+ Uint32 num_readwrite_storage_buffers; /**< The number of read-write storage buffers defined in the shader. */
Uint32 num_uniform_buffers; /**< The number of uniform buffers defined in the shader. */
Uint32 threadcount_x; /**< The number of threads in the X dimension. This should match the value in the shader. */
Uint32 threadcount_y; /**< The number of threads in the Y dimension. This should match the value in the shader. */
@@ -1667,14 +1676,14 @@ typedef struct SDL_GPUTextureSamplerBinding
*
* \sa SDL_BeginGPUComputePass
*/
-typedef struct SDL_GPUStorageBufferWriteOnlyBinding
+typedef struct SDL_GPUStorageBufferReadWriteBinding
{
SDL_GPUBuffer *buffer; /**< The buffer to bind. Must have been created with SDL_GPU_BUFFERUSAGE_COMPUTE_STORAGE_WRITE. */
- bool cycle; /**< true cycles the buffer if it is already bound. */
+ bool cycle; /**< true cycles the buffer if it is already bound. */
Uint8 padding1;
Uint8 padding2;
Uint8 padding3;
-} SDL_GPUStorageBufferWriteOnlyBinding;
+} SDL_GPUStorageBufferReadWriteBinding;
/**
* A structure specifying parameters related to binding textures in a compute
@@ -1684,16 +1693,16 @@ typedef struct SDL_GPUStorageBufferWriteOnlyBinding
*
* \sa SDL_BeginGPUComputePass
*/
-typedef struct SDL_GPUStorageTextureWriteOnlyBinding
+typedef struct SDL_GPUStorageTextureReadWriteBinding
{
- SDL_GPUTexture *texture; /**< The texture to bind. Must have been created with SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE. */
+ SDL_GPUTexture *texture; /**< The texture to bind. Must have been created with SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE or SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE. */
Uint32 mip_level; /**< The mip level index to bind. */
Uint32 layer; /**< The layer index to bind. */
- bool cycle; /**< true cycles the texture if it is already bound. */
+ bool cycle; /**< true cycles the texture if it is already bound. */
Uint8 padding1;
Uint8 padding2;
Uint8 padding3;
-} SDL_GPUStorageTextureWriteOnlyBinding;
+} SDL_GPUStorageTextureReadWriteBinding;
/* Functions */
@@ -2807,17 +2816,22 @@ extern SDL_DECLSPEC void SDLCALL SDL_EndGPURenderPass(
* Begins a compute pass on a command buffer.
*
* A compute pass is defined by a set of texture subresources and buffers that
- * will be written to by compute pipelines. These textures and buffers must
- * have been created with the COMPUTE_STORAGE_WRITE bit. All operations
+ * may be written to by compute pipelines. These textures and buffers must
+ * have been created with the COMPUTE_STORAGE_WRITE bit or, for textures only, the COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE bit.
+ * If you do not create a texture with COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE, you must not read from the texture in the compute pass.
+ * All operations
* related to compute pipelines must take place inside of a compute pass. You
* must not begin another compute pass, or a render pass or copy pass before
* ending the compute pass.
*
- * A VERY IMPORTANT NOTE Textures and buffers bound as write-only MUST NOT be
- * read from during the compute pass. Doing so will result in undefined
- * behavior. If your compute work requires reading the output from a previous
+ * A VERY IMPORTANT NOTE - Reads and writes in compute passes are NOT implicitly synchronized.
+ * This means you may cause data races by both reading and writing a resource region in a compute pass,
+ * or by writing multiple times to a resource region.
+ * If your compute work depends on reading the completed output from a previous
* dispatch, you MUST end the current compute pass and begin a new one before
- * you can safely access the data.
+ * you can safely access the data. Otherwise you will receive unexpected results.
+ * Reading and writing a texture in the same compute pass is only supported by specific texture formats.
+ * Make sure you check the format support!
*
* \param command_buffer a command buffer.
* \param storage_texture_bindings an array of writeable storage texture
@@ -2836,9 +2850,9 @@ extern SDL_DECLSPEC void SDLCALL SDL_EndGPURenderPass(
*/
extern SDL_DECLSPEC SDL_GPUComputePass *SDLCALL SDL_BeginGPUComputePass(
SDL_GPUCommandBuffer *command_buffer,
- const SDL_GPUStorageTextureWriteOnlyBinding *storage_texture_bindings,
+ const SDL_GPUStorageTextureReadWriteBinding *storage_texture_bindings,
Uint32 num_storage_texture_bindings,
- const SDL_GPUStorageBufferWriteOnlyBinding *storage_buffer_bindings,
+ const SDL_GPUStorageBufferReadWriteBinding *storage_buffer_bindings,
Uint32 num_storage_buffer_bindings);
/**
diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h
index 0aaa592b049c8..a25e2cbdc7d2d 100644
--- a/src/dynapi/SDL_dynapi_procs.h
+++ b/src/dynapi/SDL_dynapi_procs.h
@@ -63,7 +63,7 @@ SDL_DYNAPI_PROC(SDL_TimerID,SDL_AddTimerNS,(Uint64 a, SDL_NSTimerCallback b, voi
SDL_DYNAPI_PROC(bool,SDL_AddVulkanRenderSemaphores,(SDL_Renderer *a, Uint32 b, Sint64 c, Sint64 d),(a,b,c,d),return)
SDL_DYNAPI_PROC(SDL_JoystickID,SDL_AttachVirtualJoystick,(const SDL_VirtualJoystickDesc *a),(a),return)
SDL_DYNAPI_PROC(bool,SDL_AudioDevicePaused,(SDL_AudioDeviceID a),(a),return)
-SDL_DYNAPI_PROC(SDL_GPUComputePass*,SDL_BeginGPUComputePass,(SDL_GPUCommandBuffer *a, const SDL_GPUStorageTextureWriteOnlyBinding *b, Uint32 c, const SDL_GPUStorageBufferWriteOnlyBinding *d, Uint32 e),(a,b,c,d,e),return)
+SDL_DYNAPI_PROC(SDL_GPUComputePass*,SDL_BeginGPUComputePass,(SDL_GPUCommandBuffer *a, const SDL_GPUStorageTextureReadWriteBinding *b, Uint32 c, const SDL_GPUStorageBufferReadWriteBinding *d, Uint32 e),(a,b,c,d,e),return)
SDL_DYNAPI_PROC(SDL_GPUCopyPass*,SDL_BeginGPUCopyPass,(SDL_GPUCommandBuffer *a),(a),return)
SDL_DYNAPI_PROC(SDL_GPURenderPass*,SDL_BeginGPURenderPass,(SDL_GPUCommandBuffer *a, const SDL_GPUColorTargetInfo *b, Uint32 c, const SDL_GPUDepthStencilTargetInfo *d),(a,b,c,d),return)
SDL_DYNAPI_PROC(bool,SDL_BindAudioStream,(SDL_AudioDeviceID a, SDL_AudioStream *b),(a,b),return)
diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c
index fb9c4d107afb9..aedb81259afc1 100644
--- a/src/gpu/SDL_gpu.c
+++ b/src/gpu/SDL_gpu.c
@@ -705,11 +705,11 @@ SDL_GPUComputePipeline *SDL_CreateGPUComputePipeline(
SDL_assert_release(!"Incompatible shader format for GPU backend");
return NULL;
}
- if (createinfo->num_writeonly_storage_textures > MAX_COMPUTE_WRITE_TEXTURES) {
+ if (createinfo->num_readwrite_storage_textures > MAX_COMPUTE_WRITE_TEXTURES) {
SDL_assert_release(!"Compute pipeline write-only texture count cannot be higher than 8!");
return NULL;
}
- if (createinfo->num_writeonly_storage_buffers > MAX_COMPUTE_WRITE_BUFFERS) {
+ if (createinfo->num_readwrite_storage_buffers > MAX_COMPUTE_WRITE_BUFFERS) {
SDL_assert_release(!"Compute pipeline write-only buffer count cannot be higher than 8!");
return NULL;
}
@@ -1868,9 +1868,9 @@ void SDL_EndGPURenderPass(
SDL_GPUComputePass *SDL_BeginGPUComputePass(
SDL_GPUCommandBuffer *command_buffer,
- const SDL_GPUStorageTextureWriteOnlyBinding *storage_texture_bindings,
+ const SDL_GPUStorageTextureReadWriteBinding *storage_texture_bindings,
Uint32 num_storage_texture_bindings,
- const SDL_GPUStorageBufferWriteOnlyBinding *storage_buffer_bindings,
+ const SDL_GPUStorageBufferReadWriteBinding *storage_buffer_bindings,
Uint32 num_storage_buffer_bindings)
{
CommandBufferCommonHeader *commandBufferHeader;
@@ -1898,6 +1898,16 @@ SDL_GPUComputePass *SDL_BeginGPUComputePass(
if (COMMAND_BUFFER_DEVICE->debug_mode) {
CHECK_COMMAND_BUFFER_RETURN_NULL
CHECK_ANY_PASS_IN_PROGRESS("Cannot begin compute pass during another pass!", NULL)
+
+ for (Uint32 i = 0; i < num_storage_texture_bindings; i += 1) {
+ TextureCommonHeader *header = (TextureCommonHeader *)storage_texture_bindings[i].texture;
+ if (!(header->info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) && !(header->info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE)) {
+ SDL_assert_release(!"Texture must be created with COMPUTE_STORAGE_WRITE or COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE flag");
+ return NULL;
+ }
+ }
+
+ // TODO: validate buffer usage?
}
COMMAND_BUFFER_DEVICE->BeginComputePass(
diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h
index e2e60cffb3041..5388696d1021d 100644
--- a/src/gpu/SDL_sysgpu.h
+++ b/src/gpu/SDL_sysgpu.h
@@ -503,9 +503,9 @@ struct SDL_GPUDevice
void (*BeginComputePass)(
SDL_GPUCommandBuffer *commandBuffer,
- const SDL_GPUStorageTextureWriteOnlyBinding *storageTextureBindings,
+ const SDL_GPUStorageTextureReadWriteBinding *storageTextureBindings,
Uint32 numStorageTextureBindings,
- const SDL_GPUStorageBufferWriteOnlyBinding *storageBufferBindings,
+ const SDL_GPUStorageBufferReadWriteBinding *storageBufferBindings,
Uint32 numStorageBufferBindings);
void (*BindComputePipeline)(
diff --git a/src/gpu/d3d11/SDL_gpu_d3d11.c b/src/gpu/d3d11/SDL_gpu_d3d11.c
index 073b2ed28fbb6..af0f6e93ed4bd 100644
--- a/src/gpu/d3d11/SDL_gpu_d3d11.c
+++ b/src/gpu/d3d11/SDL_gpu_d3d11.c
@@ -529,9 +529,9 @@ typedef struct D3D11ComputePipeline
Uint32 numSamplers;
Uint32 numReadonlyStorageTextures;
- Uint32 numWriteonlyStorageTextures;
+ Uint32 numReadWriteStorageTextures;
Uint32 numReadonlyStorageBuffers;
- Uint32 numWriteonlyStorageBuffers;
+ Uint32 numReadWriteStorageBuffers;
Uint32 numUniformBuffers;
} D3D11ComputePipeline;
@@ -687,8 +687,8 @@ typedef struct D3D11CommandBuffer
D3D11Sampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
D3D11Texture *computeReadOnlyStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
D3D11Buffer *computeReadOnlyStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
- D3D11TextureSubresource *computeWriteOnlyStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
- D3D11Buffer *computeWriteOnlyStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
+ D3D11TextureSubresource *computeReadWriteStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
+ D3D11Buffer *computeReadWriteStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
// Uniform buffers
D3D11UniformBuffer *vertexUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
@@ -1524,9 +1524,9 @@ static SDL_GPUComputePipeline *D3D11_CreateComputePipeline(
pipeline->computeShader = shader;
pipeline->numSamplers = createinfo->num_samplers;
pipeline->numReadonlyStorageTextures = createinfo->num_readonly_storage_textures;
- pipeline->numWriteonlyStorageTextures = createinfo->num_writeonly_storage_textures;
+ pipeline->numReadWriteStorageTextures = createinfo->num_readwrite_storage_textures;
pipeline->numReadonlyStorageBuffers = createinfo->num_readonly_storage_buffers;
- pipeline->numWriteonlyStorageBuffers = createinfo->num_writeonly_storage_buffers;
+ pipeline->numReadWriteStorageBuffers = createinfo->num_readwrite_storage_buffers;
pipeline->numUniformBuffers = createinfo->num_uniform_buffers;
// thread counts are ignored in d3d11
@@ -1891,7 +1891,8 @@ static D3D11Texture *D3D11_INTERNAL_CreateTexture(
(createInfo->usage & SDL_GPU_TEXTUREUSAGE_GRAPHICS_STORAGE_READ) ||
(createInfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_READ);
needSubresourceUAV =
- (createInfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE);
+ (createInfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) ||
+ (createInfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE);
isMultisample = createInfo->sample_count > SDL_GPU_SAMPLECOUNT_1;
isStaging = createInfo->usage == 0;
isMippable =
@@ -3224,8 +3225,8 @@ static SDL_GPUCommandBuffer *D3D11_AcquireCommandBuffer(
SDL_zeroa(commandBuffer->computeSamplerTextures);
SDL_zeroa(commandBuffer->computeReadOnlyStorageTextures);
SDL_zeroa(commandBuffer->computeReadOnlyStorageBuffers);
- SDL_zeroa(commandBuffer->computeWriteOnlyStorageTextureSubresources);
- SDL_zeroa(commandBuffer->computeWriteOnlyStorageBuffers);
+ SDL_zeroa(commandBuffer->computeReadWriteStorageTextureSubresources);
+ SDL_zeroa(commandBuffer->computeReadWriteStorageBuffers);
bool acquireFenceResult = D3D11_INTERNAL_AcquireFence(commandBuffer);
commandBuffer->autoReleaseFence = 1;
@@ -4284,9 +4285,9 @@ static void D3D11_Blit(
static void D3D11_BeginComputePass(
SDL_GPUCommandBuffer *commandBuffer,
- const SDL_GPUStorageTextureWriteOnlyBinding *storageTextureBindings,
+ const SDL_GPUStorageTextureReadWriteBinding *storageTextureBindings,
Uint32 numStorageTextureBindings,
- const SDL_GPUStorageBufferWriteOnlyBinding *storageBufferBindings,
+ const SDL_GPUStorageBufferReadWriteBinding *storageBufferBindings,
Uint32 numStorageBufferBindings)
{
D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer;
@@ -4310,7 +4311,7 @@ static void D3D11_BeginComputePass(
d3d11CommandBuffer,
textureSubresource->parent);
- d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources[i] = textureSubresource;
+ d3d11CommandBuffer->computeReadWriteStorageTextureSubresources[i] = textureSubresource;
}
for (Uint32 i = 0; i < numStorageBufferBindings; i += 1) {
@@ -4325,15 +4326,15 @@ static void D3D11_BeginComputePass(
d3d11CommandBuffer,
buffer);
- d3d11CommandBuffer->computeWriteOnlyStorageBuffers[i] = buffer;
+ d3d11CommandBuffer->computeReadWriteStorageBuffers[i] = buffer;
}
for (Uint32 i = 0; i < numStorageTextureBindings; i += 1) {
- uavs[i] = d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources[i]->uav;
+ uavs[i] = d3d11CommandBuffer->computeReadWriteStorageTextureSubresources[i]->uav;
}
for (Uint32 i = 0; i < numStorageBufferBindings; i += 1) {
- uavs[numStorageTextureBindings + i] = d3d11CommandBuffer->computeWriteOnlyStorageBuffers[i]->uav;
+ uavs[numStorageTextureBindings + i] = d3d11CommandBuffer->computeReadWriteStorageBuffers[i]->uav;
}
ID3D11DeviceContext_CSSetUnorderedAccessViews(
@@ -4622,8 +4623,8 @@ static void D3D11_EndComputePass(
SDL_zeroa(d3d11CommandBuffer->computeSamplerTextures);
SDL_zeroa(d3d11CommandBuffer->computeReadOnlyStorageTextures);
SDL_zeroa(d3d11CommandBuffer->computeReadOnlyStorageBuffers);
- SDL_zeroa(d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources);
- SDL_zeroa(d3d11CommandBuffer->computeWriteOnlyStorageBuffers);
+ SDL_zeroa(d3d11CommandBuffer->computeReadWriteStorageTextureSubresources);
+ SDL_zeroa(d3d11CommandBuffer->computeReadWriteStorageBuffers);
}
// Fence Cleanup
@@ -5763,6 +5764,7 @@ static bool D3D11_SupportsTextureFormat(
DXGI_FORMAT dxgiFormat = SDLToD3D11_TextureFormat[format];
DXGI_FORMAT typelessFormat = D3D11_INTERNAL_GetTypelessFormat(dxgiFormat);
UINT formatSupport, sampleableFormatSupport;
+ D3D11_FEATURE_DATA_FORMAT_SUPPORT2 formatSupport2 = { dxgiFormat, 0 };
HRESULT res;
res = ID3D11Device_CheckFormatSupport(
@@ -5787,6 +5789,19 @@ static bool D3D11_SupportsTextureFormat(
}
}
+ // Checks for SIMULTANEOUS_READ_WRITE support
+ if (usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE) {
+ res = ID3D11Device_CheckFeatureSupport(
+ renderer->device,
+ D3D11_FEATURE_FORMAT_SUPPORT2,
+ &formatSupport2,
+ sizeof(formatSupport2));
+ if (FAILED(res)) {
+ // Format is apparently unknown
+ return false;
+ }
+ }
+
// Is the texture type supported?
if (type == SDL_GPU_TEXTURETYPE_2D && !(formatSupport & D3D11_FORMAT_SUPPORT_TEXTURE2D)) {
return false;
@@ -5815,6 +5830,9 @@ static bool D3D11_SupportsTextureFormat(
// TYPED_UNORDERED_ACCESS_VIEW implies support for typed UAV stores
return false;
}
+ if ((usage & (SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE) && !(formatSupport2.OutFormatSupport2 & D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD))) {
+ return false;
+ }
if ((usage & SDL_GPU_TEXTUREUSAGE_COLOR_TARGET) && !(formatSupport & D3D11_FORMAT_SUPPORT_RENDER_TARGET)) {
return false;
}
diff --git a/src/gpu/d3d12/SDL_gpu_d3d12.c b/src/gpu/d3d12/SDL_gpu_d3d12.c
index ed1238b553f16..df81d6a595fe8 100644
--- a/src/gpu/d3d12/SDL_gpu_d3d12.c
+++ b/src/gpu/d3d12/SDL_gpu_d3d12.c
@@ -732,10 +732,10 @@ struct D3D12CommandBuffer
D3D12Sampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
D3D12Texture *computeReadOnlyStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
D3D12Buffer *computeReadOnlyStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
- D3D12TextureSubresource *computeWriteOnlyStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
- Uint32 computeWriteOnlyStorageTextureSubresourceCount;
- D3D12Buffer *computeWriteOnlyStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
- Uint32 computeWriteOnlyStorageBufferCount;
+ D3D12TextureSubresource *computeReadWriteStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
+ Uint32 computeReadWriteStorageTextureSubresourceCount;
+ D3D12Buffer *computeReadWriteStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
+ Uint32 computeReadWriteStorageBufferCount;
D3D12UniformBuffer *computeUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
// Resource tracking
@@ -825,8 +825,8 @@ typedef struct D3D12ComputeRootSignature
Sint32 samplerTextureRootIndex;
Sint32 readOnlyStorageTextureRootIndex;
Sint32 readOnlyStorageBufferRootIndex;
- Sint32 writeOnlyStorageTextureRootIndex;
- Sint32 writeOnlyStorageBufferRootIndex;
+ Sint32 readWriteStorageTextureRootIndex;
+ Sint32 readWriteStorageBufferRootIndex;
Sint32 uniformBufferRootIndex[MAX_UNIFORM_BUFFERS_PER_STAGE];
} D3D12ComputeRootSignature;
@@ -838,8 +838,8 @@ struct D3D12ComputePipeline
Uint32 numSamplers;
Uint32 numReadOnlyStorageTextures;
Uint32 numReadOnlyStorageBuffers;
- Uint32 numWriteOnlyStorageTextures;
- Uint32 numWriteOnlyStorageBuffers;
+ Uint32 numReadWriteStorageTextures;
+ Uint32 numReadWriteStorageBuffers;
Uint32 numUniformBuffers;
SDL_AtomicInt referenceCount;
@@ -1521,13 +1521,17 @@ static void D3D12_INTERNAL_TextureSubresourceBarrier(
D3D12_RESOURCE_STATES destinationState,
D3D12TextureSubresource *textureSubresource)
{
+ bool needsUAVBarrier =
+ (textureSubresource->parent->container->header.info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) ||
+ (textureSubresource->parent->container->header.info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE);
+
D3D12_INTERNAL_ResourceBarrier(
commandBuffer,
sourceState,
destinationState,
textureSubresource->parent->resource,
textureSubresource->index,
- textureSubresource->parent->container->header.info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE);
+ needsUAVBarrier);
}
static D3D12_RESOURCE_STATES D3D12_INTERNAL_DefaultTextureResourceState(
@@ -1547,6 +1551,8 @@ static D3D12_RESOURCE_STATES D3D12_INTERNAL_DefaultTextureResourceState(
return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
} else if (usageFlags & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) {
return D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
+ } else if (usageFlags & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE) {
+ return D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
} else {
SDL_LogError(SDL_LOG_CATEGORY_GPU, "Texture has no default usage mode!");
return D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE;
@@ -2174,8 +2180,8 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
d3d12ComputeRootSignature->samplerTextureRootIndex = -1;
d3d12ComputeRootSignature->readOnlyStorageTextureRootIndex = -1;
d3d12ComputeRootSignature->readOnlyStorageBufferRootIndex = -1;
- d3d12ComputeRootSignature->writeOnlyStorageTextureRootIndex = -1;
- d3d12ComputeRootSignature->writeOnlyStorageBufferRootIndex = -1;
+ d3d12ComputeRootSignature->readWriteStorageTextureRootIndex = -1;
+ d3d12ComputeRootSignature->readWriteStorageBufferRootIndex = -1;
for (Uint32 i = 0; i < MAX_UNIFORM_BUFFERS_PER_STAGE; i += 1) {
d3d12ComputeRootSignature->uniformBufferRootIndex[i] = -1;
@@ -2251,9 +2257,9 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
parameterCount += 1;
}
- if (createInfo->num_writeonly_storage_textures) {
+ if (createInfo->num_readwrite_storage_textures) {
descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
- descriptorRange.NumDescriptors = createInfo->num_writeonly_storage_textures;
+ descriptorRange.NumDescriptors = createInfo->num_readwrite_storage_textures;
descriptorRange.BaseShaderRegister = 0;
descriptorRange.RegisterSpace = 1;
descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
@@ -2264,15 +2270,15 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
rootParameter.DescriptorTable.pDescriptorRanges = &descriptorRanges[rangeCount];
rootParameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // ALL is used for compute
rootParameters[parameterCount] = rootParameter;
- d3d12ComputeRootSignature->writeOnlyStorageTextureRootIndex = parameterCount;
+ d3d12ComputeRootSignature->readWriteStorageTextureRootIndex = parameterCount;
rangeCount += 1;
parameterCount += 1;
}
- if (createInfo->num_writeonly_storage_buffers) {
+ if (createInfo->num_readwrite_storage_buffers) {
descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
- descriptorRange.NumDescriptors = createInfo->num_writeonly_storage_buffers;
- descriptorRange.BaseShaderRegister = createInfo->num_writeonly_storage_textures;
+ descriptorRange.NumDescriptors = createInfo->num_readwrite_storage_buffers;
+ descriptorRange.BaseShaderRegister = createInfo->num_readwrite_storage_textures;
descriptorRange.RegisterSpace = 1;
descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
descriptorRanges[rangeCount] = descriptorRange;
@@ -2282,7 +2288,7 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
rootParameter.DescriptorTable.pDescriptorRanges = &descriptorRanges[rangeCount];
rootParameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // ALL is used for compute
rootParameters[parameterCount] = rootParameter;
- d3d12ComputeRootSignature->writeOnlyStorageBufferRootIndex = parameterCount;
+ d3d12ComputeRootSignature->readWriteStorageBufferRootIndex = parameterCount;
rangeCount += 1;
parameterCount += 1;
}
@@ -2409,8 +2415,8 @@ static SDL_GPUComputePipeline *D3D12_CreateComputePipeline(
computePipeline->numSamplers = createinfo->num_samplers;
computePipeline->numReadOnlyStorageTextures = createinfo->num_readonly_storage_textures;
computePipeline->numReadOnlyStorageBuffers = createinfo->num_readonly_storage_buffers;
- computePipeline->numWriteOnlyStorageTextures = createinfo->num_writeonly_storage_textures;
- computePipeline->numWriteOnlyStorageBuffers = createinfo->num_writeonly_storage_buffers;
+ computePipeline->numReadWriteStorageTextures = createinfo->num_readwrite_storage_textures;
+ computePipeline->numReadWriteStorageBuffers = createinfo->num_readwrite_storage_buffers;
computePipeline->numUniformBuffers = createinfo->num_uniform_buffers;
SDL_SetAtomicInt(&computePipeline->referenceCount, 0);
@@ -2792,6 +2798,9 @@ static D3D12Texture *D3D12_INTERNAL_CreateTexture(
D3D12_RESOURCE_STATES initialState = (D3D12_RESOURCE_STATES)0;
D3D12_CLEAR_VALUE clearValue;
bool useClearValue = false;
+ bool needsUAV =
+ (createinfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) ||
+ (createinfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE);
HRESULT res;
texture = (D3D12Texture *)SDL_calloc(1, sizeof(D3D12Texture));
@@ -2819,7 +2828,7 @@ static D3D12Texture *D3D12_INTERNAL_CreateTexture(
clearValue.DepthStencil.Stencil = (UINT8)SDL_GetNumberProperty(createinfo->props, SDL_PROP_GPU_CREATETEXTURE_D3D12_CLEAR_STENCIL_UINT8, 0);
}
- if (createinfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE
(Patch may be truncated, please check the link at the top of this post.)