SDL: Added support for custom shaders with the GPU renderer

From 2aee105b43f77ef0a84e7d9c5acab29a9ca0d7b5 Mon Sep 17 00:00:00 2001
From: Sam Lantinga <[EMAIL REDACTED]>
Date: Thu, 13 Mar 2025 16:41:58 -0700
Subject: [PATCH] Added support for custom shaders with the GPU renderer

Added an example of MSDF font rendering with the SDL 2D renderer
---
 include/SDL3/SDL_render.h           | 112 ++++++++
 src/dynapi/SDL_dynapi.sym           |   4 +
 src/dynapi/SDL_dynapi_overrides.h   |   4 +
 src/dynapi/SDL_dynapi_procs.h       |   4 +
 src/render/SDL_render.c             | 153 +++++++++++
 src/render/SDL_sysrender.h          |  32 +++
 src/render/gpu/SDL_pipeline_gpu.c   |  32 +--
 src/render/gpu/SDL_pipeline_gpu.h   |   1 +
 src/render/gpu/SDL_render_gpu.c     |  31 ++-
 src/render/gpu/SDL_shaders_gpu.c    |   6 +
 src/render/gpu/SDL_shaders_gpu.h    |   1 +
 test/CMakeLists.txt                 |   3 +-
 test/msdf_font.bmp                  | Bin 0 -> 196662 bytes
 test/msdf_font.csv                  |  95 +++++++
 test/testgpurender_msdf.c           | 321 ++++++++++++++++++++++
 test/testgpurender_msdf.frag.dxil.h | 399 ++++++++++++++++++++++++++++
 test/testgpurender_msdf.frag.hlsl   |  37 +++
 test/testgpurender_msdf.frag.msl.h  |  97 +++++++
 test/testgpurender_msdf.frag.spv.h  | 162 +++++++++++
 19 files changed, 1460 insertions(+), 34 deletions(-)
 create mode 100755 test/msdf_font.bmp
 create mode 100755 test/msdf_font.csv
 create mode 100644 test/testgpurender_msdf.c
 create mode 100644 test/testgpurender_msdf.frag.dxil.h
 create mode 100644 test/testgpurender_msdf.frag.hlsl
 create mode 100644 test/testgpurender_msdf.frag.msl.h
 create mode 100644 test/testgpurender_msdf.frag.spv.h

diff --git a/include/SDL3/SDL_render.h b/include/SDL3/SDL_render.h
index 432cacfc4db84..242556c127bdb 100644
--- a/include/SDL3/SDL_render.h
+++ b/include/SDL3/SDL_render.h
@@ -59,6 +59,7 @@
 #include <SDL3/SDL_rect.h>
 #include <SDL3/SDL_surface.h>
 #include <SDL3/SDL_video.h>
+#include <SDL3/SDL_gpu.h>
 
 #include <SDL3/SDL_begin_code.h>
 /* Set up for C function definitions, even when using C++ */
@@ -2709,6 +2710,117 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SetDefaultTextureScaleMode(SDL_Renderer *re
  */
 extern SDL_DECLSPEC bool SDLCALL SDL_GetDefaultTextureScaleMode(SDL_Renderer *renderer, SDL_ScaleMode *scale_mode);
 
+/**
+ * GPU render state description.
+ *
+ * This structure should be initialized using SDL_INIT_INTERFACE().
+ *
+ * \since This struct is available since SDL 3.4.0.
+ *
+ * \sa SDL_CreateGPURenderState
+ */
+typedef struct SDL_GPURenderStateDesc
+{
+    Uint32 version;                 /**< the version of this interface */
+
+    SDL_GPUShader *fragment_shader; /**< The fragment shader to use when this render state is active */
+
+    Sint32 num_sampler_bindings;    /**< The number of additional fragment samplers to bind when this render state is active */
+    const SDL_GPUTextureSamplerBinding *sampler_bindings;   /**< Additional fragment samplers to bind when this render state is active */
+
+    Sint32 num_storage_textures;    /**< The number of storage textures to bind when this render state is active */
+    SDL_GPUTexture *const *storage_textures;    /**< Storage textures to bind when this render state is active */
+
+    Sint32 num_storage_buffers;    /**< The number of storage buffers to bind when this render state is active */
+    SDL_GPUBuffer *const *storage_buffers;      /**< Storage buffers to bind when this render state is active */
+} SDL_GPURenderStateDesc;
+
+/* Check the size of SDL_GPURenderStateDesc
+ *
+ * If this assert fails, either the compiler is padding to an unexpected size,
+ * or the interface has been updated and this should be updated to match and
+ * the code using this interface should be updated to handle the old version.
+ */
+SDL_COMPILE_TIME_ASSERT(SDL_GPURenderStateDesc_SIZE,
+    (sizeof(void *) == 4 && sizeof(SDL_GPURenderStateDesc) == 32) ||
+    (sizeof(void *) == 8 && sizeof(SDL_GPURenderStateDesc) == 64));
+
+/**
+ * A custom GPU render state.
+ *
+ * \since This struct is available since SDL 3.4.0.
+ *
+ * \sa SDL_CreateGPURenderState
+ * \sa SDL_SetGPURenderStateFragmentUniformData
+ * \sa SDL_SetRenderGPUState
+ * \sa SDL_DestroyGPURenderState
+ */
+typedef struct SDL_GPURenderState SDL_GPURenderState;
+
+/**
+ * Create custom GPU render state.
+ *
+ * \param renderer the renderer to use.
+ * \param desc GPU render state description, initialized using SDL_INIT_INTERFACE().
+ * \returns a custom GPU render state or NULL on failure; call SDL_GetError() for more information.
+ *
+ * \threadsafety This function should be called on the thread that created the renderer.
+ *
+ * \since This function is available since SDL 3.4.0.
+ *
+ * \sa SDL_SetGPURenderStateFragmentUniformData
+ * \sa SDL_SetRenderGPUState
+ * \sa SDL_DestroyGPURenderState
+ */
+extern SDL_DECLSPEC SDL_GPURenderState * SDLCALL SDL_CreateGPURenderState(SDL_Renderer *renderer, SDL_GPURenderStateDesc *desc);
+
+/**
+ * Set fragment shader uniform variables in a custom GPU render state.
+ *
+ * The data is copied and will be pushed using SDL_PushGPUFragmentUniformData() during draw call execution.
+ *
+ * \param state the state to modify.
+ * \param slot_index the fragment uniform slot to push data to.
+ * \param data client data to write.
+ * \param length the length of the data to write.
+ * \returns true on success or false on failure; call SDL_GetError() for more
+ *          information.
+ *
+ * \threadsafety This function should be called on the thread that created the renderer.
+ *
+ * \since This function is available since SDL 3.4.0.
+ */
+extern SDL_DECLSPEC bool SDLCALL SDL_SetGPURenderStateFragmentUniformData(SDL_GPURenderState *state, Uint32 slot_index, const void *data, Uint32 length);
+
+/**
+ * Set custom GPU render state.
+ *
+ * This function sets custom GPU render state for subsequent draw calls. This allows using custom shaders with the GPU renderer.
+ *
+ * \param renderer the renderer to use.
+ * \param state the state to use, or NULL to clear custom GPU render state.
+ * \returns true on success or false on failure; call SDL_GetError() for more
+ *          information.
+ *
+ * \threadsafety This function should be called on the thread that created the renderer.
+ *
+ * \since This function is available since SDL 3.4.0.
+ */
+extern SDL_DECLSPEC bool SDLCALL SDL_SetRenderGPUState(SDL_Renderer *renderer, SDL_GPURenderState *state);
+
+/**
+ * Destroy custom GPU render state.
+ *
+ * \param state the state to destroy.
+ *
+ * \threadsafety This function should be called on the thread that created the renderer.
+ *
+ * \since This function is available since SDL 3.4.0.
+ *
+ * \sa SDL_CreateGPURenderState
+ */
+extern SDL_DECLSPEC void SDLCALL SDL_DestroyGPURenderState(SDL_GPURenderState *state);
+
 /* Ends C function definitions when using C++ */
 #ifdef __cplusplus
 }
diff --git a/src/dynapi/SDL_dynapi.sym b/src/dynapi/SDL_dynapi.sym
index ecf08a7d938b2..d40edd232005d 100644
--- a/src/dynapi/SDL_dynapi.sym
+++ b/src/dynapi/SDL_dynapi.sym
@@ -1238,6 +1238,10 @@ SDL3_0.0.0 {
     SDL_RenderTexture9GridTiled;
     SDL_SetDefaultTextureScaleMode;
     SDL_GetDefaultTextureScaleMode;
+    SDL_CreateGPURenderState;
+    SDL_SetGPURenderStateFragmentUniformData;
+    SDL_SetRenderGPUState;
+    SDL_DestroyGPURenderState;
     # extra symbols go here (don't modify this line)
   local: *;
 };
diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h
index 2b35173adcdd3..31f8229eaf10f 100644
--- a/src/dynapi/SDL_dynapi_overrides.h
+++ b/src/dynapi/SDL_dynapi_overrides.h
@@ -1263,3 +1263,7 @@
 #define SDL_RenderTexture9GridTiled SDL_RenderTexture9GridTiled_REAL
 #define SDL_SetDefaultTextureScaleMode SDL_SetDefaultTextureScaleMode_REAL
 #define SDL_GetDefaultTextureScaleMode SDL_GetDefaultTextureScaleMode_REAL
+#define SDL_CreateGPURenderState SDL_CreateGPURenderState_REAL
+#define SDL_SetGPURenderStateFragmentUniformData SDL_SetGPURenderStateFragmentUniformData_REAL
+#define SDL_SetRenderGPUState SDL_SetRenderGPUState_REAL
+#define SDL_DestroyGPURenderState SDL_DestroyGPURenderState_REAL
diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h
index 6ea49ff7c9d03..c87a014dfbcdd 100644
--- a/src/dynapi/SDL_dynapi_procs.h
+++ b/src/dynapi/SDL_dynapi_procs.h
@@ -1271,3 +1271,7 @@ SDL_DYNAPI_PROC(bool,SDL_SetRelativeMouseTransform,(SDL_MouseMotionTransformCall
 SDL_DYNAPI_PROC(bool,SDL_RenderTexture9GridTiled,(SDL_Renderer *a,SDL_Texture *b,const SDL_FRect *c,float d,float e,float f,float g,float h,const SDL_FRect *i,float j),(a,b,c,d,e,f,g,h,i,j),return)
 SDL_DYNAPI_PROC(bool,SDL_SetDefaultTextureScaleMode,(SDL_Renderer *a,SDL_ScaleMode b),(a,b),return)
 SDL_DYNAPI_PROC(bool,SDL_GetDefaultTextureScaleMode,(SDL_Renderer *a,SDL_ScaleMode *b),(a,b),return)
+SDL_DYNAPI_PROC(SDL_GPURenderState*,SDL_CreateGPURenderState,(SDL_Renderer *a,SDL_GPURenderStateDesc *b),(a,b),return)
+SDL_DYNAPI_PROC(bool,SDL_SetGPURenderStateFragmentUniformData,(SDL_GPURenderState *a,Uint32 b,const void *c,Uint32 d),(a,b,c,d),return)
+SDL_DYNAPI_PROC(bool,SDL_SetRenderGPUState,(SDL_Renderer *a,SDL_GPURenderState *b),(a,b),return)
+SDL_DYNAPI_PROC(void,SDL_DestroyGPURenderState,(SDL_GPURenderState *a),(a),)
diff --git a/src/render/SDL_render.c b/src/render/SDL_render.c
index a03796a4e6ff8..8622e3a1af9e3 100644
--- a/src/render/SDL_render.c
+++ b/src/render/SDL_render.c
@@ -346,6 +346,16 @@ static bool FlushRenderCommandsIfTextureNeeded(SDL_Texture *texture)
     return true;
 }
 
+static bool FlushRenderCommandsIfGPURenderStateNeeded(SDL_GPURenderState *state)
+{
+    SDL_Renderer *renderer = state->renderer;
+    if (state->last_command_generation == renderer->render_command_generation) {
+        // a draw command queued in the current generation stamped this state, so the queue still refers to it: flush before the state is modified or freed
+        return FlushRenderCommands(renderer);
+    }
+    return true; // state was not stamped with the current generation, so nothing is flushed
+}
+
 bool SDL_FlushRenderer(SDL_Renderer *renderer)
 {
     if (!FlushRenderCommands(renderer)) {
@@ -577,6 +587,10 @@ static SDL_RenderCommand *PrepQueueCmdDraw(SDL_Renderer *renderer, const SDL_Ren
                 cmd->data.draw.texture_scale_mode = texture->scaleMode;
             }
             cmd->data.draw.texture_address_mode = SDL_TEXTURE_ADDRESS_CLAMP;
+            cmd->data.draw.gpu_render_state = renderer->gpu_render_state;
+            if (renderer->gpu_render_state) {
+                renderer->gpu_render_state->last_command_generation = renderer->render_command_generation;
+            }
         }
     }
     return cmd;
@@ -5824,3 +5838,142 @@ bool SDL_GetDefaultTextureScaleMode(SDL_Renderer *renderer, SDL_ScaleMode *scale
     }
     return true;
 }
+
+SDL_GPURenderState *SDL_CreateGPURenderState(SDL_Renderer *renderer, SDL_GPURenderStateDesc *desc)
+{
+    CHECK_RENDERER_MAGIC(renderer, NULL); // this function returns a pointer, so the failure value is NULL, not false
+
+    if (!desc) {
+        SDL_InvalidParamError("desc");
+        return NULL;
+    }
+
+    if (desc->version < sizeof(*desc)) {
+        // Update this to handle older versions of this interface
+        SDL_SetError("Invalid desc, should be initialized with SDL_INIT_INTERFACE()");
+        return NULL;
+    }
+
+    if (!desc->fragment_shader) {
+        SDL_SetError("desc->fragment_shader is required");
+        return NULL;
+    }
+
+    SDL_GPUDevice *device = (SDL_GPUDevice *)SDL_GetPointerProperty(renderer->props, SDL_PROP_RENDERER_GPU_DEVICE_POINTER, NULL);
+    if (!device) {
+        SDL_SetError("Renderer isn't associated with a GPU device");
+        return NULL;
+    }
+
+    SDL_GPURenderState *state = (SDL_GPURenderState *)SDL_calloc(1, sizeof(*state));
+    if (!state) {
+        return NULL;
+    }
+
+    state->renderer = renderer; // must be set before any failure path below: SDL_DestroyGPURenderState() dereferences it
+    state->fragment_shader = desc->fragment_shader;
+
+    if (desc->num_sampler_bindings > 0) {
+        state->sampler_bindings = (SDL_GPUTextureSamplerBinding *)SDL_calloc(desc->num_sampler_bindings, sizeof(*state->sampler_bindings));
+        if (!state->sampler_bindings) {
+            SDL_DestroyGPURenderState(state); // releases the partially built state
+            return NULL;
+        }
+        SDL_memcpy(state->sampler_bindings, desc->sampler_bindings, desc->num_sampler_bindings * sizeof(*state->sampler_bindings)); // private copy of the caller's array
+        state->num_sampler_bindings = desc->num_sampler_bindings;
+    }
+
+    if (desc->num_storage_textures > 0) {
+        state->storage_textures = (SDL_GPUTexture **)SDL_calloc(desc->num_storage_textures, sizeof(*state->storage_textures));
+        if (!state->storage_textures) {
+            SDL_DestroyGPURenderState(state);
+            return NULL;
+        }
+        SDL_memcpy(state->storage_textures, desc->storage_textures, desc->num_storage_textures * sizeof(*state->storage_textures)); // copies the pointer array only
+        state->num_storage_textures = desc->num_storage_textures;
+    }
+
+    if (desc->num_storage_buffers > 0) {
+        state->storage_buffers = (SDL_GPUBuffer **)SDL_calloc(desc->num_storage_buffers, sizeof(*state->storage_buffers));
+        if (!state->storage_buffers) {
+            SDL_DestroyGPURenderState(state);
+            return NULL;
+        }
+        SDL_memcpy(state->storage_buffers, desc->storage_buffers, desc->num_storage_buffers * sizeof(*state->storage_buffers)); // copies the pointer array only
+        state->num_storage_buffers = desc->num_storage_buffers;
+    }
+
+    return state;
+}
+
+bool SDL_SetGPURenderStateFragmentUniformData(SDL_GPURenderState *state, Uint32 slot_index, const void *data, Uint32 length)
+{
+    if (!state) {
+        return SDL_InvalidParamError("state");
+    }
+
+    if (!FlushRenderCommandsIfGPURenderStateNeeded(state)) { // queued draws hold this state by pointer; submit them before its data changes
+        return false;
+    }
+
+    for (int i = 0; i < state->num_uniform_buffers; i++) {
+        SDL_GPURenderStateUniformBuffer *buffer = &state->uniform_buffers[i];
+        if (buffer->slot_index == slot_index) { // slot already has data: resize its storage and overwrite
+            void *new_data = SDL_realloc(buffer->data, length);
+            if (!new_data) {
+                return false;
+            }
+            SDL_memcpy(new_data, data, length);
+            buffer->data = new_data;
+            buffer->length = length;
+            return true;
+        }
+    }
+
+    void *payload = SDL_malloc(length); // first use of this slot: copy the caller's data
+    if (!payload) {
+        return false;
+    }
+    SDL_memcpy(payload, data, length);
+
+    // Grow the array only after the payload exists, so no failure path frees or moves it while state->uniform_buffers still points at the old block
+    SDL_GPURenderStateUniformBuffer *buffers = (SDL_GPURenderStateUniformBuffer *)SDL_realloc(state->uniform_buffers, (state->num_uniform_buffers + 1) * sizeof(*state->uniform_buffers));
+    if (!buffers) {
+        SDL_free(payload); // the existing array is untouched when SDL_realloc() fails
+        return false;
+    }
+    state->uniform_buffers = buffers;
+    SDL_GPURenderStateUniformBuffer *buffer = &buffers[state->num_uniform_buffers++];
+    buffer->slot_index = slot_index;
+    buffer->length = length;
+    buffer->data = payload;
+    return true;
+}
+
+bool SDL_SetRenderGPUState(SDL_Renderer *renderer, SDL_GPURenderState *state)
+{
+    CHECK_RENDERER_MAGIC(renderer, false);
+    // NOTE(review): state->renderer is not compared against renderer here, so a state created for another renderer is accepted as-is
+    renderer->gpu_render_state = state; // NULL clears it; PrepQueueCmdDraw() copies this pointer into the draw commands it queues
+    return true;
+}
+
+void SDL_DestroyGPURenderState(SDL_GPURenderState *state)
+{
+    if (!state) {
+        return;
+    }
+
+    FlushRenderCommandsIfGPURenderStateNeeded(state); // queued draws hold this state by pointer; submit them before it is freed
+    if (state->renderer->gpu_render_state == state) {
+        state->renderer->gpu_render_state = NULL; // otherwise the next queued draw would read and write freed memory
+    }
+    for (int i = 0; i < state->num_uniform_buffers; i++) {
+        SDL_free(state->uniform_buffers[i].data);
+    }
+    SDL_free(state->uniform_buffers);
+    SDL_free(state->sampler_bindings);
+    SDL_free(state->storage_textures);
+    SDL_free(state->storage_buffers);
+    SDL_free(state);
+}
diff --git a/src/render/SDL_sysrender.h b/src/render/SDL_sysrender.h
index 58692388ffd06..2e6cc76cde051 100644
--- a/src/render/SDL_sysrender.h
+++ b/src/render/SDL_sysrender.h
@@ -118,6 +118,36 @@ struct SDL_Texture
     SDL_Texture *next;
 };
 
+// Define the uniform data stored for one fragment uniform slot of a GPU render state
+typedef struct SDL_GPURenderStateUniformBuffer
+{
+    Uint32 slot_index; // fragment uniform slot passed to SDL_PushGPUFragmentUniformData()
+    void *data;        // heap copy of the caller's data, made by SDL_SetGPURenderStateFragmentUniformData()
+    Uint32 length;     // size of data in bytes
+} SDL_GPURenderStateUniformBuffer;
+
+// Define the GPU render state structure
+struct SDL_GPURenderState
+{
+    SDL_Renderer *renderer; // renderer passed to SDL_CreateGPURenderState()
+
+    Uint32 last_command_generation; // last command queue generation this state was in.
+
+    SDL_GPUShader *fragment_shader; // custom fragment shader from the desc; SDL_DestroyGPURenderState() does not release it
+
+    int num_sampler_bindings;
+    SDL_GPUTextureSamplerBinding *sampler_bindings; // copy of the desc array, freed with the state
+
+    int num_storage_textures;
+    SDL_GPUTexture **storage_textures; // copy of the desc pointer array; the textures themselves are not released
+
+    int num_storage_buffers;
+    SDL_GPUBuffer **storage_buffers; // copy of the desc pointer array; the buffers themselves are not released
+
+    int num_uniform_buffers;
+    SDL_GPURenderStateUniformBuffer *uniform_buffers; // one entry per slot set through SDL_SetGPURenderStateFragmentUniformData()
+};
+
 typedef enum
 {
     SDL_RENDERCMD_NO_OP,
@@ -158,6 +188,7 @@ typedef struct SDL_RenderCommand
             SDL_Texture *texture;
             SDL_ScaleMode texture_scale_mode;
             SDL_TextureAddressMode texture_address_mode;
+            SDL_GPURenderState *gpu_render_state;
         } draw;
         struct
         {
@@ -282,6 +313,7 @@ struct SDL_Renderer
     SDL_FColor color;        /**< Color for drawing operations values */
     SDL_BlendMode blendMode; /**< The drawing blend mode */
     SDL_TextureAddressMode texture_address_mode;
+    SDL_GPURenderState *gpu_render_state;
 
     SDL_RenderCommand *render_commands;
     SDL_RenderCommand *render_commands_tail;
diff --git a/src/render/gpu/SDL_pipeline_gpu.c b/src/render/gpu/SDL_pipeline_gpu.c
index 078337401b338..31da279777f85 100644
--- a/src/render/gpu/SDL_pipeline_gpu.c
+++ b/src/render/gpu/SDL_pipeline_gpu.c
@@ -27,40 +27,10 @@
 
 #include "../SDL_sysrender.h"
 
-struct GPU_PipelineCacheKeyStruct
-{
-    Uint64 blend_mode : 28;
-    Uint64 frag_shader : 4;
-    Uint64 vert_shader : 4;
-    Uint64 attachment_format : 6;
-    Uint64 primitive_type : 3;
-};
-
-typedef union GPU_PipelineCacheKeyConverter
-{
-    struct GPU_PipelineCacheKeyStruct as_struct;
-    Uint64 as_uint64;
-} GPU_PipelineCacheKeyConverter;
-
-SDL_COMPILE_TIME_ASSERT(GPU_PipelineCacheKeyConverter_Size, sizeof(GPU_PipelineCacheKeyConverter) <= sizeof(Uint64));
-
 static Uint32 SDLCALL HashPipelineCacheKey(void *userdata, const void *key)
 {
     const GPU_PipelineParameters *params = (const GPU_PipelineParameters *) key;
-    GPU_PipelineCacheKeyConverter cvt;
-    cvt.as_uint64 = 0;
-    cvt.as_struct.blend_mode = params->blend_mode;
-    cvt.as_struct.frag_shader = params->frag_shader;
-    cvt.as_struct.vert_shader = params->vert_shader;
-    cvt.as_struct.attachment_format = params->attachment_format;
-    cvt.as_struct.primitive_type = params->primitive_type;
-
-    // 64-bit uint hash function stolen from taisei (which stole it from somewhere else)
-    Uint64 x = cvt.as_uint64;
-    x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9);
-    x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb);
-    x = x ^ (x >> 31);
-    return (Uint32)(x & 0xffffffff);
+    return SDL_murmur3_32(params, sizeof(*params), 0); // hashes every byte of the struct, padding included; Draw() SDL_zero()s the params before filling them
 }
 
 static bool SDLCALL MatchPipelineCacheKey(void *userdata, const void *a, const void *b)
diff --git a/src/render/gpu/SDL_pipeline_gpu.h b/src/render/gpu/SDL_pipeline_gpu.h
index c3fc39bc692d4..96e7bd9433e17 100644
--- a/src/render/gpu/SDL_pipeline_gpu.h
+++ b/src/render/gpu/SDL_pipeline_gpu.h
@@ -33,6 +33,7 @@ typedef struct GPU_PipelineParameters
     GPU_VertexShaderID vert_shader;
     SDL_GPUTextureFormat attachment_format;
     SDL_GPUPrimitiveType primitive_type;
+    SDL_GPUShader *custom_frag_shader;
 } GPU_PipelineParameters;
 
 typedef struct GPU_PipelineCache
diff --git a/src/render/gpu/SDL_render_gpu.c b/src/render/gpu/SDL_render_gpu.c
index d7604f9290345..da28b7e55bd2f 100644
--- a/src/render/gpu/SDL_render_gpu.c
+++ b/src/render/gpu/SDL_render_gpu.c
@@ -541,6 +541,8 @@ static void Draw(
     }
 
     SDL_GPURenderPass *pass = data->state.render_pass;
+    SDL_GPURenderState *custom_state = cmd->data.draw.gpu_render_state;
+    SDL_GPUShader *custom_frag_shader = custom_state ? custom_state->fragment_shader : NULL;
     GPU_VertexShaderID v_shader;
     GPU_FragmentShaderID f_shader;
 
@@ -570,12 +572,18 @@ static void Draw(
         f_shader = FRAG_SHADER_COLOR;
     }
 
+    if (custom_frag_shader) {
+        f_shader = FRAG_SHADER_TEXTURE_CUSTOM;
+        data->shaders.frag_shaders[FRAG_SHADER_TEXTURE_CUSTOM] = custom_frag_shader;
+    }
+
     GPU_PipelineParameters pipe_params;
     SDL_zero(pipe_params);
     pipe_params.blend_mode = cmd->data.draw.blend;
     pipe_params.vert_shader = v_shader;
     pipe_params.frag_shader = f_shader;
     pipe_params.primitive_type = prim;
+    pipe_params.custom_frag_shader = custom_frag_shader;
 
     if (data->state.render_target) {
         pipe_params.attachment_format = ((GPU_TextureData *)data->state.render_target->internal)->format;
@@ -590,15 +598,34 @@ static void Draw(
 
     SDL_BindGPUGraphicsPipeline(pass, pipe);
 
+    Uint32 sampler_slot = 0;
     if (cmd->data.draw.texture) {
         GPU_TextureData *tdata = (GPU_TextureData *)cmd->data.draw.texture->internal;
         SDL_GPUTextureSamplerBinding sampler_bind;
         SDL_zero(sampler_bind);
         sampler_bind.sampler = *SamplerPointer(data, cmd->data.draw.texture_address_mode, cmd->data.draw.texture_scale_mode);
         sampler_bind.texture = tdata->texture;
-        SDL_BindGPUFragmentSamplers(pass, 0, &sampler_bind, 1);
+        SDL_BindGPUFragmentSamplers(pass, sampler_slot++, &sampler_bind, 1);
+    }
+    if (custom_state) {
+        if (custom_state->num_sampler_bindings > 0) {
+            SDL_BindGPUFragmentSamplers(pass, sampler_slot, custom_state->sampler_bindings, custom_state->num_sampler_bindings);
+        }
+        if (custom_state->num_storage_textures > 0) {
+            SDL_BindGPUFragmentStorageTextures(pass, 0, custom_state->storage_textures, custom_state->num_storage_textures);
+        }
+        if (custom_state->num_storage_buffers > 0) {
+            SDL_BindGPUFragmentStorageBuffers(pass, 0, custom_state->storage_buffers, custom_state->num_storage_buffers);
+        }
+        if (custom_state->num_uniform_buffers > 0) {
+            for (int i = 0; i < custom_state->num_uniform_buffers; i++) {
+                SDL_GPURenderStateUniformBuffer *ub = &custom_state->uniform_buffers[i];
+                SDL_PushGPUFragmentUniformData(data->state.command_buffer, ub->slot_index, ub->data, ub->length);
+            }
+        }
+    } else {
+        PushFragmentUniforms(data, cmd);
     }
-    PushFragmentUniforms(data, cmd);
 
     SDL_GPUBufferBinding buffer_bind;
     SDL_zero(buffer_bind);
diff --git a/src/render/gpu/SDL_shaders_gpu.c b/src/render/gpu/SDL_shaders_gpu.c
index 04e06f69db584..f09a40e46878d 100644
--- a/src/render/gpu/SDL_shaders_gpu.c
+++ b/src/render/gpu/SDL_shaders_gpu.c
@@ -196,6 +196,9 @@ bool GPU_InitShaders(GPU_Shaders *shaders, SDL_GPUDevice *device)
     }
 
     for (int i = 0; i < SDL_arraysize(frag_shader_sources); ++i) {
+        if (i == FRAG_SHADER_TEXTURE_CUSTOM) {
+            continue;
+        }
         shaders->frag_shaders[i] = CompileShader(
             &frag_shader_sources[i], device, SDL_GPU_SHADERSTAGE_FRAGMENT);
         if (shaders->frag_shaders[i] == NULL) {
@@ -215,6 +218,9 @@ void GPU_ReleaseShaders(GPU_Shaders *shaders, SDL_GPUDevice *device)
     }
 
     for (int i = 0; i < SDL_arraysize(shaders->frag_shaders); ++i) {
+        if (i == FRAG_SHADER_TEXTURE_CUSTOM) {
+            continue;
+        }
         SDL_ReleaseGPUShader(device, shaders->frag_shaders[i]);
         shaders->frag_shaders[i] = NULL;
     }
diff --git a/src/render/gpu/SDL_shaders_gpu.h b/src/render/gpu/SDL_shaders_gpu.h
index d3190fa227846..9dc03b6efa700 100644
--- a/src/render/gpu/SDL_shaders_gpu.h
+++ b/src/render/gpu/SDL_shaders_gpu.h
@@ -44,6 +44,7 @@ typedef enum
     FRAG_SHADER_TEXTURE_RGBA,
     FRAG_SHADER_TEXTURE_RGB_PIXELART,
     FRAG_SHADER_TEXTURE_RGBA_PIXELART,
+    FRAG_SHADER_TEXTURE_CUSTOM,
 
     NUM_FRAG_SHADERS,
 } GPU_FragmentShaderID;
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 3be04e215e5ee..efbea0c485a2c 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -39,7 +39,7 @@ add_library(sdltests_utils OBJECT
 )
 target_link_libraries(sdltests_utils PRIVATE SDL3::Headers)
 
-file(GLOB RESOURCE_FILES *.bmp *.wav *.hex moose.dat utf8.txt)
+file(GLOB RESOURCE_FILES *.bmp *.wav *.csv *.hex moose.dat utf8.txt)
 
 option(SDLTEST_TRACKMEM "Run tests with --trackmem" OFF)
 
@@ -348,6 +348,7 @@ add_sdl_test_executable(testgl SOURCES testgl.c)
 add_sdl_test_executable(testgles SOURCES testgles.c)
 add_sdl_test_executable(testgpu_simple_clear SOURCES testgpu_simple_clear.c)
 add_sdl_test_executable(testgpu_spinning_cube SOURCES testgpu_spinning_cube.c)
+add_sdl_test_executable(testgpurender_msdf MAIN_CALLBACKS NEEDS_RESOURCES TESTUTILS SOURCES testgpurender_msdf.c)
 if(ANDROID)
     target_link_libraries(testgles PRIVATE GLESv1_CM)
 elseif(IOS OR TVOS)
diff --git a/test/msdf_font.bmp b/test/msdf_font.bmp
new file mode 100755
index 0000000000000000000000000000000000000000..43d7b72709cb41d0a1d487a2aa966dd0e4c8598e
GIT binary patch
literal 196662
zcmeF437}QO|G-C<kR@x8%D%UuykA+alt@?06%pzcl|qt23z0QzmMkfRpS7J9+r=xC
zp479IrC#WlkT#^n{6F70XU>^(@45HA<mdPM`8Q4X&6zXvo$t)~o;fq$Z5k&Vs4bO*
z2K+x%5?grxw@_8(im;{Ht&%eTz3a`tn@3>t2ox$+s+8WySgI-%R`6c7TtnDRsU;?o
z$Qv3IR;ciYP*C8$Icy$*O&WoXr~k^j^2wzq7ZzTn)boDR+bNY5yKT4K5)(<p+?EPn
zcpgxtC!ee<SKg?%kXL3i{o66z{P&NJz{b#jbzQx+ZhfP^aZ2fw!ou&(H^a7jn^Mu(
z?Y3@9Zo7@xty}blU(Qxk8c?NErsx}Q)U8|V>eb8n7C7{Om{KDQGBcEF8^A3i(ORh@
zQ|Sq%>XuP<^X<Rw2$YZh57&oZpf9*r-#cAT|4si^S_;uFRcZ>u(lHpCDB*U~-InMj
zOP1(v-J&;Su_d9S`BiEvpa7_U`%O=uuJ659UvPmw{P65L895>S_f=i`cIn%9Uf;gI
z_w75R@1A@1RmzQ~9XF$I)v8jk@A|%dKkM7KYgg_`Xs^`#ja&y(o?4s3|LzFnrT?w;
zRt37Chwky1er&FutGzI9Ug<N>6c+YT>SX%eQK<z%_Z=F|P5)AwDpWLqrBt71N}u^m
zf9AuVJ6AvUnC{U-7Zm8NwsJs5wkaX~|Dx*rQfJgJqkj2C|B_Dsa{B4NDCI`eiktCE
z>(;*%6-n`5MvVC7$RoL@RQ>wx>(`%OzrNm>Ncr~lscV|p{PRCJ0y*e^7rjeU-SirL
z&5QcQg?gb-CiC;V@9GC0(49M%)~iQ7FLX4-43+x9(rXU-mr@j25YoS_<Li~y>#RH9
zukU|Hzq3ZKk>(aI)GxlMuenAyZK`+K#X=2Vfsp>qy-V~Z3-kiLdbJ)jNbj&iP(Bka
zrK?ueLx$+}>$U#qBl_Z%RO+Sn?O%Fn{!1_EjfwN67IkeaoB#f=jX)Ot*VHxJ=r%X&
zn_tnd{HT8vQe+V`X_BnZ+qBX9?hC<$v}fmkOGUQ(T4m9{f6FVmAWHup4~nMQU)-Q?
zfCv}s#nSYTKk8Rr(Kp{LP1mgH)%AWu9LzQI_Otp~sq*s6`lOSBk`^KU`gv?Jii)J5
ze&i8-*kN9-Qoprt|J(feEbcZUQl47Wbz0e+{GS>DhyIV$M|RX5@6-3atKVI#*9wJv
z@`-->Y2CZGP}I&l2SgRpUM~7q8T22Hi3ems9Q}I~x})Cl1bxEg`tnhFREaJTG}f+_
zPtSe#>5d)skw<zELL4kwy>zc{^|yNIQr)+&u28|t4jVSm!^wZn9NnRVtxBnW?c4X8
z->;wEm`HhQQP*i@bMk*`1PawQrQ7^N|Kn19=@a^ikMu`EZ@>O3TT$q(uu#{j6A(#|
z2x%`5{i|UA8yOG}$b#JTZ|7;n`^cvI9(~VS`mI%Zl_187<%uWsrI+e|{G)W6ZFqMj
zShO1HMq~6CDfYq(`q*Q=Y_CXAE}Z;_4bul5WUI>k&D&pRtp1-QcWYMItY2wA$fi`o
zg3)^MVp;#*aD#5uO4q0nkVi%$7yVN#2;HE66S_G9umH}2a?rm=*n8`}Tj&-8^}w-u
ztdAtzqA_Di`}Heq)=cdd(m%WPtMpY%^-{fHfoz}6od6%M_S#F|e!JeVLF?jTecpKi
zG9+wZQ@=u~E9|9eV!5Pl;WE=jN-COAL#am$B0%0E0J}Ny-!THW78br*TKW@&-{+%`
zP8>CA+siLkC!C;m*daf9A^l&eI$qgv!If8PC#oy2BzC*<$|YCw0!YHkam!GpW5+9%
zA{sds5c7s@)wT^)!yc7-jA%S!_BFFXxb&ye!dDB`ts(vMz9e*ly@R~Sk3a6MlpLtC
zhnO}^ZVH!mzx`Y?Zmg8V#@9A(ytZ-U{^o+Jmo{l~X%U~Bjf_;GBGsEK)x7yN&71$+
zy!mtWo7Y!zZKH$P=H!291lkl94k=|fxNhB&B^@SAs5M}KYT8uoymPtHS*1Fz>bPLl
z0=<eAN*GtITGegUswF0pAh>r)FIcduW5-oW5z7~L*+rdts=DsFLtcLQoF9Hzw~jq*
zY2lDU)h0s!+w1N5QY_cY^~WE};?02_PX04y=yvT~;wSwUaocU*y5*L)-qJU|b>r4=
zZ7o^qhbB#aC@T6v|FCh9DpaKUOr>U?bI#0}AJ3fm)x|R}R&s5lgWKlhe`*8@t5z*#
zO?=yJ`t7&bX8!b(MiR^Nr$r0B#~v9p;$GS>z<kwle#iOq7tEipqfyPD-)+f!{w<l`
zZT@_&GVU*!-?1aoBciA=0M3OTNwZ}=;|zV%P5RYW^~#lOg~1ejTemK)T9vAqHBZr}
zOx06ml4pg#|Ng<ihLiu@cgIVaQNXz89_rnt@7hW4BnYVmO`0qyDq=acagi!iq`GvZ
zVMF1bX+2yIKS&=G_A|h2bK*ZY0zU0ksiIr9)VJQM-+U7W;LktnbXsT(n%QTc2r*h}
zgJ2EqwO8%icWmE&LHqXFiDas7?Yk{$&%Y(@yLFRNM7IbvI<n&w>8X!{1s%4>2IR~$
zWy#K@yk-rINw5P)Fa_U`3maAt5IgIg2kXJew)Fe&>$A`HhIX&Ll)v(S=N*BW-4;1~
zAs&7hLVZv_SXEaw^xvULlMWq<;1g|RERu>!8U7L)9R}=7J@ZU`W`6odL3gE6z5jFc
z@c*sl2<`2;r_8js+#=M!W(^V|m_SUH=bXblnn5y_{$;#68gW)oz#qb3|EtQ{07r1}
zDD%Jr^||Njp+kl3`TOr{)(A%61$N+QiPz!~WY`4z<+M&i$@-2vR8`UcZqr7|%AYRr
z8tlEdEf^A`<j_f&OKctV<NN9T4E>vZEW6qG3c2&`2H3ARa3He^as@;im2@Q^nv7G@
zjWuc{FH0s@CX>CBUcNt)oB#a#Mj(dvcHdnmlOl3@;|*biYCUU~EG3u$t+0rvYH3d}
z>*&ybh^f%O77d^X{d3==i3K48D1rl;KQQ?pd{9_4h7k)E-~vYAWh;9Li$g}XR(t3@
zZq>K0)9d(fsAO{Qy%q0)uzgrLvDk}32``U0LO=gJjXtNJL*MDoq<?lIUws8+_-x1O
zV;%Zurk~}-!w;Wz-F36(%#zT(cXy?>4~BA!&5Wa9EdK07g!Wv**ljo210eH#{dLWH
zNNcom9(`1I>Ea0lEbR$C9pmZW=)Z~V*q#3=?h)LAEc&loSD=p=p-V*OMVqut`(Obs
zwwjx;IJkK>jR+f6JrsP@4L77xJ58UiM6k;z#QA>MvOY%om}B(Bi8MVyPdHj1{b$nu
zsi(@PRO@+q9uh`@{Iv|6S?RFe(Y0#}YP=#acn7Q;P_?(USyHOCYCT8|x>jA=QnmEz
zZ2tWdM<6Htdjz-ZuKM)T_29w!wbw+oh9@{rj~b;fxumo~1FCwVqoseF!C$NY;L$%1
zJQBEG;Lv}A2Bnu=qMv+HplfM|)@YNKY2QO6?l&L(SJTzl#sY2cNt5olqw(**Yx$h<
z6^R4nXC8mNG_3Wj`qks~aepTLBSRcL8pwp(^lj`=)KlJgF#oUQvjg!jS|oRe>0z~3
z)($oRC##bus0r)U`n789cs0JUuw^%gKXU}ipns2AP(M5EG~o)tfq|2$;f9nJK38ZT
zQ%6J11}lfWJpG@5{)0O{_6sciU!ee13V%flY+9flT8mS83t)JOeDn|Xo~_S*U%#LF
z{`=|EuaeIR-;%tqlRo7XZ<d~>r=j}?gn>#8Y|><45zF_Dj8vgw@UFqaLT}Inq<Hl|
zuCKay!_a&G{a0Rpy>OQZQ}mQG^cn63xRdI%P%YF-6G?FCkKX1#f6@q)i~ham2z!w4
zI8STp)Y6rug}Tt6A!YqzLX<pn(Lbg9(9!(W3Qz!?I#poPLb>h0%AkMtuqc#5S2FqY
zUhz@|1R0xvuk^tOl>b0~AatnJdZpGkX|lekXua&5Hzrbria{$VBcYWlc%aWza`v#Z
z)gjXNU3cx>r_V}stOzLC_SU^C>WVU|mH`C8m#Wgu^lu2|-}18&^zS`QwBVXGD;+eb
za7v+Q3It)bQqkCLb+;v}`L|?sw{FopQZNiF6rctTDs9%x)vzmrR1Gec{+B70Bc8wP
z{12CXxNO;qWrGGS+j-|4rKENVv0b~8l26%u@Y}AW9a*!Ln%$(y?4qLCv-QSBs!&m>
zAwj(4k|9IBNAG&bkaw=KT-o!_?>&9GFBo}Z{u4*dKT6~wK@|InlUersqJCMeR*zHT
zHm`p|viXxz8%_Tn<?Xm*;VGx6Yb_G!wq(U_eY*AOvqWOIZdo^_xF4=nr<_u_<Bkw>
zd1&4(W9k1grE<iBFFW|M%O1JxvdNd7a!QUOe$|vxsZ?PqwJ?>^=C@F#WW(9ANt2c>
zi}*5ZWGs@3O2N<fW2N?$D$T1@>6c2CZmCqMDw&8rdbe-C(nv)<Y2W@Lk&qa9NbqY0
zD5;XysB5lPSD&s<w?nr1@1HgTe~|t?=~o$ALg>9MDcb{t0NErg${LH#N)0e%SXMy=
z*YP{^8uClp3!AI~=fgQosrQV<C<#Bv?$@+8T&crt=H|bD?Fg_FTWlV}1f`D7+jMQE
zMw^n9m;IYN7w1Yl0C!UAcBR%Rb+wlXFXQd<0dav#UC4Qzsrjf<)tweHlT4WJn{xb;
zjt{UJA2fvk3E-Teg;MXDa(CcFDQ78j<6xy8H$|r@6;gX<opwrnW=gr;%ax`i<q5e8
zY)t;fl$8d@{!KjuB$P@dx+D^d6X;DQCLEnOT1i%XtevQxNQ^cmDc^-skzs_d-ng*{
z$u7H0Uw*mn-d%Le{_zi8sgfcj)WL~^6Nx7ii8YDD)zl?4_UIq=02q&L+v<xhlDfTm
z$?t_1itYuLxI66>X+`QK5(5&6bq0|~6N&286Y=;`?({_BeN&F#({uH!YNG0crVt<j
zY=AXVDl;~^tWl#SjW982G_g^mh7H}@f|PUJsL|<-8cjDv7dL9u!?_cdl3uh+BtA0@
zGxlXhN>ZMNGKRxkq9T5hKIwdYK1QGz0b#Fwo<5Hp@?*=#!BC<Q3kQrRh_WkBf!|J@
zgkxO0_Smhq`dfO)NGp}@(j}c<jCR3<^n|0+M=QySkG0daN2f<qQp!`yeH4rvTRWKN
zUw$bTi_4em@4ge;u?Z7k9IA#5d2T1BPkb%?S~|TZef8DUB{Z&A@9m2(7SS>mj%+)>
z`NpriY?=Q0YuRv)8z)Rl!~l*JJje|gkWQ~Nh&=jex_b2-_^R~jr>E2Jn{w};o<2QS
zzpAFIKA3)xUI7xwrt_~T6^VQ7@ygwIgQbb{UwNgl(7i34ajq|a<?>fvS?ZU*^Uhax
z-PO727o?ST?b7Mb40fX2QC6fR6=+CD;%<63*u0nsy{2DdBmKSp9^F?gAP}^XGgJ>f
zN1szgSIMXqg`va_h{*>fCG@Pm_(GIjd4fDg>?~%^)XzLK_L^(z);&q7iqg}+QIK&!
z<ciIUEA-499e==U>)NBW5#msuTETOGKA(LSeIeoEY5o28-+%GN7hit<CC|dU){i}g
z>T)~P?qmJ26ubIr>MGT>apO^Mzx}1DBtI+k3Z$Q(>(9T`Uy9;{7buVNB+YHNo!27G
z4j6#Gwz-W!x_WMZK_G?-oYqJIPS>Yr&_HmBsv8P#6ayxZgKoKr1`YK1@g73v7bQIB
z!z8`2_4d*GJfMA6zZ4xiR>aETeZQbm2exZ>=VzaZV12`eQ5!Zi+^|73G;+qzKHJc)
z-3F&203ECk##|F=g~7@2d6{+)r15Kbt>k0Mdab?|OS%9<5)pikJ5Jwqm#B-|fa)O6
z;%6<md136>AL`bfsMIym)4x%Wao9xqXX>z$orMb*4j(?eXV0GJpMU-Z?Ju~j{bfjC
z-_&pNQ0qPZ_~j>FzO3FdhyG=`ggy>4)p$Lgl5oE}=}zc~V)NKtcfU#Bge8mU@DTX2
zSfx!+{_E+VrJN{bEYr)D-MfsxP&I2aOVvUV_JUOh;$uX4LsqTYuxf+G%J{6a?9HH*
z)M$6|$)DliLhFIwe?M^W;DH0H@u@U1R@nC0z;^8rs0ASK6);E-V$PTG;fo1M2o70x
zvS{HK@i|{l)Y*+>0dEEe>H~wiAwKj1RjU;%D7IR!MgYfyz=n=RRA1efM}Xg(B3(3g
z?2jm?n6?G;zfF)XZ6f`%N;h-tiWMtvzWL@_wQ9M8Q$<y2rCXu*zH;TtapP{&<8+k%
z-yQn)P=sHogzc(#4W5pd*g@}bsy-EyWO=>><|w1O1Im9r{bLi40Qsk$KlQru%APA$
z^!((Lp68v{GlL)ufppPbkYxS(>y_7BbLFxtmx)N*eD6Y%v$bUVi}#iH-xulbtXb;9
z3*DhJrQAw(18nmy))&vwb7W+g@KIGi;e_B5DMk2Po_L}xN{wE{r~1>Lx@QGl!5dp2
z2Ga2Bt`h;h*5BxF?$mc8_1<0Y9y|zyalAFnb@RSs`_`@doKjyzG5nvul5yBX`e&E)
zy{vPJii%D<Esocr70w*+<RDLq5UzjjY}~kQ^|z}BqaB+-Tg;#0*h-iw7nJ{E`e$Fr
zI*Tw*%{%Yh^VBDwuu@Tj2dnM3k5GVCI8C};-%iN|3sk30?LSk-qZeLQqMlnAc7o79
zW(hakpq4I`S77vLF};ffpH9uY5TSpZl0C0QRaF*D*mBmY<rWRo=+$wTGAzXO*yHu_
z5&B27kZlJr^ah-M)YWxk9|4(LjNPJc-D6m?MzH(ezmS32MEYlPGdj`}CQLZ$sH0+r
z#5@dqTd~ezs2%#RW*I={@rKVVa>?X_*@#qRYsP{}8u+W}AJ@F7&Jofoee&@qt5;qT
za((`JRj*#8zgqDjV=YI?H{XP

(Patch may be truncated, please check the link at the top of this post.)