SDL: render: Replaced SDL_RenderFlush with SDL_FlushRenderer.

From dfee3f9e9209081b51f16436cd031f17d05ccee9 Mon Sep 17 00:00:00 2001
From: "Ryan C. Gordon" <[EMAIL REDACTED]>
Date: Sat, 25 Nov 2023 22:41:23 -0500
Subject: [PATCH] render: Replaced SDL_RenderFlush with SDL_FlushRenderer.

This uses the same `SDL_VerbNoun` format as the rest of SDL3, and also
makes a stronger effort to invalidate cached state in the backend, so that
cooperation improves with apps that use low-level rendering APIs directly.

Fixes #367.
---
 build-scripts/SDL_migration.cocci        |  7 ++++-
 docs/README-migration.md                 |  3 ++-
 include/SDL3/SDL_oldnames.h              |  2 ++
 include/SDL3/SDL_render.h                | 19 +++++++++----
 src/dynapi/SDL_dynapi.sym                |  2 +-
 src/dynapi/SDL_dynapi_overrides.h        |  2 +-
 src/dynapi/SDL_dynapi_procs.h            |  2 +-
 src/render/SDL_render.c                  |  8 ++++--
 src/render/SDL_sysrender.h               |  1 +
 src/render/direct3d/SDL_render_d3d.c     | 12 +++++++++
 src/render/direct3d11/SDL_render_d3d11.c | 14 ++++++++++
 src/render/direct3d12/SDL_render_d3d12.c | 11 ++++++++
 src/render/metal/SDL_render_metal.m      |  6 +++++
 src/render/opengl/SDL_render_gl.c        | 34 +++++++++++++++++++++---
 src/render/opengles2/SDL_render_gles2.c  | 24 +++++++++++++++--
 src/render/ps2/SDL_render_ps2.c          |  6 +++++
 src/render/psp/SDL_render_psp.c          |  6 +++++
 src/render/software/SDL_render_sw.c      |  7 +++++
 src/render/vitagxm/SDL_render_vita_gxm.c |  8 ++++++
 19 files changed, 156 insertions(+), 18 deletions(-)

diff --git a/build-scripts/SDL_migration.cocci b/build-scripts/SDL_migration.cocci
index e4ac8a989973..50e17402d077 100644
--- a/build-scripts/SDL_migration.cocci
+++ b/build-scripts/SDL_migration.cocci
@@ -2749,4 +2749,9 @@ expression e, n, v;
 expression w, i, s;
 @@
 - SDL_Vulkan_CreateSurface(w, i, s)
-+ SDL_Vulkan_CreateSurface(w, i, NULL, s)
\ No newline at end of file
++ SDL_Vulkan_CreateSurface(w, i, NULL, s)
+@@
+@@
+- SDL_RenderFlush
++ SDL_FlushRenderer
+  (...)
diff --git a/docs/README-migration.md b/docs/README-migration.md
index a7a97015efa9..60de306e73c1 100644
--- a/docs/README-migration.md
+++ b/docs/README-migration.md
@@ -816,7 +816,7 @@ The 2D renderer API always uses batching in SDL3. There is no magic to turn
 it on and off; it doesn't matter if you select a specific renderer or try to
 use any hint. This means that all apps that use SDL3's 2D renderer and also
 want to call directly into the platform's lower-layer graphics API _must_ call
-SDL_RenderFlush() before doing so. This will make sure any pending rendering
+SDL_FlushRenderer() before doing so. This will make sure any pending rendering
 work from SDL is done before the app starts directly drawing.
 
 SDL_GetRenderDriverInfo() has been removed, since most of the information it reported were
@@ -868,6 +868,7 @@ The following functions have been renamed:
 * SDL_RenderDrawRectsF() => SDL_RenderRects()
 * SDL_RenderFillRectF() => SDL_RenderFillRect()
 * SDL_RenderFillRectsF() => SDL_RenderFillRects()
+* SDL_RenderFlush() => SDL_FlushRenderer()
 * SDL_RenderGetClipRect() => SDL_GetRenderClipRect()
 * SDL_RenderGetIntegerScale() => SDL_GetRenderIntegerScale()
 * SDL_RenderGetLogicalSize() => SDL_GetRenderLogicalPresentation()
diff --git a/include/SDL3/SDL_oldnames.h b/include/SDL3/SDL_oldnames.h
index 39b1a61598cd..bfe64d4f5d8a 100644
--- a/include/SDL3/SDL_oldnames.h
+++ b/include/SDL3/SDL_oldnames.h
@@ -394,6 +394,7 @@
 #define SDL_RenderDrawRectsF SDL_RenderRects
 #define SDL_RenderFillRectF SDL_RenderFillRect
 #define SDL_RenderFillRectsF SDL_RenderFillRects
+#define SDL_RenderFlush SDL_FlushRenderer
 #define SDL_RenderGetClipRect SDL_GetRenderClipRect
 #define SDL_RenderGetLogicalSize SDL_GetRenderLogicalPresentation
 #define SDL_RenderGetMetalCommandEncoder SDL_GetRenderMetalCommandEncoder
@@ -838,6 +839,7 @@
 #define SDL_RenderDrawRectsF SDL_RenderDrawRectsF_renamed_SDL_RenderRects
 #define SDL_RenderFillRectF SDL_RenderFillRectF_renamed_SDL_RenderFillRect
 #define SDL_RenderFillRectsF SDL_RenderFillRectsF_renamed_SDL_RenderFillRects
+#define SDL_RenderFlush SDL_RenderFlush_renamed_SDL_FlushRenderer
 #define SDL_RenderGetClipRect SDL_RenderGetClipRect_renamed_SDL_GetRenderClipRect
 #define SDL_RenderGetLogicalSize SDL_RenderGetLogicalSize_renamed_SDL_GetRenderLogicalPresentation
 #define SDL_RenderGetMetalCommandEncoder SDL_RenderGetMetalCommandEncoder_renamed_SDL_GetRenderMetalCommandEncoder
diff --git a/include/SDL3/SDL_render.h b/include/SDL3/SDL_render.h
index caff6f87a672..d235e40ae41a 100644
--- a/include/SDL3/SDL_render.h
+++ b/include/SDL3/SDL_render.h
@@ -1552,27 +1552,36 @@ extern DECLSPEC void SDLCALL SDL_DestroyTexture(SDL_Texture *texture);
 extern DECLSPEC void SDLCALL SDL_DestroyRenderer(SDL_Renderer *renderer);
 
 /**
- * Force the rendering context to flush any pending commands to the underlying
- * rendering API.
+ * Force the rendering context to flush any pending commands and state.
  *
  * You do not need to (and in fact, shouldn't) call this function unless you
- * are planning to call into OpenGL/Direct3D/Metal/whatever directly in
+ * are planning to call into OpenGL/Direct3D/Metal/whatever directly, in
  * addition to using an SDL_Renderer.
  *
  * This is for a very-specific case: if you are using SDL's render API, and
  * you plan to make OpenGL/D3D/whatever calls in addition to SDL render API
- * calls. If this applies, you should call SDL_RenderFlush() between calls to
+ * calls. If this applies, you should call this function between calls to
  * SDL's render API and the low-level API you're using in cooperation.
  *
  * In all other cases, you can ignore this function.
  *
+ * This call makes SDL flush any pending rendering work it was queueing up
+ * to do later in a single batch, and marks any internal cached state as
+ * invalid, so it'll prepare all its state again later, from scratch.
+ *
+ * This means you do not need to save state in your rendering code to protect
+ * the SDL renderer. However, there are lots of arbitrary pieces of Direct3D
+ * and OpenGL state that can confuse things; you should use your best
+ * judgement and be prepared to make changes if specific state needs to be
+ * protected.
+ *
  * \param renderer the rendering context
  * \returns 0 on success or a negative error code on failure; call
  *          SDL_GetError() for more information.
  *
  * \since This function is available since SDL 3.0.0.
  */
-extern DECLSPEC int SDLCALL SDL_RenderFlush(SDL_Renderer *renderer);
+extern DECLSPEC int SDLCALL SDL_FlushRenderer(SDL_Renderer *renderer);
 
 
 /**
diff --git a/src/dynapi/SDL_dynapi.sym b/src/dynapi/SDL_dynapi.sym
index 0cb08454f431..5194954994a7 100644
--- a/src/dynapi/SDL_dynapi.sym
+++ b/src/dynapi/SDL_dynapi.sym
@@ -107,6 +107,7 @@ SDL3_0.0.0 {
     SDL_FlashWindow;
     SDL_FlushEvent;
     SDL_FlushEvents;
+    SDL_FlushRenderer;
     SDL_GDKGetTaskQueue;
     SDL_GDKSuspendComplete;
     SDL_GL_BindTexture;
@@ -512,7 +513,6 @@ SDL3_0.0.0 {
     SDL_RenderCoordinatesToWindow;
     SDL_RenderFillRect;
     SDL_RenderFillRects;
-    SDL_RenderFlush;
     SDL_RenderGeometry;
     SDL_RenderGeometryRaw;
     SDL_RenderLine;
diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h
index fcd77a293013..478005db8f2a 100644
--- a/src/dynapi/SDL_dynapi_overrides.h
+++ b/src/dynapi/SDL_dynapi_overrides.h
@@ -131,6 +131,7 @@
 #define SDL_FlashWindow SDL_FlashWindow_REAL
 #define SDL_FlushEvent SDL_FlushEvent_REAL
 #define SDL_FlushEvents SDL_FlushEvents_REAL
+#define SDL_FlushRenderer SDL_FlushRenderer_REAL
 #define SDL_GDKGetTaskQueue SDL_GDKGetTaskQueue_REAL
 #define SDL_GDKSuspendComplete  SDL_GDKSuspendComplete_REAL
 #define SDL_GL_BindTexture SDL_GL_BindTexture_REAL
@@ -536,7 +537,6 @@
 #define SDL_RenderCoordinatesToWindow SDL_RenderCoordinatesToWindow_REAL
 #define SDL_RenderFillRect SDL_RenderFillRect_REAL
 #define SDL_RenderFillRects SDL_RenderFillRects_REAL
-#define SDL_RenderFlush SDL_RenderFlush_REAL
 #define SDL_RenderGeometry SDL_RenderGeometry_REAL
 #define SDL_RenderGeometryRaw SDL_RenderGeometryRaw_REAL
 #define SDL_RenderLine SDL_RenderLine_REAL
diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h
index 4149b05cd805..17567ae673fd 100644
--- a/src/dynapi/SDL_dynapi_procs.h
+++ b/src/dynapi/SDL_dynapi_procs.h
@@ -193,6 +193,7 @@ SDL_DYNAPI_PROC(void,SDL_FilterEvents,(SDL_EventFilter a, void *b),(a,b),)
 SDL_DYNAPI_PROC(int,SDL_FlashWindow,(SDL_Window *a, SDL_FlashOperation b),(a,b),return)
 SDL_DYNAPI_PROC(void,SDL_FlushEvent,(Uint32 a),(a),)
 SDL_DYNAPI_PROC(void,SDL_FlushEvents,(Uint32 a, Uint32 b),(a,b),)
+SDL_DYNAPI_PROC(int,SDL_FlushRenderer,(SDL_Renderer *a),(a),return)
 SDL_DYNAPI_PROC(int,SDL_GL_BindTexture,(SDL_Texture *a, float *b, float *c),(a,b,c),return)
 SDL_DYNAPI_PROC(SDL_GLContext,SDL_GL_CreateContext,(SDL_Window *a),(a),return)
 SDL_DYNAPI_PROC(int,SDL_GL_DeleteContext,(SDL_GLContext a),(a),return)
@@ -580,7 +581,6 @@ SDL_DYNAPI_PROC(int,SDL_RenderCoordinatesFromWindow,(SDL_Renderer *a, float b, f
 SDL_DYNAPI_PROC(int,SDL_RenderCoordinatesToWindow,(SDL_Renderer *a, float b, float c, float *d, float *e),(a,b,c,d,e),return)
 SDL_DYNAPI_PROC(int,SDL_RenderFillRect,(SDL_Renderer *a, const SDL_FRect *b),(a,b),return)
 SDL_DYNAPI_PROC(int,SDL_RenderFillRects,(SDL_Renderer *a, const SDL_FRect *b, int c),(a,b,c),return)
-SDL_DYNAPI_PROC(int,SDL_RenderFlush,(SDL_Renderer *a),(a),return)
 SDL_DYNAPI_PROC(int,SDL_RenderGeometry,(SDL_Renderer *a, SDL_Texture *b, const SDL_Vertex *c, int d, const int *e, int f),(a,b,c,d,e,f),return)
 SDL_DYNAPI_PROC(int,SDL_RenderGeometryRaw,(SDL_Renderer *a, SDL_Texture *b, const float *c, int d, const SDL_Color *e, int f, const float *g, int h, int i, const void *j, int k, int l),(a,b,c,d,e,f,g,h,i,j,k,l),return)
 SDL_DYNAPI_PROC(int,SDL_RenderLine,(SDL_Renderer *a, float b, float c, float d, float e),(a,b,c,d,e),return)
diff --git a/src/render/SDL_render.c b/src/render/SDL_render.c
index a9cd2105ad26..464b8f2e3e40 100644
--- a/src/render/SDL_render.c
+++ b/src/render/SDL_render.c
@@ -265,9 +265,13 @@ static int FlushRenderCommandsIfTextureNeeded(SDL_Texture *texture)
     return 0;
 }
 
-int SDL_RenderFlush(SDL_Renderer *renderer)
+int SDL_FlushRenderer(SDL_Renderer *renderer)
 {
-    return FlushRenderCommands(renderer);
+    if (FlushRenderCommands(renderer) == -1) {
+        return -1;
+    }
+    renderer->InvalidateCachedState(renderer);
+    return 0;
 }
 
 void *SDL_AllocateRenderVertices(SDL_Renderer *renderer, const size_t numbytes, const size_t alignment, size_t *offset)
diff --git a/src/render/SDL_sysrender.h b/src/render/SDL_sysrender.h
index 54d2d151484e..01bf8ec29a93 100644
--- a/src/render/SDL_sysrender.h
+++ b/src/render/SDL_sysrender.h
@@ -179,6 +179,7 @@ struct SDL_Renderer
                          int num_vertices, const void *indices, int num_indices, int size_indices,
                          float scale_x, float scale_y);
 
+    void (*InvalidateCachedState)(SDL_Renderer *renderer);
     int (*RunCommandQueue)(SDL_Renderer *renderer, SDL_RenderCommand *cmd, void *vertices, size_t vertsize);
     int (*UpdateTexture)(SDL_Renderer *renderer, SDL_Texture *texture,
                          const SDL_Rect *rect, const void *pixels,
diff --git a/src/render/direct3d/SDL_render_d3d.c b/src/render/direct3d/SDL_render_d3d.c
index 6a882ce0f101..73a8a71508db 100644
--- a/src/render/direct3d/SDL_render_d3d.c
+++ b/src/render/direct3d/SDL_render_d3d.c
@@ -1086,6 +1086,17 @@ static int SetDrawState(D3D_RenderData *data, const SDL_RenderCommand *cmd)
     return 0;
 }
 
+static void D3D_InvalidateCachedState(SDL_Renderer *renderer)
+{
+    D3D_RenderData *data = (D3D_RenderData *)renderer->driverdata;
+    data->drawstate.viewport_dirty = SDL_TRUE;
+    data->drawstate.cliprect_enabled_dirty = SDL_TRUE;
+    data->drawstate.cliprect_dirty = SDL_TRUE;
+    data->drawstate.blend = SDL_BLENDMODE_INVALID;
+    data->drawstate.texture = NULL;
+    data->drawstate.shader = NULL;
+}
+
 static int D3D_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, void *vertices, size_t vertsize)
 {
     D3D_RenderData *data = (D3D_RenderData *)renderer->driverdata;
@@ -1586,6 +1597,7 @@ SDL_Renderer *D3D_CreateRenderer(SDL_Window *window, SDL_PropertiesID create_pro
     renderer->QueueDrawPoints = D3D_QueueDrawPoints;
     renderer->QueueDrawLines = D3D_QueueDrawPoints; /* lines and points queue vertices the same way. */
     renderer->QueueGeometry = D3D_QueueGeometry;
+    renderer->InvalidateCachedState = D3D_InvalidateCachedState;
     renderer->RunCommandQueue = D3D_RunCommandQueue;
     renderer->RenderReadPixels = D3D_RenderReadPixels;
     renderer->RenderPresent = D3D_RenderPresent;
diff --git a/src/render/direct3d11/SDL_render_d3d11.c b/src/render/direct3d11/SDL_render_d3d11.c
index 103eb21715f8..4119275e6f0b 100644
--- a/src/render/direct3d11/SDL_render_d3d11.c
+++ b/src/render/direct3d11/SDL_render_d3d11.c
@@ -2124,6 +2124,19 @@ static void D3D11_DrawPrimitives(SDL_Renderer *renderer, D3D11_PRIMITIVE_TOPOLOG
     ID3D11DeviceContext_Draw(rendererData->d3dContext, (UINT)vertexCount, (UINT)vertexStart);
 }
 
+static void D3D11_InvalidateCachedState(SDL_Renderer *renderer)
+{
+    D3D11_RenderData *data = (D3D11_RenderData *)renderer->driverdata;
+    data->currentRenderTargetView = NULL;
+    data->currentRasterizerState = NULL;
+    data->currentBlendState = NULL;
+    data->currentShader = NULL;
+    data->currentShaderResource = NULL;
+    data->currentSampler = NULL;
+    data->cliprectDirty = SDL_TRUE;
+    data->viewportDirty = SDL_TRUE;
+}
+
 static int D3D11_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, void *vertices, size_t vertsize)
 {
     D3D11_RenderData *rendererData = (D3D11_RenderData *)renderer->driverdata;
@@ -2454,6 +2467,7 @@ SDL_Renderer *D3D11_CreateRenderer(SDL_Window *window, SDL_PropertiesID create_p
     renderer->QueueDrawPoints = D3D11_QueueDrawPoints;
     renderer->QueueDrawLines = D3D11_QueueDrawPoints; /* lines and points queue vertices the same way. */
     renderer->QueueGeometry = D3D11_QueueGeometry;
+    renderer->InvalidateCachedState = D3D11_InvalidateCachedState;
     renderer->RunCommandQueue = D3D11_RunCommandQueue;
     renderer->RenderReadPixels = D3D11_RenderReadPixels;
     renderer->RenderPresent = D3D11_RenderPresent;
diff --git a/src/render/direct3d12/SDL_render_d3d12.c b/src/render/direct3d12/SDL_render_d3d12.c
index dee07a489874..195371406e4f 100644
--- a/src/render/direct3d12/SDL_render_d3d12.c
+++ b/src/render/direct3d12/SDL_render_d3d12.c
@@ -2587,6 +2587,16 @@ static void D3D12_DrawPrimitives(SDL_Renderer *renderer, D3D12_PRIMITIVE_TOPOLOG
     D3D_CALL(rendererData->commandList, DrawInstanced, (UINT)vertexCount, 1, (UINT)vertexStart, 0);
 }
 
+static void D3D12_InvalidateCachedState(SDL_Renderer *renderer)
+{
+    D3D12_RenderData *data = (D3D12_RenderData *)renderer->driverdata;
+    data->currentRenderTargetView.ptr = 0;
+    data->currentShaderResource.ptr = 0;
+    data->currentSampler.ptr = 0;
+    data->cliprectDirty = SDL_TRUE;
+    data->viewportDirty = SDL_TRUE;
+}
+
 static int D3D12_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, void *vertices, size_t vertsize)
 {
     D3D12_RenderData *rendererData = (D3D12_RenderData *)renderer->driverdata;
@@ -3008,6 +3018,7 @@ SDL_Renderer *D3D12_CreateRenderer(SDL_Window *window, SDL_PropertiesID create_p
     renderer->QueueDrawPoints = D3D12_QueueDrawPoints;
     renderer->QueueDrawLines = D3D12_QueueDrawPoints; /* lines and points queue vertices the same way. */
     renderer->QueueGeometry = D3D12_QueueGeometry;
+    renderer->InvalidateCachedState = D3D12_InvalidateCachedState;
     renderer->RunCommandQueue = D3D12_RunCommandQueue;
     renderer->RenderReadPixels = D3D12_RenderReadPixels;
     renderer->RenderPresent = D3D12_RenderPresent;
diff --git a/src/render/metal/SDL_render_metal.m b/src/render/metal/SDL_render_metal.m
index dc2b6124b609..983e07939885 100644
--- a/src/render/metal/SDL_render_metal.m
+++ b/src/render/metal/SDL_render_metal.m
@@ -1310,6 +1310,11 @@ static SDL_bool SetCopyState(SDL_Renderer *renderer, const SDL_RenderCommand *cm
     return SDL_TRUE;
 }
 
+static void METAL_InvalidateCachedState(SDL_Renderer *renderer)
+{
+    // METAL_DrawStateCache only exists during a run of METAL_RunCommandQueue, so there's nothing to invalidate!
+}
+
 static int METAL_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, void *vertices, size_t vertsize)
 {
     @autoreleasepool {
@@ -1905,6 +1910,7 @@ in case we want to use it later (recreating the renderer)
         renderer->QueueDrawPoints = METAL_QueueDrawPoints;
         renderer->QueueDrawLines = METAL_QueueDrawLines;
         renderer->QueueGeometry = METAL_QueueGeometry;
+        renderer->InvalidateCachedState = METAL_InvalidateCachedState;
         renderer->RunCommandQueue = METAL_RunCommandQueue;
         renderer->RenderReadPixels = METAL_RenderReadPixels;
         renderer->RenderPresent = METAL_RenderPresent;
diff --git a/src/render/opengl/SDL_render_gl.c b/src/render/opengl/SDL_render_gl.c
index 04c5d5100268..8a102f649c7d 100644
--- a/src/render/opengl/SDL_render_gl.c
+++ b/src/render/opengl/SDL_render_gl.c
@@ -75,10 +75,13 @@ typedef struct
     SDL_bool cliprect_dirty;
     SDL_Rect cliprect;
     SDL_bool texturing;
+    SDL_bool texturing_dirty;
     SDL_bool vertex_array;
     SDL_bool color_array;
     SDL_bool texture_array;
+    SDL_bool color_dirty;
     Uint32 color;
+    SDL_bool clear_color_dirty;
     Uint32 clear_color;
 } GL_DrawStateCache;
 
@@ -450,7 +453,7 @@ static int GL_CreateTexture(SDL_Renderer *renderer, SDL_Texture *texture, SDL_Pr
     GL_ActivateRenderer(renderer);
 
     renderdata->drawstate.texture = NULL; /* we trash this state. */
-    renderdata->drawstate.texturing = SDL_FALSE; /* we trash this state. */
+    renderdata->drawstate.texturing_dirty = SDL_TRUE; /* we trash this state. */
 
     if (texture->access == SDL_TEXTUREACCESS_TARGET &&
         !renderdata->GL_EXT_framebuffer_object_supported) {
@@ -1108,7 +1111,7 @@ static int SetDrawState(GL_RenderData *data, const SDL_RenderCommand *cmd, const
         data->drawstate.shader = shader;
     }
 
-    if ((cmd->data.draw.texture != NULL) != data->drawstate.texturing) {
+    if (data->drawstate.texturing_dirty || ((cmd->data.draw.texture != NULL) != data->drawstate.texturing)) {
         if (!cmd->data.draw.texture) {
             data->glDisable(data->textype);
             data->drawstate.texturing = SDL_FALSE;
@@ -1116,6 +1119,7 @@ static int SetDrawState(GL_RenderData *data, const SDL_RenderCommand *cmd, const
             data->glEnable(data->textype);
             data->drawstate.texturing = SDL_TRUE;
         }
+        data->drawstate.texturing_dirty = SDL_FALSE;
     }
 
     vertex_array = cmd->command == SDL_RENDERCMD_DRAW_POINTS || cmd->command == SDL_RENDERCMD_DRAW_LINES || cmd->command == SDL_RENDERCMD_GEOMETRY;
@@ -1193,6 +1197,25 @@ static int SetCopyState(GL_RenderData *data, const SDL_RenderCommand *cmd)
     return 0;
 }
 
+static void GL_InvalidateCachedState(SDL_Renderer *renderer)
+{
+    GL_DrawStateCache *cache = &((GL_RenderData *)renderer->driverdata)->drawstate;
+    cache->viewport_dirty = SDL_TRUE;
+    cache->texture = NULL;
+    cache->drawablew = 0;
+    cache->drawableh = 0;
+    cache->blend = SDL_BLENDMODE_INVALID;
+    cache->shader = SHADER_INVALID;
+    cache->cliprect_enabled_dirty = SDL_TRUE;
+    cache->cliprect_dirty = SDL_TRUE;
+    cache->texturing_dirty = SDL_TRUE;
+    cache->vertex_array = SDL_FALSE;  /* !!! FIXME: this resets to false at the end of GL_RunCommandQueue, but we could cache this more aggressively. */
+    cache->color_array = SDL_FALSE;  /* !!! FIXME: this resets to false at the end of GL_RunCommandQueue, but we could cache this more aggressively. */
+    cache->texture_array = SDL_FALSE;  /* !!! FIXME: this resets to false at the end of GL_RunCommandQueue, but we could cache this more aggressively. */
+    cache->color_dirty = SDL_TRUE;
+    cache->clear_color_dirty = SDL_TRUE;
+}
+
 static int GL_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, void *vertices, size_t vertsize)
 {
     /* !!! FIXME: it'd be nice to use a vertex buffer instead of immediate mode... */
@@ -1230,9 +1253,10 @@ static int GL_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, vo
             const Uint8 b = cmd->data.color.b;
             const Uint8 a = cmd->data.color.a;
             const Uint32 color = (((Uint32)a << 24) | (r << 16) | (g << 8) | b);
-            if (color != data->drawstate.color) {
+            if ((data->drawstate.color_dirty) || (color != data->drawstate.color)) {
                 data->glColor4ub((GLubyte)r, (GLubyte)g, (GLubyte)b, (GLubyte)a);
                 data->drawstate.color = color;
+                data->drawstate.color_dirty = SDL_FALSE;
             }
             break;
         }
@@ -1269,13 +1293,14 @@ static int GL_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, vo
             const Uint8 b = cmd->data.color.b;
             const Uint8 a = cmd->data.color.a;
             const Uint32 color = (((Uint32)a << 24) | (r << 16) | (g << 8) | b);
-            if (color != data->drawstate.clear_color) {
+            if ((data->drawstate.clear_color_dirty) || (color != data->drawstate.clear_color)) {
                 const GLfloat fr = ((GLfloat)r) * inv255f;
                 const GLfloat fg = ((GLfloat)g) * inv255f;
                 const GLfloat fb = ((GLfloat)b) * inv255f;
                 const GLfloat fa = ((GLfloat)a) * inv255f;
                 data->glClearColor(fr, fg, fb, fa);
                 data->drawstate.clear_color = color;
+                data->drawstate.clear_color_dirty = SDL_FALSE;
             }
 
             if (data->drawstate.cliprect_enabled || data->drawstate.cliprect_enabled_dirty) {
@@ -1797,6 +1822,7 @@ static SDL_Renderer *GL_CreateRenderer(SDL_Window *window, SDL_PropertiesID crea
     renderer->QueueDrawPoints = GL_QueueDrawPoints;
     renderer->QueueDrawLines = GL_QueueDrawLines;
     renderer->QueueGeometry = GL_QueueGeometry;
+    renderer->InvalidateCachedState = GL_InvalidateCachedState;
     renderer->RunCommandQueue = GL_RunCommandQueue;
     renderer->RenderReadPixels = GL_RenderReadPixels;
     renderer->RenderPresent = GL_RenderPresent;
diff --git a/src/render/opengles2/SDL_render_gles2.c b/src/render/opengles2/SDL_render_gles2.c
index 1eb17c6e17ad..9ff464c591bb 100644
--- a/src/render/opengles2/SDL_render_gles2.c
+++ b/src/render/opengles2/SDL_render_gles2.c
@@ -138,7 +138,9 @@ typedef struct
     SDL_bool cliprect_dirty;
     SDL_Rect cliprect;
     SDL_bool texturing;
+    SDL_bool texturing_dirty;
     Uint32 clear_color;
+    SDL_bool clear_color_dirty;
     int drawablew;
     int drawableh;
     GLES2_ProgramCacheEntry *program;
@@ -949,7 +951,7 @@ static int SetDrawState(GLES2_RenderData *data, const SDL_RenderCommand *cmd, co
         data->drawstate.cliprect_dirty = SDL_FALSE;
     }
 
-    if ((texture != NULL) != data->drawstate.texturing) {
+    if (data->drawstate.texturing_dirty || ((texture != NULL) != data->drawstate.texturing)) {
         if (!texture) {
             data->glDisableVertexAttribArray((GLenum)GLES2_ATTRIBUTE_TEXCOORD);
             data->drawstate.texturing = SDL_FALSE;
@@ -957,6 +959,7 @@ static int SetDrawState(GLES2_RenderData *data, const SDL_RenderCommand *cmd, co
             data->glEnableVertexAttribArray((GLenum)GLES2_ATTRIBUTE_TEXCOORD);
             data->drawstate.texturing = SDL_TRUE;
         }
+        data->drawstate.texturing_dirty = SDL_FALSE;
     }
 
     if (texture) {
@@ -1150,6 +1153,21 @@ static int SetCopyState(SDL_Renderer *renderer, const SDL_RenderCommand *cmd, vo
     return ret;
 }
 
+static void GLES2_InvalidateCachedState(SDL_Renderer *renderer)
+{
+    GLES2_DrawStateCache *cache = &((GLES2_RenderData *)renderer->driverdata)->drawstate;
+    cache->viewport_dirty = SDL_TRUE;
+    cache->texture = NULL;
+    cache->blend = SDL_BLENDMODE_INVALID;
+    cache->cliprect_enabled_dirty = SDL_TRUE;
+    cache->cliprect_dirty = SDL_TRUE;
+    cache->texturing_dirty = SDL_TRUE;
+    cache->clear_color_dirty = SDL_TRUE;
+    cache->drawablew = 0;
+    cache->drawableh = 0;
+    cache->program = NULL;
+}
+
 static int GLES2_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, void *vertices, size_t vertsize)
 {
     GLES2_RenderData *data = (GLES2_RenderData *)renderer->driverdata;
@@ -1233,13 +1251,14 @@ static int GLES2_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd,
             const Uint8 b = colorswap ? cmd->data.color.r : cmd->data.color.b;
             const Uint8 a = cmd->data.color.a;
             const Uint32 color = (((Uint32)a << 24) | (r << 16) | (g << 8) | b);
-            if (color != data->drawstate.clear_color) {
+            if (data->drawstate.clear_color_dirty || (color != data->drawstate.clear_color)) {
                 const GLfloat fr = ((GLfloat)r) * inv255f;
                 const GLfloat fg = ((GLfloat)g) * inv255f;
                 const GLfloat fb = ((GLfloat)b) * inv255f;
                 const GLfloat fa = ((GLfloat)a) * inv255f;
                 data->glClearColor(fr, fg, fb, fa);
                 data->drawstate.clear_color = color;
+                data->drawstate.clear_color_dirty = SDL_FALSE;
             }
 
             if (data->drawstate.cliprect_enabled || data->drawstate.cliprect_enabled_dirty) {
@@ -2196,6 +2215,7 @@ static SDL_Renderer *GLES2_CreateRenderer(SDL_Window *window, SDL_PropertiesID c
     renderer->QueueDrawPoints = GLES2_QueueDrawPoints;
     renderer->QueueDrawLines = GLES2_QueueDrawLines;
     renderer->QueueGeometry = GLES2_QueueGeometry;
+    renderer->InvalidateCachedState = GLES2_InvalidateCachedState;
     renderer->RunCommandQueue = GLES2_RunCommandQueue;
     renderer->RenderReadPixels = GLES2_RenderReadPixels;
     renderer->RenderPresent = GLES2_RenderPresent;
diff --git a/src/render/ps2/SDL_render_ps2.c b/src/render/ps2/SDL_render_ps2.c
index 9d02c3e768ab..d21c005b4e09 100644
--- a/src/render/ps2/SDL_render_ps2.c
+++ b/src/render/ps2/SDL_render_ps2.c
@@ -446,6 +446,11 @@ int PS2_RenderPoints(SDL_Renderer *renderer, void *vertices, SDL_RenderCommand *
     return 0;
 }
 
+static void PS2_InvalidateCachedState(SDL_Renderer *renderer)
+{
+    /* currently this doesn't do anything. If this needs to do something (and someone is mixing their own rendering calls in!), update this. */
+}
+
 static int PS2_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, void *vertices, size_t vertsize)
 {
     while (cmd) {
@@ -656,6 +661,7 @@ static SDL_Renderer *PS2_CreateRenderer(SDL_Window *window, SDL_PropertiesID cre
     renderer->QueueDrawPoints = PS2_QueueDrawPoints;
     renderer->QueueDrawLines = PS2_QueueDrawPoints;
     renderer->QueueGeometry = PS2_QueueGeometry;
+    renderer->InvalidateCachedState = PS2_InvalidateCachedState;
     renderer->RunCommandQueue = PS2_RunCommandQueue;
     renderer->RenderReadPixels = PS2_RenderReadPixels;
     renderer->RenderPresent = PS2_RenderPresent;
diff --git a/src/render/psp/SDL_render_psp.c b/src/render/psp/SDL_render_psp.c
index 3fe45e2af0bc..c2f8c3103015 100644
--- a/src/render/psp/SDL_render_psp.c
+++ b/src/render/psp/SDL_render_psp.c
@@ -1021,6 +1021,11 @@ static void PSP_SetBlendState(PSP_RenderData *data, PSP_BlendState *state)
     *current = *state;
 }
 
+static void PSP_InvalidateCachedState(SDL_Renderer *renderer)
+{
+    /* currently this doesn't do anything. If this needs to do something (and someone is mixing their own rendering calls in!), update this. */
+}
+
 static int PSP_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, void *vertices, size_t vertsize)
 {
     PSP_RenderData *data = (PSP_RenderData *)renderer->driverdata;
@@ -1325,6 +1330,7 @@ SDL_Renderer *PSP_CreateRenderer(SDL_Window *window, SDL_PropertiesID create_pro
     renderer->QueueFillRects = PSP_QueueFillRects;
     renderer->QueueCopy = PSP_QueueCopy;
     renderer->QueueCopyEx = PSP_QueueCopyEx;
+    renderer->InvalidateCachedState = PSP_InvalidateCachedState;
     renderer->RunCommandQueue = PSP_RunCommandQueue;
     renderer->RenderReadPixels = PSP_RenderReadPixels;
     renderer->RenderPresent = PSP_RenderPresent;
diff --git a/src/render/software/SDL_render_sw.c b/src/render/software/SDL_render_sw.c
index 8f10ef7f0f76..5f70345d9bd1 100644
--- a/src/render/software/SDL_render_sw.c
+++ b/src/render/software/SDL_render_sw.c
@@ -658,6 +658,12 @@ static void SetDrawState(SDL_Surface *surface, SW_DrawStateCache *drawstate)
     }
 }
 
+static void SW_InvalidateCachedState(SDL_Renderer *renderer)
+{
+    /* SW_DrawStateCache only lives during SW_RunCommandQueue, so nothing to do here! */
+}
+
+
 static int SW_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, void *vertices, size_t vertsize)
 {
     SDL_Surface *surface = SW_ActivateRenderer(renderer);
@@ -1134,6 +1140,7 @@ SDL_Renderer *SW_CreateRendererForSurface(SDL_Surface *surface)
     renderer->QueueCopy = SW_QueueCopy;
     renderer->QueueCopyEx = SW_QueueCopyEx;
     renderer->QueueGeometry = SW_QueueGeometry;
+    renderer->InvalidateCachedState = SW_InvalidateCachedState;
     renderer->RunCommandQueue = SW_RunCommandQueue;
     renderer->RenderReadPixels = SW_RenderReadPixels;
     renderer->RenderPresent = SW_RenderPresent;
diff --git a/src/render/vitagxm/SDL_render_vita_gxm.c b/src/render/vitagxm/SDL_render_vita_gxm.c
index ec0fe146a507..43cc0e2d213c 100644
--- a/src/render/vitagxm/SDL_render_vita_gxm.c
+++ b/src/render/vitagxm/SDL_render_vita_gxm.c
@@ -89,6 +89,8 @@ static int VITA_GXM_QueueGeometry(SDL_Renderer *renderer, SDL_RenderCommand *cmd
 
 static int VITA_GXM_RenderClear(SDL_Renderer *renderer, SDL_RenderCommand *cmd);
 
+static void VITA_GXM_InvalidateCachedState(SDL_Renderer *renderer);
+
 static int VITA_GXM_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, void *vertices, size_t vertsize);
 
 static int VITA_GXM_RenderReadPixels(SDL_Renderer *renderer, const SDL_Rect *rect,
@@ -244,6 +246,7 @@ SDL_Renderer *VITA_GXM_CreateRenderer(SDL_Window *window, SDL_PropertiesID creat
     renderer->QueueDrawPoints = VITA_GXM_QueueDrawPoints;
     renderer->QueueDrawLines = VITA_GXM_QueueDrawLines;
     renderer->QueueGeometry = VITA_GXM_QueueGeometry;
+    renderer->InvalidateCachedState = VITA_GXM_InvalidateCachedState;
     renderer->RunCommandQueue = VITA_GXM_RunCommandQueue;
     renderer->RenderReadPixels = VITA_GXM_RenderReadPixels;
     renderer->RenderPresent = VITA_GXM_RenderPresent;
@@ -929,6 +932,11 @@ static int SetDrawState(VITA_GXM_RenderData *data, const SDL_RenderCommand *cmd)
     return 0;
 }
 
+static void VITA_GXM_InvalidateCachedState(SDL_Renderer *renderer)
+{
+    /* currently this doesn't do anything. If this needs to do something (and someone is mixing their own rendering calls in!), update this. */
+}
+
 static int VITA_GXM_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, void *vertices, size_t vertsize)
 {
     VITA_GXM_RenderData *data = (VITA_GXM_RenderData *)renderer->driverdata;

(Patch may be truncated, please check the link at the top of this post.)