SDL: Cache the draw color state in the software and PSP renderers

From 9c8b47b726f67aac24f31f340291f9fc74d17679 Mon Sep 17 00:00:00 2001
From: Sam Lantinga <[EMAIL REDACTED]>
Date: Wed, 31 Jan 2024 23:54:47 -0800
Subject: [PATCH] Cache the draw color state in the software and PSP renderers

Converting the draw color from float to 8-bit on every draw command is a bit expensive, so cache the converted value when the draw color is set.
---
 src/render/psp/SDL_render_psp.c     | 52 ++++++++++++-----------------
 src/render/software/SDL_render_sw.c | 51 ++++++++++++++++------------
 2 files changed, 51 insertions(+), 52 deletions(-)

diff --git a/src/render/psp/SDL_render_psp.c b/src/render/psp/SDL_render_psp.c
index 97ed45b5da3c..e8db7e664a1a 100644
--- a/src/render/psp/SDL_render_psp.c
+++ b/src/render/psp/SDL_render_psp.c
@@ -81,6 +81,11 @@ typedef struct
     SDL_Texture *texture;
 } PSP_BlendState;
 
+typedef struct
+{
+    unsigned int color; /* packed GU_RGBA() draw color, refreshed on SDL_RENDERCMD_SETDRAWCOLOR */
+} PSP_DrawStateCache;
+
 typedef struct
 {
     void *frontbuffer;         /**< main screen buffer */
@@ -1036,6 +1041,10 @@ static int PSP_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, v
 {
     PSP_RenderData *data = (PSP_RenderData *)renderer->driverdata;
     Uint8 *gpumem = NULL;
+    PSP_DrawStateCache drawstate;
+
+    drawstate.color = 0;
+
     StartDrawing(renderer);
 
     /* note that before the renderer interface change, this would do extremely small
@@ -1054,7 +1063,12 @@ static int PSP_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, v
         switch (cmd->command) {
         case SDL_RENDERCMD_SETDRAWCOLOR:
         {
-            break; /* !!! FIXME: we could cache drawstate like color */
+            const Uint8 r = (Uint8)SDL_roundf(cmd->data.color.color.r * 255.0f);
+            const Uint8 g = (Uint8)SDL_roundf(cmd->data.color.color.g * 255.0f);
+            const Uint8 b = (Uint8)SDL_roundf(cmd->data.color.color.b * 255.0f);
+            const Uint8 a = (Uint8)SDL_roundf(cmd->data.color.color.a * 255.0f);
+            drawstate.color = GU_RGBA(r, g, b, a);
+            break;
         }
 
         case SDL_RENDERCMD_SETVIEWPORT:
@@ -1094,12 +1108,8 @@ static int PSP_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, v
         {
             const size_t count = cmd->data.draw.count;
             const VertV *verts = (VertV *)(gpumem + cmd->data.draw.first);
-            const Uint8 r = (Uint8)SDL_roundf(cmd->data.draw.color.r * 255.0f);
-            const Uint8 g = (Uint8)SDL_roundf(cmd->data.draw.color.g * 255.0f);
-            const Uint8 b = (Uint8)SDL_roundf(cmd->data.draw.color.b * 255.0f);
-            const Uint8 a = (Uint8)SDL_roundf(cmd->data.draw.color.a * 255.0f);
             PSP_BlendState state = {
-                .color = GU_RGBA(r, g, b, a),
+                .color = drawstate.color,
                 .texture = NULL,
                 .mode = cmd->data.draw.blend,
                 .shadeModel = GU_FLAT
@@ -1113,12 +1123,8 @@ static int PSP_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, v
         {
             const size_t count = cmd->data.draw.count;
             const VertV *verts = (VertV *)(gpumem + cmd->data.draw.first);
-            const Uint8 r = (Uint8)SDL_roundf(cmd->data.draw.color.r * 255.0f);
-            const Uint8 g = (Uint8)SDL_roundf(cmd->data.draw.color.g * 255.0f);
-            const Uint8 b = (Uint8)SDL_roundf(cmd->data.draw.color.b * 255.0f);
-            const Uint8 a = (Uint8)SDL_roundf(cmd->data.draw.color.a * 255.0f);
             PSP_BlendState state = {
-                .color = GU_RGBA(r, g, b, a),
+                .color = drawstate.color,
                 .texture = NULL,
                 .mode = cmd->data.draw.blend,
                 .shadeModel = GU_FLAT
@@ -1132,12 +1138,8 @@ static int PSP_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, v
         {
             const size_t count = cmd->data.draw.count;
             const VertV *verts = (VertV *)(gpumem + cmd->data.draw.first);
-            const Uint8 r = (Uint8)SDL_roundf(cmd->data.draw.color.r * 255.0f);
-            const Uint8 g = (Uint8)SDL_roundf(cmd->data.draw.color.g * 255.0f);
-            const Uint8 b = (Uint8)SDL_roundf(cmd->data.draw.color.b * 255.0f);
-            const Uint8 a = (Uint8)SDL_roundf(cmd->data.draw.color.a * 255.0f);
             PSP_BlendState state = {
-                .color = GU_RGBA(r, g, b, a),
+                .color = drawstate.color,
                 .texture = NULL,
                 .mode = cmd->data.draw.blend,
                 .shadeModel = GU_FLAT
@@ -1151,12 +1153,8 @@ static int PSP_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, v
         {
             const size_t count = cmd->data.draw.count;
             const VertTV *verts = (VertTV *)(gpumem + cmd->data.draw.first);
-            const Uint8 a = (Uint8)SDL_roundf(cmd->data.draw.color.a * 255.0f);
-            const Uint8 r = (Uint8)SDL_roundf(cmd->data.draw.color.r * 255.0f);
-            const Uint8 g = (Uint8)SDL_roundf(cmd->data.draw.color.g * 255.0f);
-            const Uint8 b = (Uint8)SDL_roundf(cmd->data.draw.color.b * 255.0f);
             PSP_BlendState state = {
-                .color = GU_RGBA(r, g, b, a),
+                .color = drawstate.color,
                 .texture = cmd->data.draw.texture,
                 .mode = cmd->data.draw.blend,
                 .shadeModel = GU_SMOOTH
@@ -1169,12 +1167,8 @@ static int PSP_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, v
         case SDL_RENDERCMD_COPY_EX:
         {
             const VertTV *verts = (VertTV *)(gpumem + cmd->data.draw.first);
-            const Uint8 a = (Uint8)SDL_roundf(cmd->data.draw.color.a * 255.0f);
-            const Uint8 r = (Uint8)SDL_roundf(cmd->data.draw.color.r * 255.0f);
-            const Uint8 g = (Uint8)SDL_roundf(cmd->data.draw.color.g * 255.0f);
-            const Uint8 b = (Uint8)SDL_roundf(cmd->data.draw.color.b * 255.0f);
             PSP_BlendState state = {
-                .color = GU_RGBA(r, g, b, a),
+                .color = drawstate.color,
                 .texture = cmd->data.draw.texture,
                 .mode = cmd->data.draw.blend,
                 .shadeModel = GU_SMOOTH
@@ -1195,12 +1189,8 @@ static int PSP_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, v
                 sceGuEnable(GU_TEXTURE_2D);
             } else {
                 const VertTCV *verts = (VertTCV *)(gpumem + cmd->data.draw.first);
-                const Uint8 a = (Uint8)SDL_roundf(cmd->data.draw.color.a * 255.0f);
-                const Uint8 r = (Uint8)SDL_roundf(cmd->data.draw.color.r * 255.0f);
-                const Uint8 g = (Uint8)SDL_roundf(cmd->data.draw.color.g * 255.0f);
-                const Uint8 b = (Uint8)SDL_roundf(cmd->data.draw.color.b * 255.0f);
                 PSP_BlendState state = {
-                    .color = GU_RGBA(r, g, b, a),
+                    .color = drawstate.color,
                     .texture = NULL,
                     .mode = cmd->data.draw.blend,
                     .shadeModel = GU_FLAT
diff --git a/src/render/software/SDL_render_sw.c b/src/render/software/SDL_render_sw.c
index 300f71a3c0e7..d814b544312c 100644
--- a/src/render/software/SDL_render_sw.c
+++ b/src/render/software/SDL_render_sw.c
@@ -41,6 +41,7 @@ typedef struct
     const SDL_Rect *viewport;
     const SDL_Rect *cliprect;
     SDL_bool surface_cliprect_dirty;
+    SDL_Color color;
 } SW_DrawStateCache;
 
 typedef struct
@@ -624,12 +625,12 @@ static int SW_QueueGeometry(SDL_Renderer *renderer, SDL_RenderCommand *cmd, SDL_
     return 0;
 }
 
-static void PrepTextureForCopy(const SDL_RenderCommand *cmd)
+static void PrepTextureForCopy(const SDL_RenderCommand *cmd, SW_DrawStateCache *drawstate)
 {
-    const Uint8 r = (Uint8)SDL_roundf(cmd->data.draw.color.r * 255.0f);
-    const Uint8 g = (Uint8)SDL_roundf(cmd->data.draw.color.g * 255.0f);
-    const Uint8 b = (Uint8)SDL_roundf(cmd->data.draw.color.b * 255.0f);
-    const Uint8 a = (Uint8)SDL_roundf(cmd->data.draw.color.a * 255.0f);
+    const Uint8 r = drawstate->color.r;
+    const Uint8 g = drawstate->color.g;
+    const Uint8 b = drawstate->color.b;
+    const Uint8 a = drawstate->color.a;
     const SDL_BlendMode blend = cmd->data.draw.blend;
     SDL_Texture *texture = cmd->data.draw.texture;
     SDL_Surface *surface = (SDL_Surface *)texture->driverdata;
@@ -687,12 +688,20 @@ static int SW_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, vo
     drawstate.viewport = NULL;
     drawstate.cliprect = NULL;
     drawstate.surface_cliprect_dirty = SDL_TRUE;
+    drawstate.color.r = 0;
+    drawstate.color.g = 0;
+    drawstate.color.b = 0;
+    drawstate.color.a = 0;
 
     while (cmd) {
         switch (cmd->command) {
         case SDL_RENDERCMD_SETDRAWCOLOR:
         {
-            break; /* Not used in this backend. */
+            drawstate.color.r = (Uint8)SDL_roundf(cmd->data.color.color.r * 255.0f);
+            drawstate.color.g = (Uint8)SDL_roundf(cmd->data.color.color.g * 255.0f);
+            drawstate.color.b = (Uint8)SDL_roundf(cmd->data.color.color.b * 255.0f);
+            drawstate.color.a = (Uint8)SDL_roundf(cmd->data.color.color.a * 255.0f);
+            break;
         }
 
         case SDL_RENDERCMD_SETVIEWPORT:
@@ -724,10 +733,10 @@ static int SW_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, vo
 
         case SDL_RENDERCMD_DRAW_POINTS:
         {
-            const Uint8 r = (Uint8)SDL_roundf(cmd->data.draw.color.r * 255.0f);
-            const Uint8 g = (Uint8)SDL_roundf(cmd->data.draw.color.g * 255.0f);
-            const Uint8 b = (Uint8)SDL_roundf(cmd->data.draw.color.b * 255.0f);
-            const Uint8 a = (Uint8)SDL_roundf(cmd->data.draw.color.a * 255.0f);
+            const Uint8 r = drawstate.color.r;
+            const Uint8 g = drawstate.color.g;
+            const Uint8 b = drawstate.color.b;
+            const Uint8 a = drawstate.color.a;
             const int count = (int)cmd->data.draw.count;
             SDL_Point *verts = (SDL_Point *)(((Uint8 *)vertices) + cmd->data.draw.first);
             const SDL_BlendMode blend = cmd->data.draw.blend;
@@ -752,10 +761,10 @@ static int SW_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, vo
 
         case SDL_RENDERCMD_DRAW_LINES:
         {
-            const Uint8 r = (Uint8)SDL_roundf(cmd->data.draw.color.r * 255.0f);
-            const Uint8 g = (Uint8)SDL_roundf(cmd->data.draw.color.g * 255.0f);
-            const Uint8 b = (Uint8)SDL_roundf(cmd->data.draw.color.b * 255.0f);
-            const Uint8 a = (Uint8)SDL_roundf(cmd->data.draw.color.a * 255.0f);
+            const Uint8 r = drawstate.color.r;
+            const Uint8 g = drawstate.color.g;
+            const Uint8 b = drawstate.color.b;
+            const Uint8 a = drawstate.color.a;
             const int count = (int)cmd->data.draw.count;
             SDL_Point *verts = (SDL_Point *)(((Uint8 *)vertices) + cmd->data.draw.first);
             const SDL_BlendMode blend = cmd->data.draw.blend;
@@ -780,10 +789,10 @@ static int SW_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, vo
 
         case SDL_RENDERCMD_FILL_RECTS:
         {
-            const Uint8 r = (Uint8)SDL_roundf(cmd->data.draw.color.r * 255.0f);
-            const Uint8 g = (Uint8)SDL_roundf(cmd->data.draw.color.g * 255.0f);
-            const Uint8 b = (Uint8)SDL_roundf(cmd->data.draw.color.b * 255.0f);
-            const Uint8 a = (Uint8)SDL_roundf(cmd->data.draw.color.a * 255.0f);
+            const Uint8 r = drawstate.color.r;
+            const Uint8 g = drawstate.color.g;
+            const Uint8 b = drawstate.color.b;
+            const Uint8 a = drawstate.color.a;
             const int count = (int)cmd->data.draw.count;
             SDL_Rect *verts = (SDL_Rect *)(((Uint8 *)vertices) + cmd->data.draw.first);
             const SDL_BlendMode blend = cmd->data.draw.blend;
@@ -816,7 +825,7 @@ static int SW_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, vo
 
             SetDrawState(surface, &drawstate);
 
-            PrepTextureForCopy(cmd);
+            PrepTextureForCopy(cmd, &drawstate);
 
             /* Apply viewport */
             if (drawstate.viewport && (drawstate.viewport->x || drawstate.viewport->y)) {
@@ -875,7 +884,7 @@ static int SW_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, vo
         {
             CopyExData *copydata = (CopyExData *)(((Uint8 *)vertices) + cmd->data.draw.first);
             SetDrawState(surface, &drawstate);
-            PrepTextureForCopy(cmd);
+            PrepTextureForCopy(cmd, &drawstate);
 
             /* Apply viewport */
             if (drawstate.viewport && (drawstate.viewport->x || drawstate.viewport->y)) {
@@ -904,7 +913,7 @@ static int SW_RunCommandQueue(SDL_Renderer *renderer, SDL_RenderCommand *cmd, vo
 
                 GeometryCopyData *ptr = (GeometryCopyData *)verts;
 
-                PrepTextureForCopy(cmd);
+                PrepTextureForCopy(cmd, &drawstate);
 
                 /* Apply viewport */
                 if (drawstate.viewport && (drawstate.viewport->x || drawstate.viewport->y)) {