SDL: loongarch: add SDL_FillSurfaceRect4LSX opt

From dc5b13693060770b36b43900347ca67ac35ababa Mon Sep 17 00:00:00 2001
From: yuanhecai <[EMAIL REDACTED]>
Date: Thu, 30 Oct 2025 13:54:20 +0800
Subject: [PATCH] loongarch: add SDL_FillSurfaceRect4LSX opt

---
 CMakeLists.txt           |  2 ++
 src/video/SDL_fillrect.c | 69 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 39cd099d69959..fdaee72811bb5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -935,11 +935,13 @@ if(SDL_ASSEMBLY)
         set_property(SOURCE
             "${SDL3_SOURCE_DIR}/src/video/yuv2rgb/yuv_rgb_lsx.c"
             "${SDL3_SOURCE_DIR}/src/video/SDL_blit_A.c"
+            "${SDL3_SOURCE_DIR}/src/video/SDL_fillrect.c"
             APPEND PROPERTY COMPILE_OPTIONS "-mlsx")
 
         set_property(SOURCE
             "${SDL3_SOURCE_DIR}/src/video/yuv2rgb/yuv_rgb_lsx.c"
             "${SDL3_SOURCE_DIR}/src/video/SDL_blit_A.c"
+            "${SDL3_SOURCE_DIR}/src/video/SDL_fillrect.c"
             PROPERTY SKIP_PRECOMPILE_HEADERS 1)
         set(HAVE_LSX TRUE)
       endif()
diff --git a/src/video/SDL_fillrect.c b/src/video/SDL_fillrect.c
index 5be1eec52784e..8b3d156c1d268 100644
--- a/src/video/SDL_fillrect.c
+++ b/src/video/SDL_fillrect.c
@@ -133,6 +133,69 @@ DEFINE_SSE_FILLRECT(4, Uint32)
 /* *INDENT-ON* */ // clang-format on
 #endif            // __SSE__
 
+#ifdef SDL_LSX_INTRINSICS
+/* *INDENT-OFF* */ // clang-format off
+
+#define LSX_BEGIN __m128i c128 = __lsx_vreplgr2vr_w(color); /* declares c128: `color` replicated into every 32-bit lane */
+
+#define LSX_WORK /* stores c128 over n / 64 consecutive 64-byte blocks; expects i, n, p, c128 in scope */ \
+    for (i = n / 64; i--;) { \
+        __lsx_vst(c128, p, 0);  /* four 16-byte stores cover one 64-byte block */ \
+        __lsx_vst(c128, p, 16); \
+        __lsx_vst(c128, p, 32); \
+        __lsx_vst(c128, p, 48); \
+        p += 64; /* n is left unchanged; callers use n & 63 for the tail */ \
+    }
+
+#define DEFINE_LSX_FILLRECT(bpp, type) \
+static void SDL_TARGETING("lsx") SDL_FillSurfaceRect##bpp##LSX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
+{ \
+    int i, n; /* i: block counter used by LSX_WORK; n: bytes still to fill in the row */ \
+    Uint8 *p = NULL; /* write cursor within the current row */ \
+    /* Fill a w x h rectangle of bpp-byte pixels at `pixels` (rows `pitch` bytes apart) with `color`. */ \
+    /* If the number of bytes per row is equal to the pitch, treat */ \
+    /* all rows as one long continuous row (for better performance) */ \
+    if ((w) * (bpp) == pitch) { \
+        w = w * h; \
+        h = 1; \
+    } \
+    /* LSX_BEGIN declares c128, the vector that LSX_WORK stores. */ \
+    LSX_BEGIN; \
+    /* Per row: scalar head up to a 16-byte boundary, 64-byte vector blocks, scalar tail. */ \
+    while (h--) { \
+        n = (w) * (bpp); \
+        p = pixels; \
+        /* Rows shorter than 64 bytes skip the vector path and are filled by the tail loop alone. */ \
+        if (n > 63) { \
+            int adjust = 16 - ((uintptr_t)p & 15); /* bytes up to the next 16-byte boundary */ \
+            if (adjust < 16) { \
+                adjust /= (bpp); /* whole pixels that fit before the boundary */ \
+                n -= adjust * (bpp); /* deduct only bytes actually written, so no pixel is lost if p is not bpp-aligned */ \
+                while (adjust--) { \
+                    *((type *)p) = (type)color; \
+                    p += (bpp); \
+                } \
+            } \
+            LSX_WORK; \
+        } \
+        if (n & 63) { \
+            int remainder = (n & 63); /* bytes left after the 64-byte blocks */ \
+            remainder /= (bpp); \
+            while (remainder--) { \
+                *((type *)p) = (type)color; \
+                p += (bpp); \
+            } \
+        } \
+        pixels += pitch; /* advance to the next row */ \
+    } \
+ \
+}
+
+DEFINE_LSX_FILLRECT(4, Uint32) // expands to SDL_FillSurfaceRect4LSX: 4 bytes per pixel, scalar stores as Uint32
+
+/* *INDENT-ON* */ // clang-format on
+#endif // SDL_LSX_INTRINSICS
+
 static void SDL_FillSurfaceRect1(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
 {
     int n;
@@ -339,6 +402,12 @@ bool SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count, Ui
                 fill_function = SDL_FillSurfaceRect4SSE;
                 break;
             }
+#endif
+#ifdef SDL_LSX_INTRINSICS
+            if (SDL_HasLSX()) {
+                fill_function = SDL_FillSurfaceRect4LSX;
+                break;
+            }
 #endif
             fill_function = SDL_FillSurfaceRect4;
             break;