From 5cec91e27a970233446d3d122f52bf3661ac5281 Mon Sep 17 00:00:00 2001
From: Isaac Aronson <[EMAIL REDACTED]>
Date: Tue, 10 Oct 2023 08:09:10 -0500
Subject: [PATCH] Implement accurate, performant 32-bit scalar blitter for ARGB
dst case
---
src/video/SDL_blit.h | 30 +++++++++++++++++++++++++++-
src/video/SDL_blit_A.c | 44 ++++++++++++++++++++++++++++--------------
2 files changed, 58 insertions(+), 16 deletions(-)
diff --git a/src/video/SDL_blit.h b/src/video/SDL_blit.h
index 04525d7ee080c..1d677cb7be268 100644
--- a/src/video/SDL_blit.h
+++ b/src/video/SDL_blit.h
@@ -493,6 +493,13 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface);
} \
}
+/* Repack a 4-byte pixel laid out per srcfmt into dst as packed ARGB8888 (A in bits 31-24, B in bits 7-0) */
+#define PIXEL_TO_ARGB_PIXEL(src, srcfmt, dst) \
+    do { \
+        Uint8 a, r, g, b; /* assigned by RGBA_FROM_PIXEL below */ \
+        RGBA_FROM_PIXEL(src, srcfmt, r, g, b, a); \
+        (dst) = ((Uint32)a << 24) | ((Uint32)r << 16) | ((Uint32)g << 8) | (Uint32)b; /* widen first: int << 24 overflows for a >= 128 */ \
+    } while (0)
/* Blend a single color channel or alpha value */
#define ALPHA_BLEND_CHANNEL(sC, dC, sA) \
do { \
@@ -509,7 +516,28 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface);
ALPHA_BLEND_CHANNEL(sG, dG, A); \
ALPHA_BLEND_CHANNEL(sB, dB, A); \
} while (0)
-
+/* Blend packed ARGB8888 src over dst using src's alpha; the result is stored in dst, src is left untouched */
+#define ALPHA_BLEND_ARGB_PIXELS(src, dst) \
+    do { \
+        const Uint32 blendS = (Uint32)(src); \
+        const Uint32 blendD = (Uint32)(dst); \
+        const Uint32 blendA = blendS >> 24; \
+        /* Two 8-bit channels per word, each in its own 16-bit lane: R|B as-is, G|A shifted down by 8 */ \
+        const Uint32 srcRB = blendS & 0x00FF00FF; \
+        const Uint32 dstRB = blendD & 0x00FF00FF; \
+        /* The source alpha lane is forced to 0xFF, so the output alpha is dA + (255 - dA) * sA / 255 */ \
+        const Uint32 srcGA = ((blendS >> 8) & 0x000000FF) | 0x00FF0000; \
+        const Uint32 dstGA = (blendD >> 8) & 0x00FF00FF; \
+        /* Per lane: (s - d) * A + d * 255 == s * A + d * (255 - A); the unsigned wraparound cancels out */ \
+        Uint32 resRB = ((srcRB - dstRB) * blendA) + (dstRB << 8) - dstRB; \
+        Uint32 resGA = ((srcGA - dstGA) * blendA) + (dstGA << 8) - dstGA; \
+        /* Divide each lane by 255 without a division: (x + 1 + ((x + 1) >> 8)) >> 8 */ \
+        resRB += 0x00010001; \
+        resRB += (resRB >> 8) & 0x00FF00FF; \
+        resGA += 0x00010001; \
+        resGA += (resGA >> 8) & 0x00FF00FF; \
+        (dst) = ((resRB >> 8) & 0x00FF00FF) | (resGA & 0xFF00FF00); \
+    } while (0)
/* Blend the RGBA values of two pixels */
#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA) \
do { \
diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c
index 3707ca891c964..7268fa5ad90b3 100644
--- a/src/video/SDL_blit_A.c
+++ b/src/video/SDL_blit_A.c
@@ -1073,22 +1073,36 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
}
while (height--) {
-        /* *INDENT-OFF* */ /* clang-format off */
-        DUFFS_LOOP4(
-        {
-            DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
-            if (sA) {
-                DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
-                ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
-                ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
-            }
-            src += srcbpp;
-            dst += dstbpp;
-        },
-        width);
-        /* *INDENT-ON* */ /* clang-format on */
+        /* Fast path: 4-byte source onto a packed ARGB8888 destination, blended as whole 32-bit pixels. */
+        if (srcbpp == 4 && dstbpp == 4 && dstfmt->Ashift == 24 && dstfmt->Rshift == 16 && dstfmt->Gshift == 8 &&
+            dstfmt->Bshift == 0) {
+            DUFFS_LOOP4(
+            {
+                Uint32 blended = *(Uint32 *)dst;
+                PIXEL_TO_ARGB_PIXEL(*(Uint32 *)src, srcfmt, Pixel);
+                ALPHA_BLEND_ARGB_PIXELS(Pixel, blended);
+                *(Uint32 *)dst = blended;
+                src += srcbpp;
+                dst += dstbpp;
+            }, width);
+        } else {
+            /* *INDENT-OFF* */ /* clang-format off */
+            DUFFS_LOOP4(
+            {
+                DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
+                if (sA) {
+                    DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
+                    ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
+                    ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
+                }
+                src += srcbpp;
+                dst += dstbpp;
+            }, width);
+            /* *INDENT-ON* */ /* clang-format on */
+        }
+        /* Either branch has consumed `width` pixels; apply the per-row skip once, for both paths. */
         src += srcskip;
         dst += dstskip;
}
if (freeFormat) {
SDL_DestroyPixelFormat(dstfmt);