From 8e3afd56f5d93a77de358dc7b59ec3dd72eedd11 Mon Sep 17 00:00:00 2001
From: Isaac Aronson <[EMAIL REDACTED]>
Date: Tue, 10 Oct 2023 08:48:14 -0500
Subject: [PATCH] Restore BlitARGB 555 and 565 blending algorithms
---
src/video/SDL_blit_A.c | 98 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 98 insertions(+)
diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c
index 7268fa5ad90b3..ae850a59f562e 100644
--- a/src/video/SDL_blit_A.c
+++ b/src/video/SDL_blit_A.c
@@ -952,6 +952,97 @@ static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info)
}
}
+/* fast ARGB8888->RGB565 blending with pixel alpha */
+static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info)
+{
+ int width = info->dst_w;
+ int height = info->dst_h;
+ Uint32 *srcp = (Uint32 *)info->src;
+ int srcskip = info->src_skip >> 2;
+ Uint16 *dstp = (Uint16 *)info->dst;
+ int dstskip = info->dst_skip >> 1;
+
+ while (height--) {
+ /* *INDENT-OFF* */ /* clang-format off */
+ DUFFS_LOOP4({
+ Uint32 s = *srcp;
+ unsigned alpha = s >> 27; /* downscale alpha to 5 bits */
+ /* FIXME: Here we special-case opaque alpha since the
+ compositioning used (>>8 instead of /255) doesn't handle
+ it correctly. Also special-case alpha=0 for speed?
+ Benchmark this! */
+ if (alpha) {
+ if (alpha == (SDL_ALPHA_OPAQUE >> 3)) {
+ *dstp = (Uint16)((s >> 8 & 0xf800) + (s >> 5 & 0x7e0) + (s >> 3 & 0x1f));
+ } else {
+ Uint32 d = *dstp;
+ /*
+ * convert source and destination to G0RAB65565
+ * and blend all components at the same time
+ */
+ s = ((s & 0xfc00) << 11) + (s >> 8 & 0xf800)
+ + (s >> 3 & 0x1f);
+ d = (d | d << 16) & 0x07e0f81f;
+ d += (s - d) * alpha >> 5;
+ d &= 0x07e0f81f;
+ *dstp = (Uint16)(d | d >> 16);
+ }
+ }
+ srcp++;
+ dstp++;
+ }, width);
+ /* *INDENT-ON* */ /* clang-format on */
+ srcp += srcskip;
+ dstp += dstskip;
+ }
+}
+
+/* fast ARGB8888->RGB555 blending with pixel alpha */
+static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info)
+{
+ int width = info->dst_w;
+ int height = info->dst_h;
+ Uint32 *srcp = (Uint32 *)info->src;
+ int srcskip = info->src_skip >> 2;
+ Uint16 *dstp = (Uint16 *)info->dst;
+ int dstskip = info->dst_skip >> 1;
+
+ while (height--) {
+ /* *INDENT-OFF* */ /* clang-format off */
+ DUFFS_LOOP4({
+ unsigned alpha;
+ Uint32 s = *srcp;
+ alpha = s >> 27; /* downscale alpha to 5 bits */
+ /* FIXME: Here we special-case opaque alpha since the
+ compositioning used (>>8 instead of /255) doesn't handle
+ it correctly. Also special-case alpha=0 for speed?
+ Benchmark this! */
+ if (alpha) {
+ if (alpha == (SDL_ALPHA_OPAQUE >> 3)) {
+ *dstp = (Uint16)((s >> 9 & 0x7c00) + (s >> 6 & 0x3e0) + (s >> 3 & 0x1f));
+ } else {
+ Uint32 d = *dstp;
+ /*
+ * convert source and destination to G0RAB65565
+ * and blend all components at the same time
+ */
+ s = ((s & 0xf800) << 10) + (s >> 9 & 0x7c00)
+ + (s >> 3 & 0x1f);
+ d = (d | d << 16) & 0x03e07c1f;
+ d += (s - d) * alpha >> 5;
+ d &= 0x03e07c1f;
+ *dstp = (Uint16)(d | d >> 16);
+ }
+ }
+ srcp++;
+ dstp++;
+ }, width);
+ /* *INDENT-ON* */ /* clang-format on */
+ srcp += srcskip;
+ dstp += dstskip;
+ }
+}
+
/* General (slow) N->N blending with per-surface alpha */
static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info)
{
@@ -1127,6 +1218,13 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
}
case 2:
+ if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 && sf->Gmask == 0xff00 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
+ if (df->Gmask == 0x7e0) {
+ return BlitARGBto565PixelAlpha;
+ } else if (df->Gmask == 0x3e0) {
+ return BlitARGBto555PixelAlpha;
+ }
+ }
return BlitNtoNPixelAlpha;
case 4: