SDL: Added SDL_PremultiplyAlpha() to premultiply alpha on a block of SDL_PIXELFORMAT_ARGB8888 pixels

From c97b721868b2f005d7f2b1cbd502a6d6ad5fdd85 Mon Sep 17 00:00:00 2001
From: Sam Lantinga <[EMAIL REDACTED]>
Date: Sun, 21 Nov 2021 12:18:10 -0800
Subject: [PATCH] Added SDL_PremultiplyAlpha() to premultiply alpha on a block
 of SDL_PIXELFORMAT_ARGB8888 pixels

---
 WhatsNew.txt                         |  3 ++
 include/SDL_surface.h                | 28 ++++++++++-
 src/dynapi/SDL_dynapi_overrides.h    |  1 +
 src/dynapi/SDL_dynapi_procs.h        |  1 +
 src/video/SDL_pixels.c               | 29 ------------
 src/video/SDL_pixels_c.h             |  1 -
 src/video/SDL_surface.c              | 69 +++++++++++++++++++++++++++-
 src/video/kmsdrm/SDL_kmsdrmmouse.c   |  4 +-
 src/video/wayland/SDL_waylandmouse.c |  7 ++-
 9 files changed, 109 insertions(+), 34 deletions(-)

diff --git a/WhatsNew.txt b/WhatsNew.txt
index 732f1a51da9..5b22900266c 100644
--- a/WhatsNew.txt
+++ b/WhatsNew.txt
@@ -5,6 +5,9 @@ This is a list of major changes in SDL's version history.
 2.0.18:
 ---------------------------------------------------------------------------
 
+General:
+* Added SDL_PremultiplyAlpha() to premultiply alpha on a block of SDL_PIXELFORMAT_ARGB8888 pixels
+
 Linux:
 * Added the hint SDL_HINT_LINUX_JOYSTICK_CLASSIC to control whether /dev/input/js* or /dev/input/event* are used as joystick devices
 * Added the hint SDL_HINT_JOYSTICK_DEVICE to allow the user to specify devices that will be opened in addition to the normal joystick detection
diff --git a/include/SDL_surface.h b/include/SDL_surface.h
index 29ebfb663b4..0b18fa00279 100644
--- a/include/SDL_surface.h
+++ b/include/SDL_surface.h
@@ -705,7 +705,7 @@ extern DECLSPEC SDL_Surface *SDLCALL SDL_ConvertSurfaceFormat
  * \param height the height of the block to copy, in pixels
  * \param src_format an SDL_PixelFormatEnum value of the `src` pixels format
  * \param src a pointer to the source pixels
- * \param src_pitch the pitch of the block to copy, in bytes
+ * \param src_pitch the pitch of the source pixels, in bytes
  * \param dst_format an SDL_PixelFormatEnum value of the `dst` pixels format
  * \param dst a pointer to be filled in with new pixel data
  * \param dst_pitch the pitch of the destination pixels, in bytes
@@ -720,6 +720,32 @@ extern DECLSPEC int SDLCALL SDL_ConvertPixels(int width, int height,
                                               Uint32 dst_format,
                                               void * dst, int dst_pitch);
 
+/**
+ * Premultiply the alpha on a block of pixels.
+ *
+ * This is safe to use with src == dst, but not for other overlapping areas.
+ *
+ * This function is currently only implemented for SDL_PIXELFORMAT_ARGB8888.
+ *
+ * \param width the width of the block to convert, in pixels
+ * \param height the height of the block to convert, in pixels
+ * \param src_format an SDL_PixelFormatEnum value of the `src` pixels format
+ * \param src a pointer to the source pixels
+ * \param src_pitch the pitch of the source pixels, in bytes
+ * \param dst_format an SDL_PixelFormatEnum value of the `dst` pixels format
+ * \param dst a pointer to be filled in with premultiplied pixel data
+ * \param dst_pitch the pitch of the destination pixels, in bytes
+ * \returns 0 on success or a negative error code on failure; call
+ *          SDL_GetError() for more information.
+ *
+ * \since This function is available since SDL 2.0.18.
+ */
+extern DECLSPEC int SDLCALL SDL_PremultiplyAlpha(int width, int height,
+                                                 Uint32 src_format,
+                                                 const void * src, int src_pitch,
+                                                 Uint32 dst_format,
+                                                 void * dst, int dst_pitch);
+
 /**
  * Perform a fast fill of a rectangle with a specific color.
  *
diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h
index 62364c32916..6c0de513f10 100644
--- a/src/dynapi/SDL_dynapi_overrides.h
+++ b/src/dynapi/SDL_dynapi_overrides.h
@@ -854,3 +854,4 @@
 #define SDL_GameControllerHasRumble SDL_GameControllerHasRumble_REAL
 #define SDL_GameControllerHasRumbleTriggers SDL_GameControllerHasRumbleTriggers_REAL
 #define SDL_hid_ble_scan SDL_hid_ble_scan_REAL
+#define SDL_PremultiplyAlpha SDL_PremultiplyAlpha_REAL
diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h
index 4953f2902cd..d110e3165a0 100644
--- a/src/dynapi/SDL_dynapi_procs.h
+++ b/src/dynapi/SDL_dynapi_procs.h
@@ -923,3 +923,4 @@ SDL_DYNAPI_PROC(SDL_bool,SDL_JoystickHasRumbleTriggers,(SDL_Joystick *a),(a),ret
 SDL_DYNAPI_PROC(SDL_bool,SDL_GameControllerHasRumble,(SDL_GameController *a),(a),return)
 SDL_DYNAPI_PROC(SDL_bool,SDL_GameControllerHasRumbleTriggers,(SDL_GameController *a),(a),return)
 SDL_DYNAPI_PROC(void,SDL_hid_ble_scan,(SDL_bool a),(a),)
+SDL_DYNAPI_PROC(int,SDL_PremultiplyAlpha,(int a, int b, Uint32 c, const void *d, int e, Uint32 f, void *g, int h),(a,b,c,d,e,f,g,h),return)
diff --git a/src/video/SDL_pixels.c b/src/video/SDL_pixels.c
index 14021672030..eff0c94085c 100644
--- a/src/video/SDL_pixels.c
+++ b/src/video/SDL_pixels.c
@@ -1227,33 +1227,4 @@ SDL_CalculateGammaRamp(float gamma, Uint16 * ramp)
     }
 }
 
-/* Creates a copy of an ARGB8888-format surface's pixels with premultiplied alpha */
-void
-SDL_PremultiplySurfaceAlphaToARGB8888(SDL_Surface *src, Uint32 *dst)
-{
-    Uint8 A, R, G, B;
-    int x, y;
-
-    if (SDL_MUSTLOCK(src))
-        SDL_LockSurface(src);
-
-    for (y = 0; y < src->h; ++y) {
-        Uint32 *src_px = (Uint32*)((Uint8 *)src->pixels + (y * src->pitch));
-        for (x = 0; x < src->w; ++x) {
-            /* Component bytes extraction. */
-            SDL_GetRGBA(*src_px++, src->format, &R, &G, &B, &A);
-
-            /* Alpha pre-multiplication of each component. */
-            R = ((Uint32)A * R) / 255;
-            G = ((Uint32)A * G) / 255;
-            B = ((Uint32)A * B) / 255;
-
-            /* ARGB8888 pixel recomposition. */
-            *dst++ = (((Uint32)A << 24) | ((Uint32)R << 16) | ((Uint32)G << 8) | (B << 0));
-        }
-    }
-    if (SDL_MUSTLOCK(src))
-        SDL_UnlockSurface(src);
-}
-
 /* vi: set ts=4 sw=4 expandtab: */
diff --git a/src/video/SDL_pixels_c.h b/src/video/SDL_pixels_c.h
index 91db5b446e4..1974797fc59 100644
--- a/src/video/SDL_pixels_c.h
+++ b/src/video/SDL_pixels_c.h
@@ -43,7 +43,6 @@ extern void SDL_InvalidateAllBlitMap(SDL_Surface *surface);
 extern void SDL_DitherColors(SDL_Color * colors, int bpp);
 extern Uint8 SDL_FindColor(SDL_Palette * pal, Uint8 r, Uint8 g, Uint8 b, Uint8 a);
 extern void SDL_DetectPalette(SDL_Palette *pal, SDL_bool *is_opaque, SDL_bool *has_alpha_channel);
-extern void SDL_PremultiplySurfaceAlphaToARGB8888(SDL_Surface *src, Uint32 *dst);
 
 #endif /* SDL_pixels_c_h_ */
 
diff --git a/src/video/SDL_surface.c b/src/video/SDL_surface.c
index bb2e5327ae4..45a64c9a6bb 100644
--- a/src/video/SDL_surface.c
+++ b/src/video/SDL_surface.c
@@ -1381,7 +1381,12 @@ int SDL_ConvertPixels(int width, int height,
     void *nonconst_src = (void *) src;
     int ret;
 
-    /* Check to make sure we are blitting somewhere, so we don't crash */
+    if (!src) {
+        return SDL_InvalidParamError("src");
+    }
+    if (!src_pitch) {
+        return SDL_InvalidParamError("src_pitch");
+    }
     if (!dst) {
         return SDL_InvalidParamError("dst");
     }
@@ -1440,6 +1445,68 @@ int SDL_ConvertPixels(int width, int height,
     return ret;
 }
 
+/*
+ * Premultiply the alpha on a block of pixels: R, G and B each become
+ * (A * component) / 255 and A is copied through. Only SDL_PIXELFORMAT_ARGB8888
+ * is currently implemented; a NULL buffer, a zero pitch or any other format is
+ * rejected with SDL_InvalidParamError(). A width or height <= 0 converts nothing.
+ * Here are some ideas for optimization:
+ * https://github.com/Wizermil/premultiply_alpha/tree/master/premultiply_alpha
+ * https://developer.arm.com/documentation/101964/0201/Pre-multiplied-alpha-channel-data
+ */
+int SDL_PremultiplyAlpha(int width, int height,
+                         Uint32 src_format, const void * src, int src_pitch,
+                         Uint32 dst_format, void * dst, int dst_pitch)
+{
+    int c;
+    Uint32 srcpixel;
+    Uint32 srcR, srcG, srcB, srcA;
+    Uint32 dstpixel;
+    Uint32 dstR, dstG, dstB, dstA;
+
+    if (!src) {
+        return SDL_InvalidParamError("src");
+    }
+    if (!src_pitch) {
+        return SDL_InvalidParamError("src_pitch");
+    }
+    if (!dst) {
+        return SDL_InvalidParamError("dst");
+    }
+    if (!dst_pitch) {
+        return SDL_InvalidParamError("dst_pitch");
+    }
+    if (src_format != SDL_PIXELFORMAT_ARGB8888) {
+        return SDL_InvalidParamError("src_format");
+    }
+    if (dst_format != SDL_PIXELFORMAT_ARGB8888) {
+        return SDL_InvalidParamError("dst_format");
+    }
+
+    while (height-- > 0) {
+        const Uint32 *src_px = (const Uint32 *)src;
+        Uint32 *dst_px = (Uint32 *)dst;
+        for (c = width; c > 0; --c) {
+            /* Split the source pixel into its four channels. */
+            srcpixel = *src_px++;
+            RGBA_FROM_ARGB8888(srcpixel, srcR, srcG, srcB, srcA);
+
+            /* Scale each color channel by alpha; the integer division truncates. */
+            dstA = srcA;
+            dstR = (srcA * srcR) / 255;
+            dstG = (srcA * srcG) / 255;
+            dstB = (srcA * srcB) / 255;
+
+            /* Reassemble as ARGB8888; stored only after the read, so src == dst works. */
+            ARGB8888_FROM_RGBA(dstpixel, dstR, dstG, dstB, dstA);
+            *dst_px++ = dstpixel;
+        }
+        src = (const Uint8 *)src + src_pitch;
+        dst = (Uint8 *)dst + dst_pitch;
+    }
+    return 0;
+}
+
 /*
  * Free a surface created by the above function.
  */
diff --git a/src/video/kmsdrm/SDL_kmsdrmmouse.c b/src/video/kmsdrm/SDL_kmsdrmmouse.c
index 2d0795ef518..08e51aea4f0 100644
--- a/src/video/kmsdrm/SDL_kmsdrmmouse.c
+++ b/src/video/kmsdrm/SDL_kmsdrmmouse.c
@@ -278,7 +278,9 @@ KMSDRM_CreateCursor(SDL_Surface * surface, int hot_x, int hot_y)
        like other backends do. Also, the GBM BO pixels have to be
        alpha-premultiplied, but the SDL surface we receive has
        straight-alpha pixels, so we always have to convert. */ 
-    SDL_PremultiplySurfaceAlphaToARGB8888(surface, curdata->buffer);
+    SDL_PremultiplyAlpha(surface->w, surface->h,
+                         surface->format->format, surface->pixels, surface->pitch,
+                         SDL_PIXELFORMAT_ARGB8888, curdata->buffer, surface->w * 4);
 
     cursor->driverdata = curdata;
 
diff --git a/src/video/wayland/SDL_waylandmouse.c b/src/video/wayland/SDL_waylandmouse.c
index 19a439f6952..67ab721ee9d 100644
--- a/src/video/wayland/SDL_waylandmouse.c
+++ b/src/video/wayland/SDL_waylandmouse.c
@@ -280,7 +280,9 @@ Wayland_CreateCursor(SDL_Surface *surface, int hot_x, int hot_y)
         }
 
         /* Wayland requires premultiplied alpha for its surfaces. */
-        SDL_PremultiplySurfaceAlphaToARGB8888(surface, data->shm_data);
+        SDL_PremultiplyAlpha(surface->w, surface->h,
+                             surface->format->format, surface->pixels, surface->pitch,
+                             SDL_PIXELFORMAT_ARGB8888, data->shm_data, surface->w * 4);
 
         data->surface = wl_compositor_create_surface(wd->compositor);
         wl_surface_set_user_data(data->surface, NULL);
@@ -447,4 +449,7 @@ Wayland_FiniMouse(SDL_VideoData *data)
     }
     SDL_free(data->cursor_themes);
 }
+
 #endif  /* SDL_VIDEO_DRIVER_WAYLAND */
+
+/* vi: set ts=4 sw=4 expandtab: */