From 508450e9c05405e306a26dbe7972e6ba60656e15 Mon Sep 17 00:00:00 2001
From: Gabriel Wang <[EMAIL REDACTED]>
Date: Mon, 18 May 2026 15:31:22 +0800
Subject: [PATCH] adds Blit565to565SurfaceAlphaSVE2
---
src/video/SDL_blit_A.c | 5 ++
src/video/arm/SDL_sve2_blit_A.c | 97 ++++++++++++++++++++++++++++++
src/video/arm/SDL_sve2_blit_A.h | 2 +
src/video/arm/SDL_sve2_extension.h | 17 ++++++
4 files changed, 121 insertions(+)
diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c
index 60e48e71d9790..1057493e63889 100644
--- a/src/video/SDL_blit_A.c
+++ b/src/video/SDL_blit_A.c
@@ -1570,6 +1570,11 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
if (SDL_HasMMX()) {
return Blit565to565SurfaceAlphaMMX;
} else
+#endif
+#ifdef SDL_SVE2_INTRINSICS
+ if (SDL_HasSVE2()) {
+ return Blit565to565SurfaceAlphaSVE2;
+ } else
#endif
{
return Blit565to565SurfaceAlpha;
diff --git a/src/video/arm/SDL_sve2_blit_A.c b/src/video/arm/SDL_sve2_blit_A.c
index 606df3b060a3f..ef4dd5fff52e6 100644
--- a/src/video/arm/SDL_sve2_blit_A.c
+++ b/src/video/arm/SDL_sve2_blit_A.c
@@ -86,4 +86,101 @@ size_t SDL_GetSVEVectorSize(void)
return svlen(svundef_u8()) * 8;
}
+/*-----------------------------------------------------------------------------*
+ * RGB565 Blend with Surface Alpha *
+ *-----------------------------------------------------------------------------*/
+SDL_TARGETING("arch=armv8-a+sve2") // blends phwSource into phwTarget in place, one vector of 16-bit pixels per loop-body pass
+ARM_NONNULL(1, 2)
+static inline void sdl_sve_rgb565_stride_blend_with_opacity(uint16_t *SDL_RESTRICT phwSource, // pixels to blend in; only loaded
+ uint16_t *SDL_RESTRICT phwTarget, // pixels blended onto; loaded and stored back
+ size_t uStride, // length given to the loop macro; sdl_sve_rgb565_blend_with_opacity passes its nWidth
+ uint16_t hwOpacity) // source weight, forwarded to sdl_sve_chn_blend_with_opacity_fast
+{
+ sdl_sve_stride_loop_rgb16(uStride, vTailPred)
+ {
+ // NOTE(review): vTailPred and sve_iteration_advance are not declared in this function; presumably the loop macro defines them - confirm
+ svuint16x3_t vSource16x3 =
+ sdl_sve_rgb565_unpack(svld1_u16(vTailPred, phwSource));
+
+ svuint16x3_t vTarget16x3 =
+ sdl_sve_rgb565_unpack(svld1_u16(vTailPred, phwTarget));
+ // NOTE(review): sve_source_u16 / sve_target_u16 look like per-channel names supplied by the foreach macro - confirm
+ sdl_sve_pixel_ccc_foreach_chn(
+ vSource16x3,
+ vTarget16x3,
+ {
+ sve_target_u16 = sdl_sve_chn_blend_with_opacity_fast(
+ sve_source_u16,
+ sve_target_u16,
+ hwOpacity);
+ });
+ // repack the blended channels and store under the same predicate used for the loads
+ svst1_u16(vTailPred, phwTarget, sdl_sve_rgb565_pack(vTarget16x3));
+
+ phwSource += sve_iteration_advance;
+ phwTarget += sve_iteration_advance;
+ }
+}
+
+SDL_TARGETING("arch=armv8-a+sve2") // blends an nWidth x nHeight region of 16-bit pixels, one row per loop iteration
+ARM_NONNULL(1, 3)
+static inline void sdl_sve_rgb565_blend_with_opacity(uint8_t *SDL_RESTRICT pchSource, // first source row
+ size_t uSourceStride, // bytes added to pchSource after each row
+ uint8_t *SDL_RESTRICT pchTarget, // first target row
+ size_t uTargetStride, // bytes added to pchTarget after each row
+ int nWidth, // passed to the row blender as its uStride
+ int nHeight, // number of rows processed
+ uint16_t hwOpacity) // opacity; a value of 255 is bumped to 256 below
+{
+ hwOpacity += hwOpacity == 255; // 255 -> 256, making the target weight (256 - hwOpacity) zero in the channel blender
+ assert(0 == ((uintptr_t)pchSource & 0x01)); // both pointers are cast to uint16_t * below,
+ assert(0 == ((uintptr_t)pchTarget & 0x01)); // so they must be 2-byte aligned
+ // NOTE(review): assumes nHeight >= 0; a negative value would not terminate the countdown below normally - confirm callers
+ while (nHeight--) {
+
+ sdl_sve_rgb565_stride_blend_with_opacity((uint16_t *)pchSource,
+ (uint16_t *)pchTarget,
+ nWidth,
+ hwOpacity);
+ // byte pointers, so the strides are applied in bytes
+ pchSource += uSourceStride;
+ pchTarget += uTargetStride;
+ }
+}
+
+// fast RGB565->RGB565 blending with surface alpha: SDL_BlitInfo entry point that feeds the SVE2 row blender
+SDL_TARGETING("arch=armv8-a+sve2")
+void Blit565to565SurfaceAlphaSVE2(SDL_BlitInfo *info)
+{
+ uint16_t alpha = info->a; // surface alpha, used as the blend opacity
+ // unpack the blit geometry and pixel pointers
+ int width = info->dst_w;
+ int height = info->dst_h;
+ uint8_t *src = info->src;
+ int srcskip = info->src_skip;
+ uint8_t *dst = info->dst;
+ int dstskip = info->dst_skip;
+
+ const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
+ const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
+
+ // Set up some basic variables
+ int srcbpp = srcfmt->bytes_per_pixel;
+ int dstbpp = dstfmt->bytes_per_pixel;
+
+ assert(srcbpp == 2); // this path only handles 2-byte pixels on both sides
+ assert(dstbpp == 2);
+ // row-to-row distance in bytes: skip value plus width * bytes_per_pixel; NOTE(review): plain assert() above, not SDL_assert - confirm intended
+ int srcstride = srcskip + srcbpp * width;
+ int dststride = dstskip + dstbpp * width;
+
+ sdl_sve_rgb565_blend_with_opacity(src,
+ srcstride,
+ dst,
+ dststride,
+ width,
+ height,
+ alpha);
+}
+
#endif /* SDL_SVE2_INTRINSICS */
\ No newline at end of file
diff --git a/src/video/arm/SDL_sve2_blit_A.h b/src/video/arm/SDL_sve2_blit_A.h
index 2a7e2b8149859..2a8629556677b 100644
--- a/src/video/arm/SDL_sve2_blit_A.h
+++ b/src/video/arm/SDL_sve2_blit_A.h
@@ -30,6 +30,8 @@
void Blit8888to8888PixelAlphaSwizzleSVE2(SDL_BlitInfo *info);
void Blit8888to565PixelAlphaSwizzleSVE2(SDL_BlitInfo *info);
+void Blit565to565SurfaceAlphaSVE2(SDL_BlitInfo *info);
+
size_t SDL_GetSVEVectorSize(void);
#endif /* SDL_SVE2_INTRINSICS */
diff --git a/src/video/arm/SDL_sve2_extension.h b/src/video/arm/SDL_sve2_extension.h
index 3e2327a79c550..b9db084bba413 100644
--- a/src/video/arm/SDL_sve2_extension.h
+++ b/src/video/arm/SDL_sve2_extension.h
@@ -964,6 +964,23 @@ static inline svuint16_t sdl_sve_chn_blend_with_opacity(svuint16_t vSource,
return svlsr_n_u16_m(svptrue_b16(), vTarget, 8); // vTarget >> 8;
}
+/*! \brief per-channel blend: (vSource * hwOpacity + vTarget * (256 - hwOpacity)) >> 8
+ *  \note the hwOpacity range [0, 0x100] */
+SDL_TARGETING("arch=armv8-a+sve2")
+static inline svuint16_t sdl_sve_chn_blend_with_opacity_fast(svuint16_t vSource,
+ svuint16_t vTarget,
+ uint16_t hwOpacity)
+{
+ // vTemp0 = vSource * hwOpacity + vTarget * (256 - hwOpacity);
+ svuint16_t vTemp0 = svmul_n_u16_m(svptrue_b16(), vSource, hwOpacity);
+ vTemp0 = svmla_n_u16_m(svptrue_b16(),
+ vTemp0,
+ vTarget,
+ 256 - hwOpacity);
+ // NOTE(review): the sum stays in 16-bit lanes; presumably channel values are <= 0xFF so it cannot wrap - confirm
+ return svlsr_n_u16_m(svptrue_b16(), vTemp0, 8); // vTemp0 >> 8
+}
+
/*! \note the Element range of vMask is [0, 0xFF]
* \note the hwOpacity range [0, 0x100]
*/