From e07cfdff2ccf2fb3a6abf5d3093c30fbd594af9b Mon Sep 17 00:00:00 2001
From: Gabriel Wang <[EMAIL REDACTED]>
Date: Mon, 18 May 2026 14:32:10 +0800
Subject: [PATCH] adds sdl_sve_chn_blend_with_mask_fast for RGB565
alpha-blending
---
src/video/arm/SDL_sve2_blit_A.c | 12 ++++++------
src/video/arm/SDL_sve2_extension.h | 23 ++++++++++++++++++++++-
2 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/src/video/arm/SDL_sve2_blit_A.c b/src/video/arm/SDL_sve2_blit_A.c
index be029bcc70031..606df3b060a3f 100644
--- a/src/video/arm/SDL_sve2_blit_A.c
+++ b/src/video/arm/SDL_sve2_blit_A.c
@@ -51,12 +51,12 @@
}
#undef sdl_sve_rgb32_blend_to_rgb565_op
-#define sdl_sve_rgb32_blend_to_rgb565_op(ma_alpha_chn_idx) \
- do { \
- svuint16_t vMask = svget4(sve_source_u16x4, (ma_alpha_chn_idx)); \
- sve_target_u16 = sdl_sve_chn_blend_with_mask(sve_source_u16, \
- sve_target_u16, \
- vMask); \
+#define sdl_sve_rgb32_blend_to_rgb565_op(ma_alpha_chn_idx) \
+ do { /* NOTE(review): sve_source_u16x4, sve_source_u16 and sve_target_u16 are not declared here; presumably supplied at the expansion site - confirm */ \
+ svuint16_t vMask = svget4(sve_source_u16x4, (ma_alpha_chn_idx)); /* tuple element selected by ma_alpha_chn_idx becomes the blend mask */ \
+ sve_target_u16 = sdl_sve_chn_blend_with_mask_fast(sve_source_u16, \
+ sve_target_u16, \
+ vMask); /* blended result overwrites sve_target_u16 */ \
 } while (0)
#include "SDL_sve2_swizzle.h"
diff --git a/src/video/arm/SDL_sve2_extension.h b/src/video/arm/SDL_sve2_extension.h
index 2f5a74a12bb59..3e2327a79c550 100644
--- a/src/video/arm/SDL_sve2_extension.h
+++ b/src/video/arm/SDL_sve2_extension.h
@@ -902,7 +902,9 @@ static inline void svst4ub_u16(svbool_t vPredu8,
/*! \note the Element range of vMask is [0, 0xFF]
*/
SDL_TARGETING("arch=armv8-a+sve2")
-static inline svuint16_t sdl_sve_chn_blend_with_mask(svuint16_t vSource, svuint16_t vTarget, svuint16_t vMask)
+static inline svuint16_t sdl_sve_chn_blend_with_mask(svuint16_t vSource,
+ svuint16_t vTarget,
+ svuint16_t vMask)
{
// vTarget = vSource * vMask + vTarget * (255 - vMask);
svuint16_t vTemp0 = svmul_u16_m(svptrue_b16(), vSource, vMask);
@@ -924,6 +926,25 @@ static inline svuint16_t sdl_sve_chn_blend_with_mask(svuint16_t vSource, svuint1
return svlsr_n_u16_m(svptrue_b16(), vTemp0, 8); // vTarget >> 8;
}
+/*! \brief per-element blend: returns (vSource * vMask + vTarget * (255 - vMask)) >> 8
+ *  \note the Element range of vMask is [0, 0xFF]; the final shift divides by 256 (not 255), so the result is an approximation */
+SDL_TARGETING("arch=armv8-a+sve2")
+static inline svuint16_t sdl_sve_chn_blend_with_mask_fast(svuint16_t vSource,
+ svuint16_t vTarget,
+ svuint16_t vMask)
+{
+ // vTemp0 = vSource * vMask + vTarget * (255 - vMask); the sum is at most 255 * 255 = 65025 and fits in 16 bits if vSource and vTarget elements are also <= 0xFF (assumed - TODO confirm against callers)
+ svuint16_t vTemp0 = svmul_u16_m(svptrue_b16(), vSource, vMask);
+ vTemp0 = svmla_u16_m(svptrue_b16(),
+ vTemp0,
+ vTarget,
+ svsub_u16_m(svptrue_b16(),
+ svdup_u16(255),
+ vMask));
+
+ return svlsr_n_u16_m(svptrue_b16(), vTemp0, 8); // vTemp0 >> 8, i.e. floor(vTemp0 / 256)
+}
+
/*! \note the hwOpacity range [0, 0x100]
*/
SDL_TARGETING("arch=armv8-a+sve2")