SDL: Replace custom NEON cast macros with vreinterpret_*_* (c9bdf)

SDLPushPostBot · May 14, 2026, 4:16pm

From c9bdfd8fed3c8da0ac63b6e6f52476cb5e9276d3 Mon Sep 17 00:00:00 2001
From: Cameron Cawley <[EMAIL REDACTED]>
Date: Thu, 14 May 2026 14:50:21 +0100
Subject: [PATCH] Replace custom NEON cast macros with vreinterpret_*_*

(cherry picked from commit a3376acc2e2fc940a909659f3d95fb199a548194)
---
 src/video/SDL_stretch.c | 48 +++++++++++++++--------------------------
 1 file changed, 17 insertions(+), 31 deletions(-)

diff --git a/src/video/SDL_stretch.c b/src/video/SDL_stretch.c
index d432c0fbbd230..073f62c09cb3f 100644
--- a/src/video/SDL_stretch.c
+++ b/src/video/SDL_stretch.c
@@ -375,20 +375,6 @@ static bool scale_mat(const Uint32 *src, int src_w, int src_h, int src_pitch, Ui
     return true;
 }
 
-#ifdef SDL_NEON_INTRINSICS
-#define CAST_uint8x8_t       (uint8x8_t)
-#define CAST_uint32x2_t      (uint32x2_t)
-#endif
-
-#if defined(_MSC_VER)
-#ifdef SDL_NEON_INTRINSICS
-#undef CAST_uint8x8_t
-#undef CAST_uint32x2_t
-#define CAST_uint8x8_t
-#define CAST_uint32x2_t
-#endif
-#endif
-
 #ifdef SDL_SSE2_INTRINSICS
 
 #if 0
@@ -587,8 +573,8 @@ static SDL_INLINE void INTERPOL_BILINEAR_NEON(const Uint32 *s0, const Uint32 *s1
     uint16x8_t d0;
     uint8x8_t e0;
 
-    x_00_01 = CAST_uint8x8_t vld1_u32(s0); // Load 2 pixels
-    x_10_11 = CAST_uint8x8_t vld1_u32(s1);
+    x_00_01 = vreinterpret_u8_u32(vld1_u32(s0)); // Load 2 pixels
+    x_10_11 = vreinterpret_u8_u32(vld1_u32(s1));
 
     /* Interpolated == x0 + frac * (x1 - x0) == x0 * (1 - frac) + x1 * frac */
     k0 = vmull_u8(x_00_01, v_frac_h1);     /* k0 := x0 * (1 - frac)    */
@@ -608,7 +594,7 @@ static SDL_INLINE void INTERPOL_BILINEAR_NEON(const Uint32 *s0, const Uint32 *s1
     e0 = vmovn_u16(d0);
 
     // Store 1 pixel
-    *dst = vget_lane_u32(CAST_uint32x2_t e0, 0);
+    *dst = vget_lane_u32(vreinterpret_u32_u8(e0), 0);
 }
 
 static bool scale_mat_NEON(const Uint32 *src, int src_w, int src_h, int src_pitch, Uint32 *dst, int dst_w, int dst_h, int dst_pitch)
@@ -672,14 +658,14 @@ static bool scale_mat_NEON(const Uint32 *src, int src_w, int src_h, int src_pitc
             s_16_17 = (const Uint32 *)((const Uint8 *)src_h1 + index_w_3);
 
             // Interpolation vertical
-            x_00_01 = CAST_uint8x8_t vld1_u32(s_00_01); // Load 2 pixels
-            x_02_03 = CAST_uint8x8_t vld1_u32(s_02_03);
-            x_04_05 = CAST_uint8x8_t vld1_u32(s_04_05);
-            x_06_07 = CAST_uint8x8_t vld1_u32(s_06_07);
-            x_10_11 = CAST_uint8x8_t vld1_u32(s_10_11);
-            x_12_13 = CAST_uint8x8_t vld1_u32(s_12_13);
-            x_14_15 = CAST_uint8x8_t vld1_u32(s_14_15);
-            x_16_17 = CAST_uint8x8_t vld1_u32(s_16_17);
+            x_00_01 = vreinterpret_u8_u32(vld1_u32(s_00_01)); // Load 2 pixels
+            x_02_03 = vreinterpret_u8_u32(vld1_u32(s_02_03));
+            x_04_05 = vreinterpret_u8_u32(vld1_u32(s_04_05));
+            x_06_07 = vreinterpret_u8_u32(vld1_u32(s_06_07));
+            x_10_11 = vreinterpret_u8_u32(vld1_u32(s_10_11));
+            x_12_13 = vreinterpret_u8_u32(vld1_u32(s_12_13));
+            x_14_15 = vreinterpret_u8_u32(vld1_u32(s_14_15));
+            x_16_17 = vreinterpret_u8_u32(vld1_u32(s_16_17));
 
             /* Interpolated == x0 + frac * (x1 - x0) == x0 * (1 - frac) + x1 * frac */
             k0 = vmull_u8(x_00_01, v_frac_h1);     /* k0 := x0 * (1 - frac)    */
@@ -729,7 +715,7 @@ static bool scale_mat_NEON(const Uint32 *src, int src_w, int src_h, int src_pitc
             // Narrow again
             e1 = vmovn_u16(d1);
 
-            f0 = vcombine_u32(CAST_uint32x2_t e0, CAST_uint32x2_t e1);
+            f0 = vcombine_u32(vreinterpret_u32_u8(e0), vreinterpret_u32_u8(e1));
             // Store 4 pixels
             vst1q_u32(dst, f0);
 
@@ -768,10 +754,10 @@ static bool scale_mat_NEON(const Uint32 *src, int src_w, int src_h, int src_pitc
             s_12_13 = (const Uint32 *)((const Uint8 *)src_h1 + index_w_1);
 
             // Interpolation vertical
-            x_00_01 = CAST_uint8x8_t vld1_u32(s_00_01); // Load 2 pixels
-            x_02_03 = CAST_uint8x8_t vld1_u32(s_02_03);
-            x_10_11 = CAST_uint8x8_t vld1_u32(s_10_11);
-            x_12_13 = CAST_uint8x8_t vld1_u32(s_12_13);
+            x_00_01 = vreinterpret_u8_u32(vld1_u32(s_00_01)); // Load 2 pixels
+            x_02_03 = vreinterpret_u8_u32(vld1_u32(s_02_03));
+            x_10_11 = vreinterpret_u8_u32(vld1_u32(s_10_11));
+            x_12_13 = vreinterpret_u8_u32(vld1_u32(s_12_13));
 
             /* Interpolated == x0 + frac * (x1 - x0) == x0 * (1 - frac) + x1 * frac */
             k0 = vmull_u8(x_00_01, v_frac_h1);     /* k0 := x0 * (1 - frac)    */
@@ -801,7 +787,7 @@ static bool scale_mat_NEON(const Uint32 *src, int src_w, int src_h, int src_pitc
             e0 = vmovn_u16(d0);
 
             // Store 2 pixels
-            vst1_u32(dst, CAST_uint32x2_t e0);
+            vst1_u32(dst, vreinterpret_u32_u8(e0));
             dst += 2;
         }

SDL: Replace custom NEON cast macros with vreinterpret_*_* (c9bdf)

SDL: Replace custom NEON cast macros with vreinterpret_*_* (c9bdf)