SDL: cmake: Allow build system to disable loongarch intrinsics

From 46de6241d79c6f4531dbb729b2ed5992ec61b4a2 Mon Sep 17 00:00:00 2001
From: Anonymous Maarten <[EMAIL REDACTED]>
Date: Sun, 26 Feb 2023 00:35:57 +0100
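Subject: [PATCH] cmake: Allow build system to disable loongarch intrinsics

When CMake has not set HAVE_LSX / HAVE_LASX, set SDL_DISABLE_LSX /
SDL_DISABLE_LASX and export them through SDL_build_config.h.

SDL_intrin.h now checks these macros (instead of SDL_DISABLE_LSX_H /
SDL_DISABLE_LASX_H) before including <lsxintrin.h> / <lasxintrin.h>,
and no longer defines __LSX__ / __LASX__ itself.

SDL_internal.h gains HAVE_LSX_INTRINSICS / HAVE_LASX_INTRINSICS, defined
only when the compiler advertises the extension (__loongarch_sx /
__loongarch_asx) and the matching SDL_DISABLE_* macro is not set. The
YUV conversion code is switched from testing __loongarch_sx directly to
testing HAVE_LSX_INTRINSICS.

The remaining changes in src/video/yuv2rgb/yuv_rgb.c only strip trailing
whitespace.
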
---
 CMakeLists.txt                                |  8 +++
 include/SDL3/SDL_intrin.h                     |  6 +-
 include/build_config/SDL_build_config.h.cmake |  2 +
 src/SDL_internal.h                            |  8 +++
 src/video/SDL_yuv.c                           |  2 +-
 src/video/yuv2rgb/yuv_rgb.c                   | 62 +++++++++----------
 6 files changed, 52 insertions(+), 36 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index dd6616648640..739ccce23853 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1020,6 +1020,14 @@ if(NOT HAVE_SSE3)
   set(SDL_DISABLE_SSE3 1)
 endif()
 
+if(NOT HAVE_LSX)
+  set(SDL_DISABLE_LSX 1)
+endif()
+
+if(NOT HAVE_LASX)
+  set(SDL_DISABLE_LASX 1)
+endif()
+
 # TODO: Can't deactivate on FreeBSD? w/o LIBC, SDL_stdinc.h can't define
 # anything.
 if(SDL_LIBC)
diff --git a/include/SDL3/SDL_intrin.h b/include/SDL3/SDL_intrin.h
index be9b29b0aefd..9ca942c3de1d 100644
--- a/include/SDL3/SDL_intrin.h
+++ b/include/SDL3/SDL_intrin.h
@@ -93,13 +93,11 @@ _m_prefetch(void *__P)
 #endif
 #endif /* compiler version */
 
-#if defined(__loongarch_sx) && !defined(SDL_DISABLE_LSX_H)
+#if defined(__loongarch_sx) && !defined(SDL_DISABLE_LSX)
 #include <lsxintrin.h>
-#define __LSX__
 #endif
-#if defined(__loongarch_asx) && !defined(SDL_DISABLE_LASX_H)
+#if defined(__loongarch_asx) && !defined(SDL_DISABLE_LASX)
 #include <lasxintrin.h>
-#define __LASX__
 #endif
 #if defined(__AVX__) && !defined(SDL_DISABLE_AVX)
 #include <immintrin.h>
diff --git a/include/build_config/SDL_build_config.h.cmake b/include/build_config/SDL_build_config.h.cmake
index 9197c94c3f93..a35650f9e2f3 100644
--- a/include/build_config/SDL_build_config.h.cmake
+++ b/include/build_config/SDL_build_config.h.cmake
@@ -592,5 +592,7 @@ typedef unsigned int uintptr_t;
 #cmakedefine SDL_DISABLE_SSE3 1
 #cmakedefine SDL_DISABLE_AVX 1
 #cmakedefine SDL_DISABLE_MMX 1
+#cmakedefine SDL_DISABLE_LSX 1
+#cmakedefine SDL_DISABLE_LASX 1
 
 #endif /* SDL_build_config_h_ */
diff --git a/src/SDL_internal.h b/src/SDL_internal.h
index 8e1e14191d17..208b92943b78 100644
--- a/src/SDL_internal.h
+++ b/src/SDL_internal.h
@@ -211,6 +211,14 @@
 #define HAVE_AVX_INTRINSICS 1
 #endif
 
+#if defined(__loongarch_sx) && !defined(SDL_DISABLE_LSX)
+#define HAVE_LSX_INTRINSICS 1
+#endif
+
+#if defined(__loongarch_asx) && !defined(SDL_DISABLE_LASX)
+#define HAVE_LASX_INTRINSICS 1
+#endif
+
 #if defined __clang__
 #if (!__has_attribute(target))
 #undef HAVE_AVX_INTRINSICS
diff --git a/src/video/SDL_yuv.c b/src/video/SDL_yuv.c
index acf2bae058b4..06a7fa708b06 100644
--- a/src/video/SDL_yuv.c
+++ b/src/video/SDL_yuv.c
@@ -419,7 +419,7 @@ static SDL_bool yuv_rgb_lsx(
     Uint8 *rgb, Uint32 rgb_stride,
     YCbCrType yuv_type)
 {
-#ifdef __loongarch_sx
+#if HAVE_LSX_INTRINSICS
     if (!SDL_HasLSX()) {
         return SDL_FALSE;
     }
diff --git a/src/video/yuv2rgb/yuv_rgb.c b/src/video/yuv2rgb/yuv_rgb.c
index 68139bdbc638..5b96a4b7e47b 100644
--- a/src/video/yuv2rgb/yuv_rgb.c
+++ b/src/video/yuv2rgb/yuv_rgb.c
@@ -73,7 +73,7 @@ static const RGB2YUVParam RGB2YUV[3] = {
 // input must be in the [-128*PRECISION_FACTOR:384*PRECISION_FACTOR] range
 static uint8_t clampU8(int32_t v)
 {
-	static const uint8_t lut[512] = 
+	static const uint8_t lut[512] =
 	{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,
@@ -183,52 +183,52 @@ static uint8_t clampU8(int32_t v)
 #include "yuv_rgb_std_func.h"
 
 void rgb24_yuv420_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *RGB, uint32_t RGB_stride, 
-	uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, 
+	uint32_t width, uint32_t height,
+	const uint8_t *RGB, uint32_t RGB_stride,
+	uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
 	YCbCrType yuv_type)
 {
 	const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
-	
+
 	uint32_t x, y;
 	for(y=0; y<(height-1); y+=2)
 	{
 		const uint8_t *rgb_ptr1=RGB+y*RGB_stride,
 			*rgb_ptr2=RGB+(y+1)*RGB_stride;
-			
+
 		uint8_t *y_ptr1=Y+y*Y_stride,
 			*y_ptr2=Y+(y+1)*Y_stride,
 			*u_ptr=U+(y/2)*UV_stride,
 			*v_ptr=V+(y/2)*UV_stride;
-		
+
 		for(x=0; x<(width-1); x+=2)
 		{
 			// compute yuv for the four pixels, u and v values are summed
 			int32_t y_tmp, u_tmp, v_tmp;
-			
+
 			y_tmp = param->matrix[0][0]*rgb_ptr1[0] + param->matrix[0][1]*rgb_ptr1[1] + param->matrix[0][2]*rgb_ptr1[2];
 			u_tmp = param->matrix[1][0]*rgb_ptr1[0] + param->matrix[1][1]*rgb_ptr1[1] + param->matrix[1][2]*rgb_ptr1[2];
 			v_tmp = param->matrix[2][0]*rgb_ptr1[0] + param->matrix[2][1]*rgb_ptr1[1] + param->matrix[2][2]*rgb_ptr1[2];
 			y_ptr1[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
-			
+
 			y_tmp = param->matrix[0][0]*rgb_ptr1[3] + param->matrix[0][1]*rgb_ptr1[4] + param->matrix[0][2]*rgb_ptr1[5];
 			u_tmp += param->matrix[1][0]*rgb_ptr1[3] + param->matrix[1][1]*rgb_ptr1[4] + param->matrix[1][2]*rgb_ptr1[5];
 			v_tmp += param->matrix[2][0]*rgb_ptr1[3] + param->matrix[2][1]*rgb_ptr1[4] + param->matrix[2][2]*rgb_ptr1[5];
 			y_ptr1[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
-			
+
 			y_tmp = param->matrix[0][0]*rgb_ptr2[0] + param->matrix[0][1]*rgb_ptr2[1] + param->matrix[0][2]*rgb_ptr2[2];
 			u_tmp += param->matrix[1][0]*rgb_ptr2[0] + param->matrix[1][1]*rgb_ptr2[1] + param->matrix[1][2]*rgb_ptr2[2];
 			v_tmp += param->matrix[2][0]*rgb_ptr2[0] + param->matrix[2][1]*rgb_ptr2[1] + param->matrix[2][2]*rgb_ptr2[2];
 			y_ptr2[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
-			
+
 			y_tmp = param->matrix[0][0]*rgb_ptr2[3] + param->matrix[0][1]*rgb_ptr2[4] + param->matrix[0][2]*rgb_ptr2[5];
 			u_tmp += param->matrix[1][0]*rgb_ptr2[3] + param->matrix[1][1]*rgb_ptr2[4] + param->matrix[1][2]*rgb_ptr2[5];
 			v_tmp += param->matrix[2][0]*rgb_ptr2[3] + param->matrix[2][1]*rgb_ptr2[4] + param->matrix[2][2]*rgb_ptr2[5];
 			y_ptr2[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
-			
+
 			u_ptr[0] = clampU8(u_tmp/4+(128<<PRECISION));
 			v_ptr[0] = clampU8(v_tmp/4+(128<<PRECISION));
-			
+
 			rgb_ptr1 += 6;
 			rgb_ptr2 += 6;
 			y_ptr1 += 2;
@@ -609,35 +609,35 @@ V = _mm_srai_epi16(V, PRECISION);
 	SAVE_SI128((__m128i*)(u_ptr), u1); \
 	SAVE_SI128((__m128i*)(v_ptr), v1);
 
-void rgb24_yuv420_sse(uint32_t width, uint32_t height, 
-	const uint8_t *RGB, uint32_t RGB_stride, 
-	uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, 
+void rgb24_yuv420_sse(uint32_t width, uint32_t height,
+	const uint8_t *RGB, uint32_t RGB_stride,
+	uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
 	YCbCrType yuv_type)
 {
 	#define LOAD_SI128 _mm_load_si128
 	#define SAVE_SI128 _mm_stream_si128
 	const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
-	
+
 	uint32_t xpos, ypos;
 	for(ypos=0; ypos<(height-1); ypos+=2)
 	{
 		const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride,
 			*rgb_ptr2=RGB+(ypos+1)*RGB_stride;
-		
+
 		uint8_t *y_ptr1=Y+ypos*Y_stride,
 			*y_ptr2=Y+(ypos+1)*Y_stride,
 			*u_ptr=U+(ypos/2)*UV_stride,
 			*v_ptr=V+(ypos/2)*UV_stride;
-		
+
 		for(xpos=0; xpos<(width-31); xpos+=32)
 		{
 			RGB2YUV_32
-			
+
 			rgb_ptr1+=96;
 			rgb_ptr2+=96;
 			y_ptr1+=32;
 			y_ptr2+=32;
-			u_ptr+=16; 
+			u_ptr+=16;
 			v_ptr+=16;
 		}
 	}
@@ -645,35 +645,35 @@ void rgb24_yuv420_sse(uint32_t width, uint32_t height,
 	#undef SAVE_SI128
 }
 
-void rgb24_yuv420_sseu(uint32_t width, uint32_t height, 
-	const uint8_t *RGB, uint32_t RGB_stride, 
-	uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, 
+void rgb24_yuv420_sseu(uint32_t width, uint32_t height,
+	const uint8_t *RGB, uint32_t RGB_stride,
+	uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
 	YCbCrType yuv_type)
 {
 	#define LOAD_SI128 _mm_loadu_si128
 	#define SAVE_SI128 _mm_storeu_si128
 	const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
-	
+
 	uint32_t xpos, ypos;
 	for(ypos=0; ypos<(height-1); ypos+=2)
 	{
 		const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride,
 			*rgb_ptr2=RGB+(ypos+1)*RGB_stride;
-		
+
 		uint8_t *y_ptr1=Y+ypos*Y_stride,
 			*y_ptr2=Y+(ypos+1)*Y_stride,
 			*u_ptr=U+(ypos/2)*UV_stride,
 			*v_ptr=V+(ypos/2)*UV_stride;
-		
+
 		for(xpos=0; xpos<(width-31); xpos+=32)
 		{
 			RGB2YUV_32
-			
+
 			rgb_ptr1+=96;
 			rgb_ptr2+=96;
 			y_ptr1+=32;
 			y_ptr2+=32;
-			u_ptr+=16; 
+			u_ptr+=16;
 			v_ptr+=16;
 		}
 	}
@@ -684,7 +684,7 @@ void rgb24_yuv420_sseu(uint32_t width, uint32_t height,
 
 #endif //HAVE_SSE2_INTRINSICS
 
-#ifdef __loongarch_sx
+#if HAVE_LSX_INTRINSICS
 
 #define LSX_FUNCTION_NAME	yuv420_rgb24_lsx
 #define STD_FUNCTION_NAME	yuv420_rgb24_std
@@ -716,6 +716,6 @@ void rgb24_yuv420_sseu(uint32_t width, uint32_t height,
 #define RGB_FORMAT			RGB_FORMAT_ABGR
 #include "yuv_rgb_lsx_func.h"
 
-#endif  //__loongarch_sx
+#endif  //HAVE_LSX_INTRINSICS
 
 #endif /* SDL_HAVE_YUV */