SDL: Fixed bug #4841 - Out of bounds read (by 1 byte) in yuvnv12_rgb24_sseu

From 50f969c1b2534ea655ea052509bba3cba2efcab6 Mon Sep 17 00:00:00 2001
From: Sylvain <[EMAIL REDACTED]>
Date: Sun, 17 Oct 2021 22:02:19 +0200
Subject: [PATCH] Fixed bug #4841 - Out of bounds read (by 1 byte) in
 yuvnv12_rgb24_sseu

---
 src/video/yuv2rgb/yuv_rgb_sse_func.h | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/video/yuv2rgb/yuv_rgb_sse_func.h b/src/video/yuv2rgb/yuv_rgb_sse_func.h
index f81140e18e..3c5ee0db4d 100644
--- a/src/video/yuv2rgb/yuv_rgb_sse_func.h
+++ b/src/video/yuv2rgb/yuv_rgb_sse_func.h
@@ -415,6 +415,17 @@ void SSE_FUNCTION_NAME(uint32_t width, uint32_t height,
 #error Unknown RGB pixel size
 #endif
 
+#if YUV_FORMAT == YUV_FORMAT_NV12
+	/* For NV12 formats (where U/V are interleaved)
+	 * SSE READ_UV does an invalid read access at the very last pixel.
+	 * As a workaround, make sure the last column is not decoded by the SSE path but by the STD fallback path instead.
+	 * see https://github.com/libsdl-org/SDL/issues/4841
+	 */
+	const int fix_read_nv12 = ((width & 31) == 0);
+#else
+	const int fix_read_nv12 = 0;
+#endif
+
 	if (width >= 32) {
 		uint32_t xpos, ypos;
 		for(ypos=0; ypos<(height-(uv_y_sample_interval-1)); ypos+=uv_y_sample_interval)
@@ -427,7 +438,7 @@ void SSE_FUNCTION_NAME(uint32_t width, uint32_t height,
 			uint8_t *rgb_ptr1=RGB+ypos*RGB_stride,
 				*rgb_ptr2=RGB+(ypos+1)*RGB_stride;
 			
-			for(xpos=0; xpos<(width-31); xpos+=32)
+			for(xpos=0; xpos<(width-31) - fix_read_nv12; xpos+=32)
 			{
 				YUV2RGB_32
 				{
@@ -464,6 +475,9 @@ void SSE_FUNCTION_NAME(uint32_t width, uint32_t height,
 	/* Catch the right column, if needed */
 	{
 		int converted = (width & ~31);
+		if (fix_read_nv12) {
+			converted -= 32;
+		}
 		if (converted != width)
 		{
 			const uint8_t *y_ptr=Y+converted*y_pixel_stride,