SDL_ttf: Revert "Let SDL handle the alignment of surface pixels"

From df64d84fd305312fa245e0676237dc2f5426badc Mon Sep 17 00:00:00 2001
From: Sam Lantinga <[EMAIL REDACTED]>
Date: Wed, 11 May 2022 10:16:50 -0700
Subject: [PATCH] Revert "Let SDL handle the alignment of surface pixels"

This reverts commit 591eab73f0577f0867580b8c361e4aadd4a11916.

SDL_ttf has additional alignment constraints that SDL's surface code doesn't account for:
https://github.com/libsdl-org/SDL_ttf/issues/209#issuecomment-1124025606
---
 SDL_ttf.c | 173 ++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 149 insertions(+), 24 deletions(-)

diff --git a/SDL_ttf.c b/SDL_ttf.c
index bb026c1..8a4bb3d 100644
--- a/SDL_ttf.c
+++ b/SDL_ttf.c
@@ -123,6 +123,22 @@ int TTF_SetScript(int script) /* hb_script_t */
 #  define HAVE_NEON_INTRINSICS 1
 #endif
 
+/* Round glyph width to 8 bytes so glyphs can be copied 64 bits at a time */
+#define HAVE_BLIT_GLYPH_64
+
+/* Android armeabi-v7a doesn't like int64 (maybe every __ARM_ARCH < 8 target?),
+ * so disable the 64-bit path there, especially if NEON isn't detected */
+#if defined(__ARM_ARCH)
+#  if __ARM_ARCH < 8
+#    if defined(HAVE_BLIT_GLYPH_64)
+#      undef HAVE_BLIT_GLYPH_64
+#    endif
+#  endif
+#endif
+
+/* Default: round glyph width to 4 bytes to copy them faster (only used when HAVE_BLIT_GLYPH_64 is not defined) */
+#define HAVE_BLIT_GLYPH_32
+
 /* Use Duff's device to unroll loops */
 //#define USE_DUFFS_LOOP
 
@@ -578,6 +594,7 @@ static SDL_INLINE void BG_Blended(const TTF_Image *image, Uint32 *destination, S
     }
 }
 
+#if defined(HAVE_BLIT_GLYPH_32) || defined(HAVE_BLIT_GLYPH_64)
 static SDL_INLINE void BG_Blended_Opaque_32(const TTF_Image *image, Uint32 *destination, Sint32 srcskip, Uint32 dstskip)
 {
     const Uint8 *src    = image->buffer;
@@ -625,6 +642,7 @@ static SDL_INLINE void BG_Blended_32(const TTF_Image *image, Uint32 *destination
         dst  = (Uint32 *)((Uint8 *)dst + dstskip);
     }
 }
+#endif
 
 #if defined(HAVE_SSE2_INTRINSICS)
 /* Apply: alpha_table[i] = i << 24; */
@@ -886,6 +904,7 @@ static SDL_INLINE void BG(const TTF_Image *image, Uint8 *destination, Sint32 src
     }
 }
 
+#if defined(HAVE_BLIT_GLYPH_64)
 static SDL_INLINE void BG_64(const TTF_Image *image, Uint8 *destination, Sint32 srcskip, Uint32 dstskip)
 {
     const Uint64 *src    = (Uint64 *)image->buffer;
@@ -903,7 +922,7 @@ static SDL_INLINE void BG_64(const TTF_Image *image, Uint8 *destination, Sint32
         dst = (Uint64 *)((Uint8 *)dst + dstskip);
     }
 }
-
+#elif defined(HAVE_BLIT_GLYPH_32)
 static SDL_INLINE void BG_32(const TTF_Image *image, Uint8 *destination, Sint32 srcskip, Uint32 dstskip)
 {
     const Uint32 *src    = (Uint32 *)image->buffer;
@@ -921,6 +940,7 @@ static SDL_INLINE void BG_32(const TTF_Image *image, Uint8 *destination, Sint32
         dst = (Uint32 *)((Uint8 *)dst + dstskip);
     }
 }
+#endif
 
 #if defined(HAVE_SSE2_INTRINSICS)
 static SDL_INLINE void BG_SSE(const TTF_Image *image, Uint8 *destination, Sint32 srcskip, Uint32 dstskip)
@@ -1065,6 +1085,30 @@ static void clip_glyph(int *_x, int *_y, TTF_Image *image, const SDL_Surface *te
     *_y = y;
 }
 
+/* Byte alignment used by the glyph blitters: glyph width is rounded to it and dst addresses are aligned to it; src addresses are not aligned */
+static int Get_Alignement(void)
+{
+#if defined(HAVE_NEON_INTRINSICS)
+    if (hasNEON()) { /* runtime check: NEON blitters need 16-byte alignment */
+        return 16;
+    }
+#endif
+
+#if defined(HAVE_SSE2_INTRINSICS)
+    if (hasSSE2()) { /* runtime check: SSE2 blitters need 16-byte alignment */
+        return 16;
+    }
+#endif
+
+#if defined(HAVE_BLIT_GLYPH_64) /* no SIMD available: fall back to the widest scalar copy compiled in */
+    return 8;
+#elif defined(HAVE_BLIT_GLYPH_32)
+    return 4;
+#else
+    return 1;
+#endif
+}
+
 #ifdef __GNUC__
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-value"
@@ -1074,7 +1118,7 @@ static void clip_glyph(int *_x, int *_y, TTF_Image *image, const SDL_Surface *te
 static SDL_INLINE                                                                                                       \
 int Render_Line_##NAME(TTF_Font *font, SDL_Surface *textbuf, int xstart, int ystart, SDL_Color *fg)                     \
 {                                                                                                                       \
-    const int alignment = SDL_SIMDGetAlignment() - 1;                                                                         \
+    const int alignment = Get_Alignement() - 1;                                                                         \
     const int bpp = ((IS_BLENDED || IS_LCD) ? 4 : 1);                                                                   \
     unsigned int i;                                                                                                     \
     Uint8 fg_alpha = (fg ? fg->a : 0);                                                                                  \
@@ -1213,6 +1257,7 @@ BUILD_RENDER_LINE(NEON_LCD              , 0, 0, 1,    LCD, 0     ,
 BUILD_RENDER_LINE(NEON_LCD_SP           , 0, 0, 1,    LCD, SUBPIX,                       ,                ,            )
 #endif
 
+#if defined(HAVE_BLIT_GLYPH_64)
 BUILD_RENDER_LINE(64_Shaded             , 0, 0, 0, PIXMAP, 0     ,                       ,                , BG_64      )
 BUILD_RENDER_LINE(64_Blended            , 1, 0, 0,  COLOR, 0     ,                       , BG_Blended_32  ,            )
 BUILD_RENDER_LINE(64_Blended_Opaque     , 1, 1, 0,  COLOR, 0     , BG_Blended_Opaque_32  ,                ,            )
@@ -1222,7 +1267,7 @@ BUILD_RENDER_LINE(64_Blended_SP         , 1, 0, 0,  COLOR, SUBPIX,
 BUILD_RENDER_LINE(64_Blended_Opaque_SP  , 1, 1, 0,  COLOR, SUBPIX, BG_Blended_Opaque_32  ,                ,            )
 BUILD_RENDER_LINE(64_LCD                , 0, 0, 1,    LCD, 0     ,                       ,                ,            )
 BUILD_RENDER_LINE(64_LCD_SP             , 0, 0, 1,    LCD, SUBPIX,                       ,                ,            )
-
+#elif defined(HAVE_BLIT_GLYPH_32)
 BUILD_RENDER_LINE(32_Shaded             , 0, 0, 0, PIXMAP, 0     ,                       ,                , BG_32      )
 BUILD_RENDER_LINE(32_Blended            , 1, 0, 0,  COLOR, 0     ,                       , BG_Blended_32  ,            )
 BUILD_RENDER_LINE(32_Blended_Opaque     , 1, 1, 0,  COLOR, 0     , BG_Blended_Opaque_32  ,                ,            )
@@ -1232,6 +1277,18 @@ BUILD_RENDER_LINE(32_Blended_SP         , 1, 0, 0,  COLOR, SUBPIX,
 BUILD_RENDER_LINE(32_Blended_Opaque_SP  , 1, 1, 0,  COLOR, SUBPIX, BG_Blended_Opaque_32  ,                ,            )
 BUILD_RENDER_LINE(32_LCD                , 0, 0, 1,    LCD, 0     ,                       ,                ,            )
 BUILD_RENDER_LINE(32_LCD_SP             , 0, 0, 1,    LCD, SUBPIX,                       ,                ,            )
+#else
+BUILD_RENDER_LINE(8_Shaded              , 0, 0, 0, PIXMAP, 0     ,                       ,                , BG         )
+BUILD_RENDER_LINE(8_Blended             , 1, 0, 0,  COLOR, 0     ,                       , BG_Blended     ,            )
+BUILD_RENDER_LINE(8_Blended_Opaque      , 1, 1, 0,  COLOR, 0     , BG_Blended_Opaque     ,                ,            )
+BUILD_RENDER_LINE(8_Solid               , 0, 0, 0, BITMAP, 0     ,                       ,                , BG         )
+BUILD_RENDER_LINE(8_Shaded_SP           , 0, 0, 0, PIXMAP, SUBPIX,                       ,                , BG         )
+BUILD_RENDER_LINE(8_Blended_SP          , 1, 0, 0,  COLOR, SUBPIX,                       , BG_Blended     ,            )
+BUILD_RENDER_LINE(8_Blended_Opaque_SP   , 1, 1, 0,  COLOR, SUBPIX, BG_Blended_Opaque     ,                ,            )
+BUILD_RENDER_LINE(8_LCD                 , 0, 0, 1,    LCD, 0     ,                       ,                ,            )
+BUILD_RENDER_LINE(8_LCD_SP              , 0, 0, 1,    LCD, SUBPIX,                       ,                ,            )
+#endif
+
 
 #if TTF_USE_SDF
 static int (*Render_Line_SDF_Shaded)(TTF_Font *font, SDL_Surface *textbuf, int xstart, int ystart, SDL_Color *fg) = NULL;
@@ -1294,23 +1351,56 @@ static SDL_INLINE int Render_Line(const render_mode_t render_mode, int subpixel,
     }
 #endif
 
+#if defined(HAVE_NEON_INTRINSICS)
+    if (hasNEON()) {
+        Call_Specific_Render_Line(NEON)
+    }
+#endif
 #if defined(HAVE_SSE2_INTRINSICS)
     if (hasSSE2()) {
         Call_Specific_Render_Line(SSE)
     }
 #endif
-#if defined(HAVE_NEON_INTRINSICS)
-    if (hasNEON()) {
-        Call_Specific_Render_Line(NEON)
-    }
+#if defined(HAVE_BLIT_GLYPH_64)
+    Call_Specific_Render_Line(64)
+#elif defined(HAVE_BLIT_GLYPH_32)
+    Call_Specific_Render_Line(32)
+#else
+    Call_Specific_Render_Line(8)
 #endif
-    if (sizeof(void*) >= 8) {
-        Call_Specific_Render_Line(64)
-    } else {
-        Call_Specific_Render_Line(32)
+}
+
+#ifndef SIZE_MAX /* fallback when the toolchain headers do not provide SIZE_MAX */
+# define SIZE_MAX ((size_t) -1)
+#endif
+
+#if !SDL_VERSION_ATLEAST(2, 23, 1) /* older SDL: map SDL_size_*_overflow() onto the local helpers below */
+SDL_FORCE_INLINE int compat_size_add_overflow (size_t a, /* stores a + b in *ret and returns 0; returns -1 (leaving *ret untouched) on overflow */
+                                               size_t b,
+                                               size_t *ret)
+{
+    if (b > SIZE_MAX - a) { /* a + b would exceed SIZE_MAX */
+        return -1;
     }
+    *ret = a + b;
+    return 0;
+}
+
+SDL_FORCE_INLINE int compat_size_mul_overflow (size_t a, /* stores a * b in *ret and returns 0; returns -1 (leaving *ret untouched) on overflow */
+                                               size_t b,
+                                               size_t *ret)
+{
+    if (a != 0 && b > SIZE_MAX / a) { /* a * b would exceed SIZE_MAX; a == 0 can never overflow and must not be divided by */
+        return -1;
+    }
+    *ret = a * b;
+    return 0;
 }
 
+#define SDL_size_add_overflow(a, b, r) compat_size_add_overflow(a, b, r)
+#define SDL_size_mul_overflow(a, b, r) compat_size_mul_overflow(a, b, r)
+#endif /* SDL < 2.23.1 */
+
 /* Create a surface with memory:
  * - pitch is rounded to alignment
  * - address is aligned
@@ -1323,23 +1413,58 @@ static SDL_INLINE int Render_Line(const render_mode_t render_mode, int subpixel,
  */
 static SDL_Surface *AllocateAlignedPixels(size_t width, size_t height, SDL_PixelFormatEnum format, Uint32 bgcolor)
 {
+    const size_t alignment = Get_Alignement() - 1; /* alignment mask: required alignment (16, 8, 4 or 1) minus one */
+    const size_t bytes_per_pixel = SDL_BYTESPERPIXEL(format);
     SDL_Surface *textbuf = NULL;
+    size_t size; /* bytes to allocate: pixel data plus room for the saved pointer and alignment slack */
+    size_t data_bytes; /* height * pitch */
+    void *pixels, *ptr; /* pixels: aligned address handed to SDL; ptr: raw SDL_malloc() result */
+    size_t pitch;
+
+    /* Worst case at the end of line pulling 'alignment' extra blank pixels */
+    if (width > SDL_MAX_SINT32 ||
+        height > SDL_MAX_SINT32 ||
+        SDL_size_add_overflow(width, alignment, &pitch) ||
+        SDL_size_mul_overflow(pitch, bytes_per_pixel, &pitch) ||
+        SDL_size_add_overflow(pitch, alignment, &pitch) ||
+        pitch > SDL_MAX_SINT32) {
+        return NULL; /* NOTE(review): no SDL error is set here, unlike the removed code which called SDL_OutOfMemory() */
+    }
+    pitch &= ~alignment; /* clear the low bits so pitch is a multiple of (alignment + 1) */
 
-    if (width > SDL_MAX_SINT32 || height > SDL_MAX_SINT32) {
-        SDL_OutOfMemory();
+    if (SDL_size_mul_overflow(height, pitch, &data_bytes) ||
+        SDL_size_add_overflow(data_bytes, sizeof (void *) + alignment, &size) ||
+        size > SDL_MAX_SINT32) {
+        /* Overflow... (NOTE(review): this path also returns without setting an SDL error) */
         return NULL;
     }
 
-    textbuf = SDL_CreateRGBSurfaceWithFormat(0, (int)width, (int)height, 0, format);
-    if (textbuf) {
-        size_t data_bytes = (size_t)textbuf->h * textbuf->pitch;
-        if (SDL_BYTESPERPIXEL(format) == 4) {
-            SDL_memset4(textbuf->pixels, bgcolor, data_bytes / 4);
-        }
-        else {
-            SDL_memset(textbuf->pixels, (bgcolor & 0xff), data_bytes);
-        }
+    ptr = SDL_malloc(size);
+    if (ptr == NULL) {
+        return NULL; /* NOTE(review): allocation failure is reported only through the NULL return */
+    }
+
+    /* address is aligned */
+    pixels = (void *)(((uintptr_t)ptr + sizeof(void *) + alignment) & ~alignment);
+    ((void **)pixels)[-1] = ptr; /* stash the raw malloc pointer in the slot just before the aligned pixels */
+
+    textbuf = SDL_CreateRGBSurfaceWithFormatFrom(pixels, (int)width, (int)height, 0, (int)pitch, format);
+    if (textbuf == NULL) {
+        SDL_free(ptr); /* the surface never took the buffer, so release it here */
+        return NULL;
+    }
+
+    /* Let SDL handle the memory allocation */
+    textbuf->flags &= ~SDL_PREALLOC; /* NOTE(review): presumably lets SDL_FreeSurface() release the pixels - confirm against SDL */
+    textbuf->flags |= SDL_SIMD_ALIGNED; /* NOTE(review): presumably makes SDL free through the pointer stashed at pixels[-1] - confirm against SDL's SIMD allocator */
+
+    if (bytes_per_pixel == 4) {
+        SDL_memset4(pixels, bgcolor, data_bytes / 4); /* fill with the 32-bit background color, counted in 4-byte words */
     }
+    else {
+        SDL_memset(pixels, (bgcolor & 0xff), data_bytes); /* other formats: fill every byte with the low byte of bgcolor */
+    }
+
     return textbuf;
 }
 
@@ -1545,7 +1670,7 @@ int TTF_Init(void)
     compil_neon = 1;
 #  endif
     SDL_Log("SDL_ttf: hasSSE2=%d hasNEON=%d alignment=%d duffs_loop=%d compil_sse2=%d compil_neon=%d",
-            sse2, neon, SDL_SIMDGetAlignment(), duffs, compil_sse2, compil_neon);
+            sse2, neon, Get_Alignement(), duffs, compil_sse2, compil_neon);
 
     SDL_Log("Sizeof TTF_Image: %d c_glyph: %d TTF_Font: %d", sizeof (TTF_Image), sizeof (c_glyph), sizeof (TTF_Font));
 #endif
@@ -1957,7 +2082,7 @@ static void Flush_Cache(TTF_Font *font)
 
 static FT_Error Load_Glyph(TTF_Font *font, c_glyph *cached, int want, int translation)
 {
-    const int alignment = SDL_SIMDGetAlignment() - 1;
+    const int alignment = Get_Alignement() - 1;
     FT_GlyphSlot slot;
     FT_Error error;