SDL: Remove ARM32 assembly/pixman blitters

From 0f351cd6af832663ebd00e20719010d15c853417 Mon Sep 17 00:00:00 2001
From: Isaac Aronson <[EMAIL REDACTED]>
Date: Wed, 13 Sep 2023 10:10:14 -0500
Subject: [PATCH] Remove ARM32 assembly/pixman blitters

---
 CMakeLists.txt                      |   62 --
 cmake/3rdparty.cmake                |    4 -
 src/video/SDL_blit.h                |    6 -
 src/video/SDL_blit_A.c              |   88 +-
 src/video/SDL_fillrect.c            |   83 +-
 src/video/arm/pixman-arm-asm.h      |   36 -
 src/video/arm/pixman-arm-neon-asm.S |  375 ---------
 src/video/arm/pixman-arm-neon-asm.h | 1184 ---------------------------
 src/video/arm/pixman-arm-simd-asm.S |  532 ------------
 src/video/arm/pixman-arm-simd-asm.h | 1034 -----------------------
 10 files changed, 3 insertions(+), 3401 deletions(-)
 delete mode 100644 src/video/arm/pixman-arm-asm.h
 delete mode 100644 src/video/arm/pixman-arm-neon-asm.S
 delete mode 100644 src/video/arm/pixman-arm-neon-asm.h
 delete mode 100644 src/video/arm/pixman-arm-simd-asm.S
 delete mode 100644 src/video/arm/pixman-arm-simd-asm.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a40bc609a4c14..14be990895425 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -292,7 +292,6 @@ dep_option(SDL_MMX                 "Use MMX assembly routines" ON "SDL_ASSEMBLY;
 dep_option(SDL_ALTIVEC             "Use Altivec assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_POWERPC32 OR SDL_CPU_POWERPC64" OFF)
 dep_option(SDL_ARMSIMD             "Use SIMD assembly blitters on ARM" OFF "SDL_ASSEMBLY;SDL_CPU_ARM32" OFF)
 dep_option(SDL_ARMNEON             "Use NEON assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_ARM32 OR SDL_CPU_ARM64" OFF)
-dep_option(SDL_ARMNEON_BLITTERS    "Use NEON assembly blitters on ARM32" OFF "SDL_VIDEO;SDL_ASSEMBLY;SDL_ARMNEON;SDL_CPU_ARM32" OFF)
 dep_option(SDL_LSX                 "Use LSX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_LOONGARCH64" OFF)
 dep_option(SDL_LASX                "Use LASX assembly routines" ON "SDL_ASSEMBLY;SDL_CPU_LOONGARCH64" OFF)
 
@@ -883,67 +882,6 @@ if(SDL_ASSEMBLY)
       endif()
     endif()
 
-    if(SDL_ARMSIMD)
-      cmake_push_check_state()
-      string(APPEND CMAKE_REQUIRED_FLAGS " -x assembler-with-cpp")
-      list(APPEND CMAKE_REQUIRED_LINK_OPTIONS -x none)
-      check_c_source_compiles("
-        .text
-        .arch armv6
-        .object_arch armv4
-        .arm
-        .altmacro
-        #ifndef __ARM_EABI__
-        #error EABI is required (to be sure that calling conventions are compatible)
-        #endif
-        main:
-        .global main
-        pld [r0]
-        uqadd8 r0, r0, r0
-      " ARMSIMD_FOUND)
-      cmake_pop_check_state()
-
-      if(ARMSIMD_FOUND)
-        set(HAVE_ARMSIMD TRUE)
-        set(SDL_ARM_SIMD_BLITTERS 1)
-        enable_language(ASM)
-        sdl_glob_sources("${SDL3_SOURCE_DIR}/src/video/arm/pixman-arm-simd*.S")
-        set_property(SOURCE ${ARMSIMD_SOURCES} APPEND PROPERTY COMPILE_OPTIONS -x assembler-with-cpp)
-        set(WARN_ABOUT_ARM_SIMD_ASM_MIT TRUE)
-      endif()
-    endif()
-
-    if(SDL_ARMNEON_BLITTERS)
-      cmake_push_check_state()
-      string(APPEND CMAKE_REQUIRED_FLAGS " -x assembler-with-cpp")
-      list(APPEND CMAKE_REQUIRED_LINK_OPTIONS -x none)
-      check_c_source_compiles("
-        .text
-        .fpu neon
-        .arch armv7a
-        .object_arch armv4
-        .eabi_attribute 10, 0
-        .arm
-        .altmacro
-        #ifndef __ARM_EABI__
-        #error EABI is required (to be sure that calling conventions are compatible)
-        #endif
-        main:
-        .global main
-        pld [r0]
-        vmovn.u16 d0, q0
-      " COMPILER_SUPPORTS_ARMNEON_ASSEMBLY)
-      cmake_pop_check_state()
-      if(COMPILER_SUPPORTS_ARMNEON_ASSEMBLY)
-        set(HAVE_ARMNEON_BLITTERS TRUE)
-        set(SDL_ARM_NEON_BLITTERS 1)
-        enable_language(ASM)
-        sdl_glob_sources("${SDL3_SOURCE_DIR}/src/video/arm/pixman-arm-neon*.S")
-        set_property(SOURCE ${ARMNEON_SOURCES} APPEND PROPERTY COMPILE_OPTIONS -x assembler-with-cpp)
-        set(WARN_ABOUT_ARM_NEON_ASM_MIT TRUE)
-      endif()
-    endif()
-
     if(SDL_ARMNEON)
       check_c_source_compiles("
         #include <arm_neon.h>
diff --git a/cmake/3rdparty.cmake b/cmake/3rdparty.cmake
index becb6d3abebe3..8b38a5ded6b6c 100644
--- a/cmake/3rdparty.cmake
+++ b/cmake/3rdparty.cmake
@@ -25,10 +25,6 @@ function(get_clang_tidy_ignored_files OUTVAR)
       # HIDAPI Steam controller
       "controller_constants.h"
       "controller_structs.h"
-      # Nokia Pixman
-      "pixman-arm-asm.h"
-      "pixman-arm-neon-asm.h"
-      "pixman-arm-simd-asm.h"
       # YUV2RGB
       "yuv_rgb.c"
       "yuv_rgb_lsx_func.h"
diff --git a/src/video/SDL_blit.h b/src/video/SDL_blit.h
index ba5b95904e3a1..90588153adbe7 100644
--- a/src/video/SDL_blit.h
+++ b/src/video/SDL_blit.h
@@ -23,12 +23,6 @@
 #ifndef SDL_blit_h_
 #define SDL_blit_h_
 
-/* pixman ARM blitters are 32 bit only : */
-#if defined(__aarch64__) || defined(_M_ARM64)
-#undef SDL_ARM_SIMD_BLITTERS
-#undef SDL_ARM_NEON_BLITTERS
-#endif
-
 /* Table to do pixel byte expansion */
 extern const Uint8 *SDL_expand_byte[9];
 extern const Uint16 SDL_expand_byte_10[];
diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c
index ed3c941e04c46..7f272fd0905cc 100644
--- a/src/video/SDL_blit_A.c
+++ b/src/video/SDL_blit_A.c
@@ -421,66 +421,6 @@ static void SDL_TARGETING("mmx") BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info)
 
 #endif /* SDL_MMX_INTRINSICS */
 
-#ifdef SDL_ARM_SIMD_BLITTERS
-void BlitARGBto565PixelAlphaARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
-
-static void BlitARGBto565PixelAlphaARMSIMD(SDL_BlitInfo *info)
-{
-    int32_t width = info->dst_w;
-    int32_t height = info->dst_h;
-    uint16_t *dstp = (uint16_t *)info->dst;
-    int32_t dststride = width + (info->dst_skip >> 1);
-    uint32_t *srcp = (uint32_t *)info->src;
-    int32_t srcstride = width + (info->src_skip >> 2);
-
-    BlitARGBto565PixelAlphaARMSIMDAsm(width, height, dstp, dststride, srcp, srcstride);
-}
-
-void BlitRGBtoRGBPixelAlphaARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
-
-static void BlitRGBtoRGBPixelAlphaARMSIMD(SDL_BlitInfo *info)
-{
-    int32_t width = info->dst_w;
-    int32_t height = info->dst_h;
-    uint32_t *dstp = (uint32_t *)info->dst;
-    int32_t dststride = width + (info->dst_skip >> 2);
-    uint32_t *srcp = (uint32_t *)info->src;
-    int32_t srcstride = width + (info->src_skip >> 2);
-
-    BlitRGBtoRGBPixelAlphaARMSIMDAsm(width, height, dstp, dststride, srcp, srcstride);
-}
-#endif
-
-#ifdef SDL_ARM_NEON_BLITTERS
-void BlitARGBto565PixelAlphaARMNEONAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
-
-static void BlitARGBto565PixelAlphaARMNEON(SDL_BlitInfo *info)
-{
-    int32_t width = info->dst_w;
-    int32_t height = info->dst_h;
-    uint16_t *dstp = (uint16_t *)info->dst;
-    int32_t dststride = width + (info->dst_skip >> 1);
-    uint32_t *srcp = (uint32_t *)info->src;
-    int32_t srcstride = width + (info->src_skip >> 2);
-
-    BlitARGBto565PixelAlphaARMNEONAsm(width, height, dstp, dststride, srcp, srcstride);
-}
-
-void BlitRGBtoRGBPixelAlphaARMNEONAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
-
-static void BlitRGBtoRGBPixelAlphaARMNEON(SDL_BlitInfo *info)
-{
-    int32_t width = info->dst_w;
-    int32_t height = info->dst_h;
-    uint32_t *dstp = (uint32_t *)info->dst;
-    int32_t dststride = width + (info->dst_skip >> 2);
-    uint32_t *srcp = (uint32_t *)info->src;
-    int32_t srcstride = width + (info->src_skip >> 2);
-
-    BlitRGBtoRGBPixelAlphaARMNEONAsm(width, height, dstp, dststride, srcp, srcstride);
-}
-#endif
-
 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
 static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info)
 {
@@ -1274,21 +1214,7 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
             }
 
         case 2:
-#if defined(SDL_ARM_NEON_BLITTERS) || defined(SDL_ARM_SIMD_BLITTERS)
-            if (sf->bytes_per_pixel == 4 && sf->Amask == 0xff000000 && sf->Gmask == 0xff00 && df->Gmask == 0x7e0 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
-#ifdef SDL_ARM_NEON_BLITTERS
-                if (SDL_HasNEON()) {
-                    return BlitARGBto565PixelAlphaARMNEON;
-                }
-#endif
-#ifdef SDL_ARM_SIMD_BLITTERS
-                if (SDL_HasARMSIMD()) {
-                    return BlitARGBto565PixelAlphaARMSIMD;
-                }
-#endif
-            }
-#endif
-            if (sf->bytes_per_pixel == 4 && sf->Amask == 0xff000000 && sf->Gmask == 0xff00 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
+            if (sf->bytes_per_pixel == 4 && sf->Amask == 0xff000000 && sf->Gmask == 0xff00 && ((sf->Rmask == 0xff && df->Rmask == 0x1f) || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
                 if (df->Gmask == 0x7e0) {
                     return BlitARGBto565PixelAlpha;
                 } else if (df->Gmask == 0x3e0) {
@@ -1311,18 +1237,6 @@ SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
                     }
                 }
 #endif /* SDL_MMX_INTRINSICS */
-                if (sf->Amask == 0xff000000) {
-#ifdef SDL_ARM_NEON_BLITTERS
-                    if (SDL_HasNEON()) {
-                        return BlitRGBtoRGBPixelAlphaARMNEON;
-                    }
-#endif
-#ifdef SDL_ARM_SIMD_BLITTERS
-                    if (SDL_HasARMSIMD()) {
-                        return BlitRGBtoRGBPixelAlphaARMSIMD;
-                    }
-#endif
-                }
             }
             return BlitNtoNPixelAlpha;
 
diff --git a/src/video/SDL_fillrect.c b/src/video/SDL_fillrect.c
index e38d7f73d90b7..04aa925fbeaf7 100644
--- a/src/video/SDL_fillrect.c
+++ b/src/video/SDL_fillrect.c
@@ -247,54 +247,6 @@ int SDL_FillSurfaceRect(SDL_Surface *dst, const SDL_Rect *rect, Uint32 color)
     return SDL_FillSurfaceRects(dst, rect, 1, color);
 }
 
-#ifdef SDL_ARM_NEON_BLITTERS
-void FillSurfaceRect8ARMNEONAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
-void FillSurfaceRect16ARMNEONAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
-void FillSurfaceRect32ARMNEONAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
-
-static void fill_8_neon(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
-{
-    FillSurfaceRect8ARMNEONAsm(w, h, (uint8_t *)pixels, pitch >> 0, color);
-    return;
-}
-
-static void fill_16_neon(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
-{
-    FillSurfaceRect16ARMNEONAsm(w, h, (uint16_t *)pixels, pitch >> 1, color);
-    return;
-}
-
-static void fill_32_neon(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
-{
-    FillSurfaceRect32ARMNEONAsm(w, h, (uint32_t *)pixels, pitch >> 2, color);
-    return;
-}
-#endif
-
-#ifdef SDL_ARM_SIMD_BLITTERS
-void FillSurfaceRect8ARMSIMDAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
-void FillSurfaceRect16ARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
-void FillSurfaceRect32ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
-
-static void fill_8_simd(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
-{
-    FillSurfaceRect8ARMSIMDAsm(w, h, (uint8_t *)pixels, pitch >> 0, color);
-    return;
-}
-
-static void fill_16_simd(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
-{
-    FillSurfaceRect16ARMSIMDAsm(w, h, (uint16_t *)pixels, pitch >> 1, color);
-    return;
-}
-
-static void fill_32_simd(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
-{
-    FillSurfaceRect32ARMSIMDAsm(w, h, (uint32_t *)pixels, pitch >> 2, color);
-    return;
-}
-#endif
-
 int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
                   Uint32 color)
 {
@@ -339,39 +291,8 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
         return SDL_SetError("SDL_FillSurfaceRects(): Unsupported surface format");
     }
 
-#ifdef SDL_ARM_NEON_BLITTERS
-    if (SDL_HasNEON() && dst->format->bytes_per_pixel != 3 && !fill_function) {
-        switch (dst->format->bytes_per_pixel) {
-        case 1:
-            fill_function = fill_8_neon;
-            break;
-        case 2:
-            fill_function = fill_16_neon;
-            break;
-        case 4:
-            fill_function = fill_32_neon;
-            break;
-        }
-    }
-#endif
-#ifdef SDL_ARM_SIMD_BLITTERS
-    if (SDL_HasARMSIMD() && dst->format->bytes_per_pixel != 3 && !fill_function) {
-        switch (dst->format->bytes_per_pixel) {
-        case 1:
-            fill_function = fill_8_simd;
-            break;
-        case 2:
-            fill_function = fill_16_simd;
-            break;
-        case 4:
-            fill_function = fill_32_simd;
-            break;
-        }
-    }
-#endif
-
-    if (!fill_function) {
-        switch (dst->format->bytes_per_pixel) {
+    if (fill_function == NULL) {
+        switch (dst->format->bytes_per_pixel) {
         case 1:
         {
             color |= (color << 8);
diff --git a/src/video/arm/pixman-arm-asm.h b/src/video/arm/pixman-arm-asm.h
deleted file mode 100644
index 3f13ba0495da5..0000000000000
--- a/src/video/arm/pixman-arm-asm.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright © 2010 Nokia Corporation
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Mozilla Corporation not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Mozilla Corporation makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
- *
- */
-
-/* Supplementary macro for setting function attributes */
-.macro pixman_asm_function fname
-	.func fname
-	.global fname
-#ifdef __ELF__
-	.hidden fname
-	.type fname, %function
-#endif
-fname:
-.endm
diff --git a/src/video/arm/pixman-arm-neon-asm.S b/src/video/arm/pixman-arm-neon-asm.S
deleted file mode 100644
index f9549621acbd2..0000000000000
--- a/src/video/arm/pixman-arm-neon-asm.S
+++ /dev/null
@@ -1,375 +0,0 @@
-/*
- * Copyright © 2009 Nokia Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
- */
-
-/*
- * Copyright (c) 2018 RISC OS Open Ltd
- *
- * This software is provided 'as-is', without any express or implied
- * warranty.  In no event will the authors be held liable for any damages
- * arising from the use of this software.
- *
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to the following restrictions:
- *
- * 1. The origin of this software must not be misrepresented; you must not
- *    claim that you wrote the original software. If you use this software
- *    in a product, an acknowledgment in the product documentation would be
- *    appreciated but is not required.
- * 2. Altered source versions must be plainly marked as such, and must not be
- *    misrepresented as being the original software.
- * 3. This notice may not be removed or altered from any source distribution.
- */
-
-/* Prevent the stack from becoming executable for no reason... */
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
-
-    .text
-    .fpu neon
-    .arch armv7a
-    .object_arch armv4
-    .eabi_attribute 10, 0 /* suppress Tag_FP_arch */
-    .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */
-    .arm
-    .altmacro
-    .p2align 2
-
-#include "pixman-arm-asm.h"
-#include "pixman-arm-neon-asm.h"
-
-/* Global configuration options and preferences */
-
-/*
- * The code can optionally make use of unaligned memory accesses to improve
- * performance of handling leading/trailing pixels for each scanline.
- * Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for
- * example in linux if unaligned memory accesses are not configured to
- * generate.exceptions.
- */
-.set RESPECT_STRICT_ALIGNMENT, 1
-
-/*
- * Set default prefetch type. There is a choice between the following options:
- *
- * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work
- * as NOP to workaround some HW bugs or for whatever other reason)
- *
- * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where
- * advanced prefetch introduces heavy overhead)
- *
- * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8
- * which can run ARM and NEON instructions simultaneously so that extra ARM
- * instructions do not add (many) extra cycles, but improve prefetch efficiency)
- *
- * Note: some types of function can't support advanced prefetch and fallback
- *       to simple one (those which handle 24bpp pixels)
- */
-.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED
-
-/* Prefetch distance in pixels for simple prefetch */
-.set PREFETCH_DISTANCE_SIMPLE, 64
-
-/******************************************************************************/
-
-/* We can actually do significantly better than the Pixman macros, at least for
- * the case of fills, by using a carefully scheduled inner loop. Cortex-A53
- * shows an improvement of up to 78% in ideal cases (large fills to L1 cache).
- */
-
-.macro generate_fillrect_function name, bpp, log2Bpp
-/*
- * void name(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
- * On entry:
- * a1 = width, pixels
- * a2 = height, rows
- * a3 = pointer to top-left destination pixel
- * a4 = stride, pixels
- * [sp] = pixel value to fill with
- * Within the function:
- * v1 = width remaining
- * v2 = vst offset
- * v3 = alternate pointer
- * ip = data ARM register
- */
-pixman_asm_function name
-    vld1.\bpp   {d0[],d1[]}, [sp]
-    sub         a4, a1
-    vld1.\bpp   {d2[],d3[]}, [sp]
-    cmp         a1, #(15+64) >> \log2Bpp
-    push        {v1-v3,lr}
-    vmov        ip, s0
-    blo         51f
-
-    /* Long-row case */
-    mov         v2, #64
-1:  mov         v1, a1
-    ands        v3, a3, #15
-    beq         2f
-    /* Leading pixels */
-    rsb         v3, v3, #16  /* number of leading bytes until 16-byte aligned */
-    sub         v1, v1, v3, lsr #\log2Bpp
-    rbit        v3, v3
-.if bpp <= 16
-.if bpp == 8
-    tst         a3, #1       /* bit 0 unaffected by rsb so can avoid register interlock */
-    strneb      ip, [a3], #1
-    tst         v3, #1<<30
-.else
-    tst         a3, #2       /* bit 1 unaffected by rsb (assuming halfword alignment) so can avoid register interlock */
-.endif
-    strneh      ip, [a3], #2
-.endif
-    movs        v3, v3, lsl #3
-    vstmcs      a3!, {s0}
-    vstmmi      a3!, {d0}
-2:  sub         v1, v1, #64 >> \log2Bpp /* simplifies inner loop termination */
-    add         v3, a3, #32
-    /* Inner loop */
-3:  vst1.\bpp   {q0-q1}, [a3 :128], v2
-    subs        v1, v1, #64 >> \log2Bpp
-    vst1.\bpp   {q0-q1}, [v3 :128], v2
-    bhs         3b
-    /* Trailing pixels */
-4:  movs        v1, v1, lsl #27 + \log2Bpp
-    bcc         5f
-    vst1.\bpp   {q0-q1}, [a3 :128]!
-5:  bpl         6f
-    vst1.\bpp   {q0}, [a3 :128]!
-6:  movs        v1, v1, lsl #2
-    vstmcs      a3!, {d0}
-    vstmmi      a3!, {s0}
-.if bpp <= 16
-    movs        v1, v1, lsl #2
-    strcsh      ip, [a3], #2
-.if bpp == 8
-    strmib      ip, [a3], #1
-.endif
-.endif
-    subs        a2, a2, #1
-    add         a3, a3, a4, lsl #\log2Bpp
-    bhi         1b
-    pop         {v1-v3,pc}
-
-    /* Short-row case */
-51: movs        v1, a1
-.if bpp == 8
-    tst         a3, #3
-    beq         53f
-52: subs        v1, v1, #1
-    blo         57f
-    strb        ip, [a3], #1
-    tst         a3, #3
-    bne         52b
-.elseif bpp == 16
-    tstne       a3, #2
-    subne       v1, v1, #1
-    strneh      ip, [a3], #2
-.endif
-53: cmp         v1, #32 >> \log2Bpp
-    bcc         54f
-    vst1.\bpp   {q0-q1}, [a3]!
-    sub         v1, v1, #32 >> \log2Bpp
-    /* Trailing pixels */
-54: movs        v1, v1, lsl #27 + \log2Bpp
-    bcc         55f
-    vst1.\bpp   {q0-q1}, [a3]!
-55: bpl         56f
-    vst1.\bpp   {q0}, [a3]!
-56: movs        v1, v1, lsl #2
-    vstmcs      a3!, {d0}
-    vstmmi      a3!, {s0}
-.if bpp <= 16
-    movs        v1, v1, lsl #2
-    strcsh      ip, [a3], #2
-.if bpp == 8
-    strmib      ip, [a3], #1
-.endif
-.endif
-    subs        a2, a2, #1
-    add         a3, a3, a4, lsl #\log2Bpp
-    bhi         51b
-57: pop         {v1-v3,pc}
-
-.endfunc
-.endm
-
-generate_fillrect_function FillSurfaceRect32ARMNEONAsm, 32, 2
-generate_fillrect_function FillSurfaceRect16ARMNEONAsm, 16, 1
-generate_fillrect_function FillSurfaceRect8ARMNEONAsm,  8,  0
-
-/******************************************************************************/
-
-.macro RGBtoRGBPixelAlpha_process_pixblock_head
-    vmvn        d30, d3  /* get inverted source alpha */
-    vmov        d31, d7  /* dest alpha is always unchanged */
-    vmull.u8    q14, d0, d3
-    vmlal.u8    q14, d4, d30
-    vmull.u8    q0, d1, d3
-    vmlal.u8    q0, d5, d30
-    vmull.u8    q1, d2, d3
-    vmlal.u8    q1, d6, d30
-    vrshr.u16   q2, q14, #8
-    vrshr.u16   q3, q0, #8
-    vraddhn.u16 d28, q14, q2
-    vrshr.u16   q2, q1, #8
-    vraddhn.u16 d29, q0, q3
-    vraddhn.u16 d30, q1, q2
-.endm
-
-.macro RGBtoRGBPixelAlpha_process_pixblock_tail
-    /* nothing */
-.endm
-
-.macro RGBtoRGBPixelAlpha_process_pixblock_tail_head
-    vld4.8      {d0-d3}, [SRC]!
-                                    PF add PF_X, PF_X, #8
-        vst4.8      {d28-d31}, [DST_W :128]!
-                                    PF tst PF_CTL, #0xF
-    vld4.8      {d4-d7}, [DST_R :128]!
-                                    PF addne PF_X, PF_X, #8
-    vmvn        d30, d3  /* get inverted source alpha */
-    vmov        d31, d7  /* dest alpha is always unchanged */
-    vmull.u8    q14, d0, d3
-                                    PF subne PF_CTL, PF_CTL, #1
-    vmlal.u8    q14, d4, d30
-                                    PF cmp PF_X, ORIG_W
-    vmull.u8    q0, d1, d3
-                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-    vmlal.u8    q0, d5, d30
-                                    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
-    vmull.u8    q1, d2, d3
-                                    PF subge PF_X, PF_X, ORIG_W
-    vmlal.u8    q1, d6, d30
-                                    PF subges PF_CTL, PF_CTL, #0x10
-    vrshr.u16   q2, q14, #8
-                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-    vrshr.u16   q3, q0, #8
-                                    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-    vraddhn.u16 d28, q14, q2
-    vrshr.u16   q2, q1, #8
-    vraddhn.u16 d29, q0, q3
-    vraddhn.u16 d30, q1, q2
-.endm
-
-generate_composite_function \
-    BlitRGBtoRGBPixelAlphaARMNEONAsm, 32, 0, 32, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    5, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    RGBtoRGBPixelAlpha_process_pixblock_head, \
-    RGBtoRGBPixelAlpha_process_pixblock_tail, \
-    RGBtoRGBPixelAlpha_process_pixblock_tail_head
-
- /******************************************************************************/
-
-.macro ARGBto565PixelAlpha_process_pixblock_head
-    vmvn        d6, d3
-    vshr.u8     d1, #2
-    vshr.u8     d3, #3
-    vshr.u8     d0, #3
-    vshrn.u16   d7, q2, #3
-    vshrn.u16   d25, q2, #8
-    vbic.i16    q2, #0xe0
-    vshr.u8     d6, #3
-    vshr.u8     d7, #2
-    vshr.u8     d2, #3
-    vmovn.u16   d24, q2
-    vshr.u8     d25, #3
-    vmull.u8    q13, d1, d3
-    vmlal.u8    q13, d7, d6
-    vmull.u8    q14, d0, d3
-    vmlal.u8    q14, d24, d6
-    vmull.u8    q15, d2, d3
-    vmlal.u8    q15, d25, d6
-.endm
-
-.macro ARGBto565PixelAlpha_process_pixblock_tail
-    vsra.u16    q13, #5
-    vsra.u16    q14, #5
-    vsra.u16    q15, #5
-    vrshr.u16   q13, #5
-    vrshr.u16   q14, #5
-    vrshr.u16   q15, #5
-    vsli.u16    q14, q13, #5
-    vsli.u16    q14, q15, #11
-.endm
-
-.macro ARGBto565PixelAlpha_process_pixblock_tail_head
-    vld4.8      {d0-d3}, [SRC]!
-                                    PF add PF_X, PF_X, #8
-        vsra.u16    q13, #5
-                                    PF tst PF_CTL, #0xF
-        vsra.u16    q14, #5
-                                    PF addne PF_X, PF_X, #8
-        vsra.u16    q15, #5
-                                    PF subne PF_CTL, PF_CTL, #1
-        vrshr.u16   q13, #5
-                                    PF cmp PF_X, ORIG_W
-        vrshr.u16   q14, #5
-                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-        vrshr.u16   q15, #5
-                                    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
-    vld1.8      {d4-d5}, [DST_R]!
-                                    PF subge PF_X, PF_X, ORIG_W
-        vsli.u16    q14, q13, #5
-                                    PF subges PF_CTL, PF_CTL, #0x10
-        vsli.u16    q14, q15, #11
-                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-        vst1.8      {q14}, [DST_W :128]!
-    vmvn        d6, d3
-    vshr.u8     d1, #2
-    vshr.u8     d3, #3
-    vshr.u8     d0, #3
-    vshrn.u16   d7, q2, #3
-    vshrn.u16   d25, q2, #8
-    vbic.i16    q2, #0xe0
-                                    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-    vshr.u8     d6, #3
-    vshr.u8     d7, #2
-    vshr.u8     d2, #3
-    vmovn.u16   d24, q2
-    vshr.u8     d25, #3
-    vmull.u8    q13, d1, d3
-    vmlal.u8    q13, d7, d6
-    vmull.u8    q14, d0, d3
-    vmlal.u8    q14, d24, d6
-    vmull.u8    q15, d2, d3
-    vmlal.u8    q15, d25, d6
-.endm
-
-generate_composite_function \
-    BlitARGBto565PixelAlphaARMNEONAsm, 32, 0, 16, \
-    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
-    8, /* number of pixels, processed in a single block */ \
-    6, /* prefetch distance */ \
-    default_init, \
-    default_cleanup, \
-    ARGBto565PixelAlpha_process_pixblock_head, \
-    ARGBto565PixelAlpha_process_pixblock_tail, \
-    ARGBto565PixelAlpha_process_pixblock_tail_head
diff --git a/src/video/arm/pixman-arm-neon-asm.h b/src/video/arm/pixman-arm-neon-asm.h
deleted file mode 100644
index bdcf6a9d47f30..0000000000000
--- a/src/video/arm/pixman-arm-neon-asm.h
+++ /dev/null
@@ -1,1184 +0,0 @@
-/*
- * Copyright © 2009 Nokia Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
- */
-
-/*
- * This file contains a macro ('generate_composite_function') which can
- * construct 2D image processing functions, based on a common template.
- * Any combinations of source, destination and mask images with 8bpp,
- * 16bpp, 24bpp, 32bpp color formats are supported.
- *
- * This macro takes care of:
- *  - handling of leading and trailing unaligned pixels
- *  - doing most of the work related to L2 cache preload
- *  - encourages the use of software pipelining for better instructions
- *    scheduling
- *
- * The user of this macro has to provide some configuration parameters
- * (bit depths for the images, prefetch distance, etc.) and a set of
- * macros, which should implement basic code chunks responsible for
- * pixels processing. See 'pixman-arm-neon-asm.S' file for the usage
- * examples.
- *
- * TODO:
- *  - try overlapped pixel method (from Ian Rickards) when processing
- *    exactly two blocks of pixels
- *  - maybe add an option to do reverse scanline processing
- */
-
-/*
- * Bit flags for 'generate_composite_function' macro which are used
- * to tune generated functions behavior.
- */
-.set FLAG_DST_WRITEONLY,       0
-.set FLAG_DST_READWRITE,       1
-.set FLAG_DEINTERLEAVE_32BPP,  2
-
-/*
- * Offset in stack where mask and source pointer/stride can be accessed
- * from 'init' macro. This is useful for doing special handling for solid mask.
- */
-.set ARGS_STACK_OFFSET,        40
-
-/*
- * Constants for selecting preferable prefetch type.
- */
-.set PREFETCH_TYPE_NONE,       0 /* No prefetch at all */
-.set PREFETCH_TYPE_SIMPLE,     1 /* A simple, fixed-distance-ahead prefetch */
-.set PREFETCH_TYPE_ADVANCED,   2 /* Advanced fine-grained prefetch */
-
-/*
- * Definitions of supplementary pixld/pixst macros (for partial load/store of
- * pixel data).
- */
-
-.m

(Patch may be truncated, please check the link at the top of this post.)