aom: Tweak fix for aom_highbd_convolve_copy_neon() bug

From dd1e6c7806d041948fdc355ee6d78e33c829b150 Mon Sep 17 00:00:00 2001
From: Wan-Teh Chang <[EMAIL REDACTED]>
Date: Fri, 28 Jun 2024 13:36:26 -0700
Subject: [PATCH] Tweak fix for aom_highbd_convolve_copy_neon() bug

Use memmove() instead of memcpy(), as in aom_highbd_convolve_copy_c()
and aom_highbd_convolve_copy_avx2(). I don't know if the src and dst
buffers may actually overlap, but it's good to be consistent.

Revert the new tests that use unaligned dst buffers, because
aom_highbd_convolve_copy_sse2() calls _mm_store_si128(), which requires
16-byte alignment.

Bug: 349832592

Change-Id: Ieddb00d39385fb690a4997ad0f9dbd2bfac946d7
---
 aom_dsp/arm/aom_convolve_copy_neon.c |  4 +--
 test/av1_convolve_test.cc            | 37 +++++-----------------------
 2 files changed, 8 insertions(+), 33 deletions(-)

diff --git a/aom_dsp/arm/aom_convolve_copy_neon.c b/aom_dsp/arm/aom_convolve_copy_neon.c
index 447ae37e56..a60f37f9e2 100644
--- a/aom_dsp/arm/aom_convolve_copy_neon.c
+++ b/aom_dsp/arm/aom_convolve_copy_neon.c
@@ -59,11 +59,11 @@ void aom_highbd_convolve_copy_neon(const uint16_t *src, ptrdiff_t src_stride,
                                    int h) {
   if (w < 4) {  // copy2
     do {
-      memcpy(dst, src, 4);
+      memmove(dst, src, 2 * sizeof(*src));
       src += src_stride;
       dst += dst_stride;
 
-      memcpy(dst, src, 4);
+      memmove(dst, src, 2 * sizeof(*src));
       src += src_stride;
       dst += dst_stride;
       h -= 2;
diff --git a/test/av1_convolve_test.cc b/test/av1_convolve_test.cc
index 79bb942d0b..8a5c166134 100644
--- a/test/av1_convolve_test.cc
+++ b/test/av1_convolve_test.cc
@@ -11,8 +11,6 @@
 
 #include <cstddef>
 #include <cstdint>
-#include <memory>
-#include <new>
 #include <ostream>
 #include <set>
 #include <vector>
@@ -222,12 +220,12 @@ class AV1ConvolveTest : public ::testing::TestWithParam<TestParam<T>> {
 
   // Check that two 8-bit output buffers are identical.
   void AssertOutputBufferEq(const uint8_t *p1, const uint8_t *p2, int width,
-                            int height, ptrdiff_t stride = kOutputStride) {
+                            int height) {
     ASSERT_TRUE(p1 != p2) << "Buffers must be at different memory locations";
     for (int j = 0; j < height; ++j) {
       if (memcmp(p1, p2, sizeof(*p1) * width) == 0) {
-        p1 += stride;
-        p2 += stride;
+        p1 += kOutputStride;
+        p2 += kOutputStride;
         continue;
       }
       for (int i = 0; i < width; ++i) {
@@ -240,12 +238,12 @@ class AV1ConvolveTest : public ::testing::TestWithParam<TestParam<T>> {
 
   // Check that two 16-bit output buffers are identical.
   void AssertOutputBufferEq(const uint16_t *p1, const uint16_t *p2, int width,
-                            int height, ptrdiff_t stride = kOutputStride) {
+                            int height) {
     ASSERT_TRUE(p1 != p2) << "Buffers must be in different memory locations";
     for (int j = 0; j < height; ++j) {
       if (memcmp(p1, p2, sizeof(*p1) * width) == 0) {
-        p1 += stride;
-        p2 += stride;
+        p1 += kOutputStride;
+        p2 += kOutputStride;
         continue;
       }
       for (int i = 0; i < width; ++i) {
@@ -1126,17 +1124,6 @@ class AV1ConvolveCopyTest : public AV1ConvolveTest<convolve_copy_func> {
     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
     GetParam().TestFunction()(input, width, test, kOutputStride, width, height);
     AssertOutputBufferEq(reference, test, width, height);
-
-    // Test again with dst_stride=width.
-    std::unique_ptr<uint8_t[]> reference2(new (std::nothrow)
-                                              uint8_t[width * height]);
-    ASSERT_NE(reference2, nullptr);
-    aom_convolve_copy_c(input, width, reference2.get(), width, width, height);
-    std::unique_ptr<uint8_t[]> test2(new (std::nothrow)
-                                         uint8_t[width * height]);
-    ASSERT_NE(test2, nullptr);
-    GetParam().TestFunction()(input, width, test2.get(), width, width, height);
-    AssertOutputBufferEq(reference2.get(), test2.get(), width, height, width);
   }
 };
 
@@ -1184,18 +1171,6 @@ class AV1ConvolveCopyHighbdTest
     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
     GetParam().TestFunction()(input, width, test, kOutputStride, width, height);
     AssertOutputBufferEq(reference, test, width, height);
-
-    // Test again with dst_stride=width.
-    std::unique_ptr<uint16_t[]> reference2(new (std::nothrow)
-                                               uint16_t[width * height]);
-    ASSERT_NE(reference2, nullptr);
-    aom_highbd_convolve_copy_c(input, width, reference2.get(), width, width,
-                               height);
-    std::unique_ptr<uint16_t[]> test2(new (std::nothrow)
-                                          uint16_t[width * height]);
-    ASSERT_NE(test2, nullptr);
-    GetParam().TestFunction()(input, width, test2.get(), width, width, height);
-    AssertOutputBufferEq(reference2.get(), test2.get(), width, height, width);
   }
 };