aom: Fix alignment assertions

From 2c308fd916c54ce21e8a8b9f46c17393f273fe44 Mon Sep 17 00:00:00 2001
From: Wan-Teh Chang <[EMAIL REDACTED]>
Date: Fri, 28 Jun 2024 16:26:32 -0700
Subject: [PATCH] Fix alignment assertions

Change-Id: I5bb81929abcb8107b0f2934fe33e6c2dd7ea2318
---
 aom_dsp/x86/aom_convolve_copy_avx2.c | 10 +++++++---
 aom_dsp/x86/aom_convolve_copy_sse2.c |  8 ++++++--
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/aom_dsp/x86/aom_convolve_copy_avx2.c b/aom_dsp/x86/aom_convolve_copy_avx2.c
index bdbb4c16e9..5b90b104a8 100644
--- a/aom_dsp/x86/aom_convolve_copy_avx2.c
+++ b/aom_dsp/x86/aom_convolve_copy_avx2.c
@@ -27,7 +27,9 @@ static INLINE void copy_128(const uint8_t *src, uint8_t *dst) {
 
 void aom_convolve_copy_avx2(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride, int w, int h) {
-  if (w >= 16) {
+  // The w == 16 case uses _mm_store_si128(), which requires its output address
+  // be aligned on a 16-byte boundary.
+  if (w == 16) {
     assert(!((intptr_t)dst % 16));
     assert(!(dst_stride % 16));
   }
@@ -159,9 +161,11 @@ static INLINE void highbd_copy_128(const uint16_t *src, uint16_t *dst) {
 void aom_highbd_convolve_copy_avx2(const uint16_t *src, ptrdiff_t src_stride,
                                    uint16_t *dst, ptrdiff_t dst_stride, int w,
                                    int h) {
-  if (w >= 16) {
+  // The w == 8 case uses _mm_store_si128(), which requires a 16-byte-aligned
+  // output address; dst_stride is in uint16_t units, so 8 elements = 16 bytes.
+  if (w == 8) {
     assert(!((intptr_t)dst % 16));
-    assert(!(dst_stride % 16));
+    assert(!(dst_stride % 8));
   }
 
   if (w == 2) {
diff --git a/aom_dsp/x86/aom_convolve_copy_sse2.c b/aom_dsp/x86/aom_convolve_copy_sse2.c
index 887adde962..674a37fa49 100644
--- a/aom_dsp/x86/aom_convolve_copy_sse2.c
+++ b/aom_dsp/x86/aom_convolve_copy_sse2.c
@@ -35,6 +35,8 @@ static INLINE void copy_128(const uint8_t *src, uint8_t *dst) {
 
 void aom_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride, int w, int h) {
+  // The w >= 16 cases use _mm_store_si128(), which requires its output address
+  // be aligned on a 16-byte boundary.
   if (w >= 16) {
     assert(!((intptr_t)dst % 16));
     assert(!(dst_stride % 16));
@@ -200,9 +202,11 @@ static INLINE void highbd_copy_128(const uint16_t *src, uint16_t *dst) {
 void aom_highbd_convolve_copy_sse2(const uint16_t *src, ptrdiff_t src_stride,
                                    uint16_t *dst, ptrdiff_t dst_stride, int w,
                                    int h) {
-  if (w >= 16) {
+  // The w >= 8 cases use _mm_store_si128(), which requires a 16-byte-aligned
+  // output address; dst_stride is in uint16_t units, so 8 elements = 16 bytes.
+  if (w >= 8) {
     assert(!((intptr_t)dst % 16));
-    assert(!(dst_stride % 16));
+    assert(!(dst_stride % 8));
   }
 
   if (w == 2) {