aom: rdopt_sse4.c: use xx_loadu_2x64 for unaligned loads

From c5a71030d6adeb5f6cefc987134e335d0d79929a Mon Sep 17 00:00:00 2001
From: James Zern <[EMAIL REDACTED]>
Date: Fri, 17 May 2024 19:49:19 -0700
Subject: [PATCH] rdopt_sse4.c: use xx_loadu_2x64 for unaligned loads

This quiets some undefined behavior sanitizer (UBSan) warnings related
to unaligned loads. With gcc-13 & clang-16 the generated code differs
only by register/code reordering.

Bug: b:300649160
Change-Id: Icbea76594a70d6bd7b48c11d513f103764682fd7
---
 av1/encoder/x86/rdopt_sse4.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/av1/encoder/x86/rdopt_sse4.c b/av1/encoder/x86/rdopt_sse4.c
index af61df102..76980d673 100644
--- a/av1/encoder/x86/rdopt_sse4.c
+++ b/av1/encoder/x86/rdopt_sse4.c
@@ -29,10 +29,8 @@ INLINE static void horver_correlation_4x4(const int16_t *diff, int stride,
   //                      [ i j k l ]
   //                      [ m n o p ]
 
-  const __m128i pixelsa = _mm_set_epi64x(*(int64_t *)&diff[0 * stride],
-                                         *(int64_t *)&diff[2 * stride]);
-  const __m128i pixelsb = _mm_set_epi64x(*(int64_t *)&diff[1 * stride],
-                                         *(int64_t *)&diff[3 * stride]);
+  const __m128i pixelsa = xx_loadu_2x64(&diff[0 * stride], &diff[2 * stride]);
+  const __m128i pixelsb = xx_loadu_2x64(&diff[1 * stride], &diff[3 * stride]);
   // pixelsa = [d c b a l k j i] as i16
   // pixelsb = [h g f e p o n m] as i16