aom: remove {xx,yy}_set1_64_from_32i & xx_set_64_from_32i

From e06f94e15859b20c00b4313eaad0171c25c21e81 Mon Sep 17 00:00:00 2001
From: James Zern <[EMAIL REDACTED]>
Date: Thu, 9 May 2024 16:56:21 -0700
Subject: [PATCH] remove {xx,yy}_set1_64_from_32i & xx_set_64_from_32i

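These helpers were needed for compatibility with Visual Studio versions
prior to 2015. The project now requires Visual Studio 2019 or later, so
the _mm_set_epi64x(), _mm_set1_epi64x() and _mm256_set1_epi64x()
intrinsics are called directly. The mask and quantizer call sites keep
the helpers' zero-extension of the 32-bit argument by passing ~0u or an
explicit (uint32_t) cast.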

Change-Id: I33b3f0be5ee86ab573a4764553b606f5eb2c14c9
---
 aom_dsp/x86/highbd_sad_avx2.c              |  2 +-
 aom_dsp/x86/sum_squares_avx2.c             |  2 +-
 aom_dsp/x86/sum_squares_sse2.c             |  6 +++---
 aom_dsp/x86/synonyms.h                     | 22 ----------------------
 aom_dsp/x86/synonyms_avx2.h                | 11 -----------
 av1/common/x86/reconinter_avx2.c           |  2 +-
 av1/common/x86/reconinter_ssse3.c          |  2 +-
 av1/encoder/x86/av1_highbd_quantize_sse4.c |  9 +++++----
 av1/encoder/x86/wedge_utils_avx2.c         |  2 +-
 av1/encoder/x86/wedge_utils_sse2.c         |  2 +-
 10 files changed, 14 insertions(+), 46 deletions(-)

diff --git a/aom_dsp/x86/highbd_sad_avx2.c b/aom_dsp/x86/highbd_sad_avx2.c
index 6c78eeeefb..8b3045a610 100644
--- a/aom_dsp/x86/highbd_sad_avx2.c
+++ b/aom_dsp/x86/highbd_sad_avx2.c
@@ -551,7 +551,7 @@ unsigned int aom_highbd_sad128x128_avg_avx2(const uint8_t *src, int src_stride,
 static INLINE void get_4d_sad_from_mm256_epi32(const __m256i *v,
                                                uint32_t *res) {
   __m256i u0, u1, u2, u3;
-  const __m256i mask = yy_set1_64_from_32i(~0);
+  const __m256i mask = _mm256_set1_epi64x(~0u);
   __m128i sad;
 
   // 8 32-bit summation
diff --git a/aom_dsp/x86/sum_squares_avx2.c b/aom_dsp/x86/sum_squares_avx2.c
index 89b9b824bf..c748a7dcce 100644
--- a/aom_dsp/x86/sum_squares_avx2.c
+++ b/aom_dsp/x86/sum_squares_avx2.c
@@ -21,7 +21,7 @@ static uint64_t aom_sum_squares_2d_i16_nxn_avx2(const int16_t *src, int stride,
                                                 int width, int height) {
   uint64_t result;
   __m256i v_acc_q = _mm256_setzero_si256();
-  const __m256i v_zext_mask_q = yy_set1_64_from_32i(~0);
+  const __m256i v_zext_mask_q = _mm256_set1_epi64x(~0u);
   for (int col = 0; col < height; col += 4) {
     __m256i v_acc_d = _mm256_setzero_si256();
     for (int row = 0; row < width; row += 16) {
diff --git a/aom_dsp/x86/sum_squares_sse2.c b/aom_dsp/x86/sum_squares_sse2.c
index cf3ed98974..6c34c44317 100644
--- a/aom_dsp/x86/sum_squares_sse2.c
+++ b/aom_dsp/x86/sum_squares_sse2.c
@@ -84,7 +84,7 @@ uint64_t aom_sum_squares_2d_i16_4xn_sse2(const int16_t *src, int stride,
     src += stride << 2;
     r += 4;
   } while (r < height);
-  const __m128i v_zext_mask_q = xx_set1_64_from_32i(~0);
+  const __m128i v_zext_mask_q = _mm_set1_epi64x(~0u);
   __m128i v_acc_64 = _mm_add_epi64(_mm_srli_epi64(v_acc_q, 32),
                                    _mm_and_si128(v_acc_q, v_zext_mask_q));
   v_acc_64 = _mm_add_epi64(v_acc_64, _mm_srli_si128(v_acc_64, 8));
@@ -116,7 +116,7 @@ aom_sum_squares_2d_i16_nxn_sse2(const int16_t *src, int stride, int width,
                                 int height) {
   int r = 0;
 
-  const __m128i v_zext_mask_q = xx_set1_64_from_32i(~0);
+  const __m128i v_zext_mask_q = _mm_set1_epi64x(~0u);
   __m128i v_acc_q = _mm_setzero_si128();
 
   do {
@@ -254,7 +254,7 @@ uint64_t aom_sum_sse_2d_i16_sse2(const int16_t *src, int src_stride, int width,
 //////////////////////////////////////////////////////////////////////////////
 
 static uint64_t aom_sum_squares_i16_64n_sse2(const int16_t *src, uint32_t n) {
-  const __m128i v_zext_mask_q = xx_set1_64_from_32i(~0);
+  const __m128i v_zext_mask_q = _mm_set1_epi64x(~0u);
   __m128i v_acc0_q = _mm_setzero_si128();
   __m128i v_acc1_q = _mm_setzero_si128();
 
diff --git a/aom_dsp/x86/synonyms.h b/aom_dsp/x86/synonyms.h
index 0d51cdff48..ab13446b8d 100644
--- a/aom_dsp/x86/synonyms.h
+++ b/aom_dsp/x86/synonyms.h
@@ -70,28 +70,6 @@ static INLINE void xx_storeu_128(void *const a, const __m128i v) {
   _mm_storeu_si128((__m128i *)a, v);
 }
 
-// The _mm_set_epi64x() intrinsic is undefined for some Visual Studio
-// compilers. The following function is equivalent to _mm_set_epi64x()
-// acting on 32-bit integers.
-static INLINE __m128i xx_set_64_from_32i(int32_t e1, int32_t e0) {
-#if defined(_MSC_VER) && _MSC_VER < 1900
-  return _mm_set_epi32(0, e1, 0, e0);
-#else
-  return _mm_set_epi64x((uint32_t)e1, (uint32_t)e0);
-#endif
-}
-
-// The _mm_set1_epi64x() intrinsic is undefined for some Visual Studio
-// compilers. The following function is equivalent to _mm_set1_epi64x()
-// acting on a 32-bit integer.
-static INLINE __m128i xx_set1_64_from_32i(int32_t a) {
-#if defined(_MSC_VER) && _MSC_VER < 1900
-  return _mm_set_epi32(0, a, 0, a);
-#else
-  return _mm_set1_epi64x((uint32_t)a);
-#endif
-}
-
 // Fill an SSE register using an interleaved pair of values, ie. set the
 // 8 channels to {a, b, a, b, a, b, a, b}, using the same channel ordering
 // as when a register is stored to / loaded from memory.
diff --git a/aom_dsp/x86/synonyms_avx2.h b/aom_dsp/x86/synonyms_avx2.h
index d4e8f69111..d78f4e6f98 100644
--- a/aom_dsp/x86/synonyms_avx2.h
+++ b/aom_dsp/x86/synonyms_avx2.h
@@ -53,17 +53,6 @@ static INLINE __m256i yy_set2_epi16(int16_t a, int16_t b) {
   return _mm256_setr_epi16(a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b);
 }
 
-// The _mm256_set1_epi64x() intrinsic is undefined for some Visual Studio
-// compilers. The following function is equivalent to _mm256_set1_epi64x()
-// acting on a 32-bit integer.
-static INLINE __m256i yy_set1_64_from_32i(int32_t a) {
-#if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
-  return _mm256_set_epi32(0, a, 0, a, 0, a, 0, a);
-#else
-  return _mm256_set1_epi64x((uint32_t)a);
-#endif
-}
-
 // Some compilers don't have _mm256_set_m128i defined in immintrin.h. We
 // therefore define an equivalent function using a different intrinsic.
 // ([ hi ], [ lo ]) -> [ hi ][ lo ]
diff --git a/av1/common/x86/reconinter_avx2.c b/av1/common/x86/reconinter_avx2.c
index 71fab7a577..4bc5aa41c3 100644
--- a/av1/common/x86/reconinter_avx2.c
+++ b/av1/common/x86/reconinter_avx2.c
@@ -576,7 +576,7 @@ void av1_build_compound_diffwtd_mask_highbd_avx2(
         }
       }
     } else {
-      const __m128i xshift = xx_set1_64_from_32i(bd - 8 + DIFF_FACTOR_LOG2);
+      const __m128i xshift = _mm_set1_epi64x(bd - 8 + DIFF_FACTOR_LOG2);
       if (mask_type == DIFFWTD_38_INV) {
         for (int i = 0; i < h; ++i) {
           for (int j = 0; j < w; j += 16) {
diff --git a/av1/common/x86/reconinter_ssse3.c b/av1/common/x86/reconinter_ssse3.c
index c9a3709a62..b177958b83 100644
--- a/av1/common/x86/reconinter_ssse3.c
+++ b/av1/common/x86/reconinter_ssse3.c
@@ -76,7 +76,7 @@ void av1_build_compound_diffwtd_mask_highbd_ssse3(
         }
       }
     } else {
-      const __m128i xshift = xx_set1_64_from_32i(bd - 8 + DIFF_FACTOR_LOG2);
+      const __m128i xshift = _mm_set1_epi64x(bd - 8 + DIFF_FACTOR_LOG2);
       if (mask_type == DIFFWTD_38_INV) {
         for (int i = 0; i < h; ++i) {
           for (int j = 0; j < w; j += 8) {
diff --git a/av1/encoder/x86/av1_highbd_quantize_sse4.c b/av1/encoder/x86/av1_highbd_quantize_sse4.c
index 40b3b460b6..f3a0b15de5 100644
--- a/av1/encoder/x86/av1_highbd_quantize_sse4.c
+++ b/av1/encoder/x86/av1_highbd_quantize_sse4.c
@@ -138,8 +138,9 @@ void av1_highbd_quantize_fp_sse4_1(
   const int round0 = ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
 
   qparam[0] = _mm_set_epi32(round1, round1, round1, round0);
-  qparam[1] = xx_set_64_from_32i(quant_ptr[1], quant_ptr[0]);
-  qparam[2] = xx_set_64_from_32i(dequant_ptr[1], dequant_ptr[0]);
+  qparam[1] = _mm_set_epi64x((uint32_t)quant_ptr[1], (uint32_t)quant_ptr[0]);
+  qparam[2] =
+      _mm_set_epi64x((uint32_t)dequant_ptr[1], (uint32_t)dequant_ptr[0]);
   qparam[3] = _mm_set_epi32(dequant_ptr[1], dequant_ptr[1], dequant_ptr[1],
                             dequant_ptr[0]);
 
@@ -149,8 +150,8 @@ void av1_highbd_quantize_fp_sse4_1(
 
   // update round/quan/dquan for AC
   qparam[0] = _mm_unpackhi_epi64(qparam[0], qparam[0]);
-  qparam[1] = xx_set1_64_from_32i(quant_ptr[1]);
-  qparam[2] = xx_set1_64_from_32i(dequant_ptr[1]);
+  qparam[1] = _mm_set1_epi64x((uint32_t)quant_ptr[1]);
+  qparam[2] = _mm_set1_epi64x((uint32_t)dequant_ptr[1]);
   qparam[3] = _mm_set1_epi32(dequant_ptr[1]);
   quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift, log_scale,
                         quanAddr, dquanAddr);
diff --git a/av1/encoder/x86/wedge_utils_avx2.c b/av1/encoder/x86/wedge_utils_avx2.c
index 9cde860534..3f61c023c8 100644
--- a/av1/encoder/x86/wedge_utils_avx2.c
+++ b/av1/encoder/x86/wedge_utils_avx2.c
@@ -31,7 +31,7 @@ uint64_t av1_wedge_sse_from_residuals_avx2(const int16_t *r1, const int16_t *d,
   uint64_t csse;
 
   const __m256i v_mask_max_w = _mm256_set1_epi16(MAX_MASK_VALUE);
-  const __m256i v_zext_q = yy_set1_64_from_32i(~0);
+  const __m256i v_zext_q = _mm256_set1_epi64x(~0u);
 
   __m256i v_acc0_q = _mm256_setzero_si256();
 
diff --git a/av1/encoder/x86/wedge_utils_sse2.c b/av1/encoder/x86/wedge_utils_sse2.c
index d7ac2223f2..c3005790f2 100644
--- a/av1/encoder/x86/wedge_utils_sse2.c
+++ b/av1/encoder/x86/wedge_utils_sse2.c
@@ -31,7 +31,7 @@ uint64_t av1_wedge_sse_from_residuals_sse2(const int16_t *r1, const int16_t *d,
   uint64_t csse;
 
   const __m128i v_mask_max_w = _mm_set1_epi16(MAX_MASK_VALUE);
-  const __m128i v_zext_q = xx_set1_64_from_32i(~0);
+  const __m128i v_zext_q = _mm_set1_epi64x(~0u);
 
   __m128i v_acc0_q = _mm_setzero_si128();